|
/******************************************************************
[Copy Right Info]
File Name: SDI.h
Description:
Declarations of the constants, macros and APIs used for "displaying
sound". Internally using A3D, IBM ViaVoice TTS and DirectSound/DirectMusic,
the SDI, or Sound Device Interface, handles all the work needed to position
synthesized speech and MIDI notes (the latter also known as earcons in
HCI, Human-Computer Interaction) in a virtual 3D world. Note that the MIDI notes are parsed
from a string like "C42 D42 E42 F42 G42 A42 B43" (representing measures "|1234|567-||")
on the fly, instead of being loaded from a standard MIDI file, thus making it possible to play
notes programmatically at runtime.
However, due to technical problems, the positioning of MIDI notes is dreadful.
See readme.txt for more information.
Note:
Be sure to place eci.h and ia3dapi.h right beside this file (in the same directory).
[Version Info]
********************************************************************/
#if !defined(SDI_H_2A37E7B7_6DB0_4A8C_8906_B68244280F7B_INCLUDED)
#define SDI_H_2A37E7B7_6DB0_4A8C_8906_B68244280F7B_INCLUDED
//////////////////////////////////////////////////////////////
// Included Headers
////////////////////////
#include <objbase.h>
#include "eci.h"
#include "ia3dapi.h"
#include <dmusici.h>
//////////////////////////////////////////////////////////////
// Constants
////////////////////////
// Handle type (a DWORD value) identifying an SDI object. The generic
// operations declared in this header (Play, Pause, Stop, SetPosition, ...)
// take an HSDIOBJECT.
typedef DWORD HSDIOBJECT;
// Handle type returned by CreateSpeech. It is an alias of HSDIOBJECT, so it
// can be passed wherever an HSDIOBJECT is expected.
typedef HSDIOBJECT HSPEECH;
// Handle type returned by CreateEarcon; likewise an alias of HSDIOBJECT.
// NOTE(review): HSPEECH and HEARCON are the same underlying type, so the
// compiler cannot catch a speech handle passed to an earcon-only function.
typedef HSDIOBJECT HEARCON;
// A3D object style mask [InitializeSDI.a3dStyle]
// Refer to "A3D 3.0 API Reference Manual" for these flags.
// The values below are meant to be combined into one DWORD mask.
// Feature switches: each one occupies its own single bit (bits 0-7).
#define SDI_A3D_1ST_REFLECTION 0x00000001
#define SDI_A3D_DISABLE_FOCUS_MUTE 0x00000002
#define SDI_A3D_DISABLE_SPLASHSCREEN 0x00000004
#define SDI_A3D_GEOMETRIC_REVERB 0x00000008
#define SDI_A3D_OCCLUSIONS 0x00000010
#define SDI_A3D_REVERB 0x00000020
#define SDI_A3D_CL_EXCLUSIVE 0x00000040
#define SDI_A3D_LEFT_HAND_COORD 0x00000080
// Output mode (bits 8-10). SDI_A3D_OUTPUT_HEADPHONES is 0, i.e. it is the
// value obtained when none of the other output bits is set, so it cannot be
// detected with a bitwise AND.
// NOTE(review): presumably exactly one output mode is meant to be chosen -
// confirm against the implementation.
#define SDI_A3D_OUTPUT_HEADPHONES 0x00000000
#define SDI_A3D_OUTPUT_SPEAKERS_WIDE 0x00000100
#define SDI_A3D_OUTPUT_SPEAKERS_NARROW 0x00000200
#define SDI_A3D_OUTPUT_MODE_QUAD 0x00000400
// Streaming priority (bits 11-12). SDI_A3D_STREAMING_PRIORITY_NORMAL is 0,
// so it is likewise the "no priority bit set" case rather than a testable bit.
#define SDI_A3D_STREAMING_PRIORITY_NORMAL 0x00000000
#define SDI_A3D_STREAMING_PRIORITY_HIGH 0x00000800
#define SDI_A3D_STREAMING_PRIORITY_HIGHEST 0x00001000
// Speech object style mask [CreateSpeech.dwECIStyle]
// Refer to "IBM ViaVoice TTS API Reference" for these flags
// Language selector. These are consecutive values 0-3, NOT single-bit flags
// (SDI_LANGUAGE_TAIWANESE_MANDARIN equals BRITISH_ENGLISH | MANDARIN_CHINESE),
// so the language must be read as a small integer field and the constants
// must not be OR-ed with each other.
// NOTE(review): the width of this field is not defined here; the next flag
// starts at 0x10, so presumably it is the low four bits - confirm.
#define SDI_LANGUAGE_GENERAL_AMERICAN_ENGLISH 0x00000000
#define SDI_LANGUAGE_BRITISH_ENGLISH 0x00000001
#define SDI_LANGUAGE_MANDARIN_CHINESE 0x00000002
#define SDI_LANGUAGE_TAIWANESE_MANDARIN 0x00000003
// Independent single-bit text-processing options (bits 4-6).
#define SDI_DONTUSE_ABBR_DICTIONARY 0x00000010
#define SDI_ANNOTATED_TEXT 0x00000020
#define SDI_4DIGIT_AS_YEAR 0x00000040
// Sample-rate selector (bits 7-8). SDI_SAMPLERATE_8000 is 0, i.e. the value
// in effect when neither of the other two sample-rate bits is set.
// NOTE(review): the names 11024 and 22048 presumably stand for the usual
// 11025 Hz and 22050 Hz rates - confirm against the ECI reference.
#define SDI_SAMPLERATE_8000 0x00000000
#define SDI_SAMPLERATE_11024 0x00000080
#define SDI_SAMPLERATE_22048 0x00000100
// Preset Voice
// Sentinel "pointers": the small integers 0-7 cast to VOICEPARAM*. They do
// not point at real VOICEPARAM objects and must never be dereferenced.
// NOTE(review): presumably accepted wherever a VOICEPARAM* voice argument is
// taken (CreateSpeech, SetVoice) - confirm against the implementation.
// SDI_VOICE_ADULTMALE1 is (VOICEPARAM*)0, i.e. a null pointer, so passing
// NULL is indistinguishable from asking for this preset.
#define SDI_VOICE_ADULTMALE1 ((VOICEPARAM*)0)
#define SDI_VOICE_ADULTFEMALE1 ((VOICEPARAM*)1)
#define SDI_VOICE_CHILD ((VOICEPARAM*)2)
#define SDI_VOICE_ADULTMALE2 ((VOICEPARAM*)3)
#define SDI_VOICE_ADULTMALE3 ((VOICEPARAM*)4)
#define SDI_VOICE_ADULTFEMALE2 ((VOICEPARAM*)5)
#define SDI_VOICE_ELDERLYFEMALE ((VOICEPARAM*)6)
#define SDI_VOICE_ELDERLYMALE ((VOICEPARAM*)7)
/************************************************************
Interface Definition
************************************************************/
// SDI Environment
// Three-component float vector; it is the position argument type of
// SetPosition and GetPosition.
// NOTE(review): units and axis orientation are not defined in this header
// (see SDI_A3D_LEFT_HAND_COORD for the handedness switch) - confirm.
// Declared through a typedef, like VOICEPARAM, so that the bare name
// SDIVECTOR used by the prototypes in this header is valid in C as well as
// in C++. The struct tag is kept, so "struct SDIVECTOR" still works.
typedef struct SDIVECTOR
{
    float x;
    float y;
    float z;
} SDIVECTOR;
// Sets up the SDI environment.
//   audioDevice - pointer to the GUID of the audio device to use.
//                 NOTE(review): whether NULL selects a default device is not
//                 visible in this header - confirm.
//   a3dStyle    - combination of the SDI_A3D_* style flags.
//   hInstance   - a module instance handle.
//                 NOTE(review): presumably that of the calling application -
//                 confirm.
//   dwFlag      - NOTE(review): meaning is not documented in this header.
// Returns a BOOL; presumably nonzero on success - confirm.
BOOL InitializeSDI(LPGUID audioDevice, DWORD a3dStyle, HINSTANCE hInstance,
                   DWORD dwFlag);
// Counterpart of InitializeSDI.
// Declared with an explicit (void) parameter list: in C an empty "()" is an
// old-style declaration that disables argument checking, while in C++ both
// spellings mean the same thing.
// NOTE(review): whether this also destroys still-existing SDI objects is not
// visible here - confirm.
BOOL ReleaseSDI(void);
// Destroys the SDI object identified by hObject (a speech or earcon handle,
// since both are HSDIOBJECT aliases).
BOOL DeleteSDIObject(HSDIOBJECT hObject);
// Generic SDI Object Operation
// These functions take an HSDIOBJECT and therefore accept both HSPEECH and
// HEARCON handles. The BOOL results presumably report success as nonzero -
// NOTE(review): confirm against the implementation.
// Playback control for the object.
BOOL Play(HSDIOBJECT hObject);
BOOL Pause(HSDIOBJECT hObject);
BOOL Stop(HSDIOBJECT hObject);
// Sets the object's position from *pos.
// NOTE(review): pos is not const-qualified although a setter would normally
// only read it - confirm it is not modified.
BOOL SetPosition(HSDIOBJECT hObject, SDIVECTOR* pos);
// Retrieves the object's position; pos is presumably an output parameter
// that must point to caller-owned storage - confirm.
BOOL GetPosition(HSDIOBJECT hObject, SDIVECTOR* pos);
// Sets the object's volume.
// NOTE(review): the valid range and scale of fGain are not defined in this
// header - confirm.
BOOL SetVolume(HSDIOBJECT hObject, FLOAT fGain);
// Returns the object's volume directly. Unlike GetPosition there is no
// separate BOOL status, so a failure (e.g. an invalid handle) can only be
// signalled through the returned value itself.
FLOAT GetVolume(HSDIOBJECT hObject);
// Speech Object
// Parameters describing a synthesized voice. Every field is a single BYTE;
// the valid range of each field is given beside it.
typedef struct tagVOICEPARAM
{
BYTE breathiness; // 0-100 (100 for whisper)
BYTE gender; // 0: Male 1: Female
BYTE headSize; // 0-100
BYTE pitchBaseline; // 0-100 (Corresponding to 40-442Hz in real world)
// 1 unit for 4.02Hz
BYTE pitchFluctuation; // 0-100 (0 for monotonous voice)
BYTE roughness; // 0-100
BYTE speed; // 0-250 (Corresponding to 70-1297 words per minute)
// 1 unit for 4.908 words per minute
BYTE volume; // 0-100
} VOICEPARAM, *LPVOICEPARAM;
// Creates a speech object and returns its handle.
//   dwECIStyle - combination of the speech style constants
//                (SDI_LANGUAGE_*, SDI_DONTUSE_ABBR_DICTIONARY, ...,
//                SDI_SAMPLERATE_*).
//   pVoice     - voice parameters.
//                NOTE(review): presumably also accepts one of the
//                SDI_VOICE_* preset sentinels - confirm.
//   dwFlag     - NOTE(review): meaning is not documented in this header.
// NOTE(review): the handle value returned on failure is not defined here.
HSPEECH CreateSpeech(DWORD dwECIStyle, VOICEPARAM* pVoice, DWORD dwFlag);
// Text input for a speech object. psText is a PCTSTR, so its character type
// follows the UNICODE build setting.
// NOTE(review): judging by the names, PlayText starts speaking psText while
// AddText appends to pending text - confirm the exact semantics.
BOOL PlayText(HSPEECH hSpeech, PCTSTR psText);
BOOL AddText(HSPEECH hSpeech, PCTSTR psText);
// Fills *pVoice with the parameters of a preset voice.
// NOTE(review): nIndex presumably matches the 0-7 numbering of the
// SDI_VOICE_* presets - confirm.
BOOL GetPresetVoice(int nIndex, VOICEPARAM* pVoice);
// Changes the voice of an existing speech object.
BOOL SetVoice(HSPEECH hSpeech, VOICEPARAM* pVoice);
// Changes the style flags of an existing speech object; dwECIStyle takes the
// same constants as CreateSpeech.dwECIStyle.
BOOL SetECIStyle(HSPEECH hSpeech, DWORD dwECIStyle);
// Earcon Object
// Pair of note numbers delimiting a range, lowest note first; it is the
// noteRange argument type of SetChannelInstrument.
// NOTE(review): whether the bounds are inclusive and which numbering they use
// (presumably MIDI note numbers) is not defined in this header - confirm.
// Declared through a typedef, like VOICEPARAM, so that the bare name
// NOTERANGE used by SetChannelInstrument is valid in C as well as in C++.
// The struct tag is kept, so "struct NOTERANGE" still works.
typedef struct NOTERANGE
{
    DWORD dwLowNote;
    DWORD dwHighNote;
} NOTERANGE;
// Packs four byte-sized fields into one DWORD: nn in bits 31-24, dd in bits
// 23-16, cc in bits 15-8 and bb in bits 7-0.
// NOTE(review): the parameter names match the "nn dd cc bb" bytes of the
// Standard MIDI File time-signature meta event; presumably they carry the
// same meaning - confirm.
// Each argument is converted to DWORD before it is shifted. Shifting a plain
// signed int left so that a 1 reaches the sign bit (any nn >= 128) is
// undefined behaviour; the unsigned shift is always well defined.
// Arguments are not masked: callers must pass values in the range 0..255.
#define MAKE_TIMESIGNATURE(nn, dd, cc, bb) \
    ((DWORD)(((DWORD)(nn) << 24) | ((DWORD)(dd) << 16) | \
             ((DWORD)(cc) << 8) | (DWORD)(bb)))
// Creates an earcon object and returns its handle.
//   dwTempo       - tempo. NOTE(review): unit is not defined in this header.
//   nTicksPerBeat - time resolution in ticks per beat.
//   timeSignature - packed time signature; presumably built with
//                   MAKE_TIMESIGNATURE - confirm.
//   keySignature  - NOTE(review): encoding is not defined in this header.
// NOTE(review): the handle value returned on failure is not defined here.
HEARCON CreateEarcon(DWORD dwTempo, WORD nTicksPerBeat, DWORD timeSignature, WORD keySignature);
// Assigns instrument nPatch to channel nChannel of the earcon.
// NOTE(review): noteRange presumably limits the notes the instrument covers,
// and it is not visible here whether NULL is accepted - confirm.
BOOL SetChannelInstrument(HEARCON hEarcon, WORD nChannel, int nPatch,
NOTERANGE* noteRange);
// Plays nTracks tracks; ppTracks is an array of pointers to track data.
// NOTE(review): the byte format of a track and how its length is determined
// are not defined in this header - confirm.
BOOL PlaySegment(HEARCON hEarcon, BYTE** ppTracks, BYTE nTracks);
// Plays notes given as a notation string (the file header gives the example
// "C42 D42 E42 F42 G42 A42 B43").
// NOTE(review): psMusic is a non-const PTSTR, unlike the PCTSTR taken by
// PlayText - confirm whether the string is modified in place.
BOOL PlayNotation(HEARCON hEarcon, PTSTR psMusic);
// Parses a notation string into pbData.
// NOTE(review): no buffer size is passed for pbData, so the caller must know
// the required size in advance; the meaning of the int result (presumably a
// byte count) is not defined in this header - confirm both.
int ParseNotation(HEARCON hEarcon, PTSTR psMusic, BYTE* pbData);
#endif //!defined(SDI_H_2A37E7B7_6DB0_4A8C_8906_B68244280F7B_INCLUDED)
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.
A list of licenses authors might use can be found here
A student at Zhejiang University, Zhejiang, China.
Major in Automation.
Now I want to study machine vision and robotics, but I'm really consumed with choices between hardware and software, and between research and engineering.
I'll be glad if you can give some suggestions.