Click here to Skip to main content
15,880,967 members
Articles / Desktop Programming / MFC

SDI (Sound Device Interface)--A library for Auditory Display

Rate me:
Please Sign up or sign in to vote.
4.60/5 (3 votes)
2 May 2004 - 6 min read 110.2K   3.7K   45  
A GDI-like API for 3D positioning of speech, and MIDI composition using a single string.
/******************************************************************************
SDI 1.0
    A library for Auditory Display
Copyright 2004 Dong Lin

This file is part of SDI.

    SDI is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    SDI is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SDI; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

Last Updated : Apr.15th, 2004

For any question, suggestion or failure report, please contact me by:
e-mail: jonathan1983@126.com
*******************************************************************************/

/******************************************************************

File Name: SDI.h

Description:
	Declarations of the constants, macros and APIs used for "displaying
	sound". Internally using A3D, IBM ViaVoice TTS and DirectSound/DirectMusic,
	the SDI, or Sound Device Interface, handles all the work needed to position
	synthesized speech and MIDI notes (the latter also known as earcons in
	HCI, Human-Computer Interaction) in a virtual 3D world. Note that the MIDI notes are parsed
	from a string like "C42 D42 E42 F42 G42 A42 B43" (representing measures "|1234|567-||")
	on the fly, instead of being loaded from a standard MIDI file, thus making it possible to play
	notes programmatically at runtime.
	However, due to technical problems, the positioning of MIDI notes is dreadful.
	See readme.txt for more information.

Note:
	Be sure to place eci.h and ia3dapi.h right beside this file (in the same directory).

[Version Info]


********************************************************************/

#if !defined(SDI_H_2A37E7B7_6DB0_4A8C_8906_B68244280F7B_INCLUDED)
#define SDI_H_2A37E7B7_6DB0_4A8C_8906_B68244280F7B_INCLUDED

//////////////////////////////////////////////////////////////
// Included Headers
////////////////////////
#include <objbase.h>

#include "eci.h"
#include "ia3dapi.h"
#include <dmusici.h>

//////////////////////////////////////////////////////////////
// Constants
////////////////////////
// Handle type for objects created by this library. HSPEECH and HEARCON are
// plain aliases of HSDIOBJECT (itself a DWORD), so the compiler will not
// diagnose a speech handle being passed where an earcon handle is expected,
// or vice versa.
typedef DWORD	HSDIOBJECT;
typedef HSDIOBJECT HSPEECH;		// returned by CreateSpeech
typedef HSDIOBJECT HEARCON;		// returned by CreateEarcon


// A3D object style mask [InitializeSDI.a3dStyle]
// Refer to "A3D 3.0 API Reference Manual" for these flags.
//
// Every non-zero constant below occupies its own bit, so they can be OR-ed
// together. SDI_A3D_OUTPUT_HEADPHONES and SDI_A3D_STREAMING_PRIORITY_NORMAL
// are defined as 0: they contribute no bits, so they are what a mask means
// when no other constant of the same group is set, and they cannot be
// detected with a bitwise AND.

// Feature switches (bits 0-7)
#define SDI_A3D_1ST_REFLECTION					0x00000001
#define SDI_A3D_DISABLE_FOCUS_MUTE				0x00000002
#define SDI_A3D_DISABLE_SPLASHSCREEN			0x00000004
#define SDI_A3D_GEOMETRIC_REVERB				0x00000008
#define SDI_A3D_OCCLUSIONS						0x00000010
#define SDI_A3D_REVERB							0x00000020
#define SDI_A3D_CL_EXCLUSIVE					0x00000040
#define SDI_A3D_LEFT_HAND_COORD					0x00000080
// Output mode (bits 8-10; 0 = headphones)
#define SDI_A3D_OUTPUT_HEADPHONES				0x00000000
#define SDI_A3D_OUTPUT_SPEAKERS_WIDE			0x00000100
#define SDI_A3D_OUTPUT_SPEAKERS_NARROW			0x00000200
#define SDI_A3D_OUTPUT_MODE_QUAD				0x00000400
// Streaming priority (bits 11-12; 0 = normal)
#define SDI_A3D_STREAMING_PRIORITY_NORMAL		0x00000000
#define SDI_A3D_STREAMING_PRIORITY_HIGH			0x00000800
#define SDI_A3D_STREAMING_PRIORITY_HIGHEST		0x00001000


// Speech object style mask [CreateSpeech.dwECIStyle]
// Refer to "IBM ViaVoice TTS API Reference" for these flags
//
// The mask is made of three groups that are OR-ed together:
//   - language:      an enumerated value 0..3 in the low bits (NOT one bit
//                    per language - e.g. BRITISH_ENGLISH | MANDARIN_CHINESE
//                    equals TAIWANESE_MANDARIN, so pick exactly one);
//   - text options:  independent single-bit switches (0x10, 0x20, 0x40);
//   - sample rate:   0 (8000), 0x80 or 0x100 - pick exactly one.
// Constants defined as 0 contribute no bits and are therefore what the mask
// means when nothing else from that group is given.

// Language (choose one)
#define SDI_LANGUAGE_GENERAL_AMERICAN_ENGLISH		0x00000000
#define SDI_LANGUAGE_BRITISH_ENGLISH				0x00000001
#define SDI_LANGUAGE_MANDARIN_CHINESE				0x00000002
#define SDI_LANGUAGE_TAIWANESE_MANDARIN				0x00000003
// Text-processing options (combine freely)
#define SDI_DONTUSE_ABBR_DICTIONARY					0x00000010
#define SDI_ANNOTATED_TEXT							0x00000020
#define SDI_4DIGIT_AS_YEAR							0x00000040
// Output sample rate (choose one)
#define SDI_SAMPLERATE_8000							0x00000000
#define SDI_SAMPLERATE_11024						0x00000080
#define SDI_SAMPLERATE_22048						0x00000100
// Correctly named aliases: the standard audio rates are 11025 Hz and
// 22050 Hz. The *_11024 / *_22048 spellings above are kept so that existing
// callers keep compiling; the values are identical.
#define SDI_SAMPLERATE_11025						SDI_SAMPLERATE_11024
#define SDI_SAMPLERATE_22050						SDI_SAMPLERATE_22048

// Preset Voice
// These are the small integers 0..7 cast to VOICEPARAM*, i.e. sentinel
// values rather than addresses of real VOICEPARAM objects - never
// dereference them. Note that SDI_VOICE_ADULTMALE1 is numerically a null
// pointer.
// NOTE(review): presumably they are accepted wherever a VOICEPARAM* voice is
// expected (CreateSpeech / SetVoice) and match the nIndex of GetPresetVoice -
// confirm against the implementation.
#define SDI_VOICE_ADULTMALE1			((VOICEPARAM*)0)
#define SDI_VOICE_ADULTFEMALE1			((VOICEPARAM*)1)
#define SDI_VOICE_CHILD					((VOICEPARAM*)2)
#define SDI_VOICE_ADULTMALE2			((VOICEPARAM*)3)
#define SDI_VOICE_ADULTMALE3			((VOICEPARAM*)4)
#define SDI_VOICE_ADULTFEMALE2			((VOICEPARAM*)5)
#define SDI_VOICE_ELDERLYFEMALE			((VOICEPARAM*)6)
#define SDI_VOICE_ELDERLYMALE			((VOICEPARAM*)7)
												

/************************************************************
Interface Definition
************************************************************/
// SDI Environment
// Three-component float vector used by SetPosition / GetPosition.
// Declared with a typedef (same style as VOICEPARAM) so that the bare name
// SDIVECTOR used in the prototypes is also valid when this header is
// compiled as C; the struct tag is kept, so `struct SDIVECTOR` and existing
// C++ code continue to work unchanged.
// NOTE(review): units and axis orientation are not visible in this header
// (see SDI_A3D_LEFT_HAND_COORD) - confirm against the implementation.
typedef struct SDIVECTOR
{
	float x;
	float y;
	float z;
} SDIVECTOR;

// Library set-up / tear-down and object disposal.
//   InitializeSDI   - audioDevice: GUID of the audio device;
//                     a3dStyle:    OR-ed SDI_A3D_* flags;
//                     hInstance:   module instance handle;
//                     dwFlag:      NOTE(review): meaning not visible in this
//                                  header - confirm against the implementation.
//   ReleaseSDI      - takes no arguments; presumably undoes InitializeSDI.
//   DeleteSDIObject - accepts any HSDIOBJECT (HSPEECH and HEARCON are the
//                     same type).
// All three return BOOL; presumably TRUE on success - verify.
BOOL	InitializeSDI(LPGUID audioDevice,DWORD a3dStyle,HINSTANCE hInstance,
					  DWORD dwFlag);
BOOL	ReleaseSDI();
BOOL	DeleteSDIObject(HSDIOBJECT hObject);

// Generic SDI Object Operation
// These take an HSDIOBJECT, so they apply to both HSPEECH and HEARCON
// handles (both are aliases of HSDIOBJECT).
//   Play / Pause / Stop - playback control for the object.
//   SetPosition         - pos points to the SDIVECTOR to apply.
//   GetPosition         - pos presumably receives the current position
//                         (out-parameter) - verify.
//   SetVolume/GetVolume - gain as a FLOAT. NOTE(review): the valid range and
//                         scale of fGain are not visible here - confirm.
// The BOOL results presumably mean TRUE on success. GetVolume returns the
// value directly, so this header shows no separate failure indication for it.
BOOL	Play(HSDIOBJECT hObject);
BOOL	Pause(HSDIOBJECT hObject);
BOOL	Stop(HSDIOBJECT hObject);
BOOL	SetPosition(HSDIOBJECT hObject, SDIVECTOR* pos);
BOOL	GetPosition(HSDIOBJECT hObject, SDIVECTOR* pos);
BOOL	SetVolume(HSDIOBJECT hObject, FLOAT fGain);
FLOAT	GetVolume(HSDIOBJECT hObject);

// Speech Object
// Describes a synthesized voice. Every field is a single BYTE; the valid
// range of each one is given next to it.
typedef struct tagVOICEPARAM
{
	BYTE breathiness;		// 0-100 (100 for whisper)
	BYTE gender;			// 0: Male 1: Female
	BYTE headSize;			// 0-100
	BYTE pitchBaseline;		// 0-100 (Corresponding to 40-442Hz in real world)
							// 1 unit for 4.02Hz
	BYTE pitchFluctuation;	// 0-100 (0 for monotonous voice)
	BYTE roughness;			// 0-100
	BYTE speed;				// 0-250 (Corresponding to 70-1297 words per minute)
							// 1 unit for 4.908 words per minute
	BYTE volume;			// 0-100
} VOICEPARAM, *LPVOICEPARAM;


// Speech object API.
//   CreateSpeech   - dwECIStyle: OR-ed speech style flags (SDI_LANGUAGE_*,
//                    SDI_SAMPLERATE_*, ...); pVoice: voice description
//                    (presumably an SDI_VOICE_* preset is also accepted -
//                    verify); dwFlag: NOTE(review): meaning not visible in
//                    this header. Returns the new HSPEECH; the value that
//                    signals failure is not visible here.
//   PlayText       - psText is a read-only TCHAR string.
//   AddText        - psText is a read-only TCHAR string. NOTE(review): how it
//                    differs from PlayText (queueing vs. immediate playback)
//                    is not visible here - confirm.
//   GetPresetVoice - pVoice presumably receives the parameters of preset
//                    number nIndex - verify.
//   SetVoice       - changes the voice of an existing speech object.
//   SetECIStyle    - changes the style mask of an existing speech object.
HSPEECH CreateSpeech(DWORD dwECIStyle, VOICEPARAM* pVoice, DWORD dwFlag);
BOOL	PlayText(HSPEECH hSpeech, PCTSTR psText);
BOOL	AddText(HSPEECH hSpeech, PCTSTR psText);
BOOL	GetPresetVoice(int nIndex,	VOICEPARAM* pVoice);
BOOL	SetVoice(HSPEECH hSpeech, VOICEPARAM* pVoice);
BOOL	SetECIStyle(HSPEECH hSpeech, DWORD dwECIStyle);

// Earcon Object
// Lowest and highest note of a range; passed to SetChannelInstrument.
// Declared with a typedef (same style as VOICEPARAM) so that the bare name
// NOTERANGE used in the prototypes is also valid when this header is
// compiled as C; the struct tag is kept for existing code.
// NOTE(review): presumably MIDI note numbers, with both bounds inclusive -
// confirm against the implementation.
typedef struct NOTERANGE{
	DWORD dwLowNote;
	DWORD dwHighNote;
} NOTERANGE;

// Pack a time signature into a DWORD: nn in bits 31-24, dd in bits 23-16,
// cc in bits 15-8, bb in bits 7-0 (the value type CreateEarcon takes for
// timeSignature).
// Pack a key signature into a WORD: sf in bits 15-8, mi in bits 7-0 (the
// value type CreateEarcon takes for keySignature).
// NOTE(review): the parameter names match the fields of the Standard MIDI
// File time-signature (nn dd cc bb) and key-signature (sf mi) meta-events;
// presumably they carry the same meaning - confirm against the implementation.
//
// Each argument is converted to unsigned and masked to 8 bits BEFORE it is
// shifted. This keeps the shifts well-defined for negative arguments (e.g. a
// negative sf) and for nn >= 128, and stops an out-of-range or negative
// field from overwriting its neighbours. Results for arguments in 0..255 are
// unchanged. Each argument is evaluated exactly once.
#define MAKE_TIMESIGNATURE(nn, dd, cc, bb)	\
	((DWORD)((((DWORD)(nn) & 0xFFu) << 24) | (((DWORD)(dd) & 0xFFu) << 16) | \
			 (((DWORD)(cc) & 0xFFu) << 8)  |  ((DWORD)(bb) & 0xFFu)))
#define MAKE_KEYSIGNATURE(sf, mi)  \
	((WORD)((((DWORD)(sf) & 0xFFu) << 8) | ((DWORD)(mi) & 0xFFu)))

// Earcon (MIDI) object API.
//   CreateEarcon         - dwTempo: tempo (NOTE(review): unit not visible in
//                          this header - confirm); nTicksPerQN: ticks per
//                          quarter note; timeSignature / keySignature have
//                          the types produced by MAKE_TIMESIGNATURE and
//                          MAKE_KEYSIGNATURE. Returns the new HEARCON; the
//                          value that signals failure is not visible here.
//   SetChannelInstrument - assigns patch nPatch to channel nChannel;
//                          noteRange points to a NOTERANGE (NOTE(review):
//                          whether NULL is accepted is not visible here).
//   PlaySegment          - ppTracks is an array of nTracks byte buffers.
//                          NOTE(review): buffer format and ownership are not
//                          visible here - confirm.
//   PlayNotation         - plays notes written as text in psMusic, e.g.
//                          "C42 D42 E42 F42 G42 A42 B43" (see the file header).
//                          psMusic is declared non-const; whether the string
//                          is modified is not visible here.
//   ParseNotation        - parses psMusic; pbData is presumably the output
//                          buffer and the int result presumably a byte count
//                          or status - verify against the implementation,
//                          including how large pbData must be.
HEARCON CreateEarcon(DWORD dwTempo, WORD nTicksPerQN, DWORD timeSignature, WORD keySignature);
BOOL	SetChannelInstrument(HEARCON hEarcon, WORD nChannel, int nPatch,
							 NOTERANGE* noteRange);
BOOL	PlaySegment(HEARCON hEarcon, BYTE** ppTracks, BYTE nTracks);
BOOL	PlayNotation(HEARCON  hEarcon, PTSTR psMusic);
int 	ParseNotation(HEARCON hEarcon, PTSTR psMusic, BYTE* pbData);


#endif //!defined(SDI_H_2A37E7B7_6DB0_4A8C_8906_B68244280F7B_INCLUDED)

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Engineer
China China
A student at Zhejiang University, Zhejiang, China.
Major in Automation.
Now I want to study machine vision and robotics, but I'm really consumed with choices between hardware and software, and between research and engineering.
I'll be glad if you can give some suggestions.

Comments and Discussions