Click here to Skip to main content
15,867,453 members
Articles / Programming Languages / C

Fun with Google Text To Speech eBook reader using minimalistic approach

Rate me:
Please Sign up or sign in to vote.
4.74/5 (31 votes)
9 Feb 2013CPOL3 min read 139.7K   97   27
Let Google read your eBooks aloud or convert to MP3! This free TTS engine used by Android phone just sounds fantastic so here is a simple eBook reader app with C++ source code

GoogleTTS-Ebook-Reader/GoogleTTS2.jpg

Introduction

It's interesting to see how many free tools Google has started to provide to people, thanks to its massive cloud computing capability.

I was so blown away by sound quality that I created this simple program to read my favorite eBooks aloud. So far there are 7 different language voices in exceptional quality

English French Italian Spanish German Czech Haitian-Creole Hindi 

And unfortunately 27 [sub-par] quality voices that were recently integrated via the third-party open-source eSpeak engine.

Afrikaans, Albanian, Catalan, Chinese (Mandarin), Croatian, Danish, Dutch, Finnish, Greek, Hungarian, Icelandic, Indonesian, Latvian, Macedonian, Norwegian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Swahili, Swedish, Turkish, Vietnamese, Welsh.

Google keeps replacing them with higher-quality versions as time progresses; the Czech voice, for example, was added recently.

Let Google TTS say your text of chosen language via simple URL

http://translate.google.com/translate_tts?tl=en&q=hello+world  

Yes. It's the same service that is integrated to Google's Android and powers pronunciation in Google's translate.

Anyway, even though it's a web-based service, it's free, and it sends you back an mp3 with TTS that is, for some languages, light years ahead of most paid-for TTS engines.

Let Google Translate detect language of your text 

http://translate.google.com/translate_a/t?client=t&sl=auto&text=hello+world 

What we receive is the detected language, which we in turn use to tell TTS which voice we want to hear. Notice sl=auto: it sets the "source language" parameter to autodetection.

Yes, Google's language detection is often unreliable, as you can see on the official Google Translate page. So you had better set the language in your app manually, but it's an interesting feature to test anyway.

Code 

The code is slightly larger because we need to detect the language per sentence, split the text into chunks of at most 100 characters, and send each chunk as a URL-encoded HTTP GET request. Google sends back an mp3 file, which we play as it is received, thanks to DirectShow's streaming nature and an installed mp3 codec. This is a minimal sample so you can focus on how it works. Unimportant code like the hook is folded in snippet form, but feel free to unfold and format the code the way you like. Replace all static buffers if you plan to use the code safely; cleanup and more robust error handling were left out so you can focus on the important parts. It is still a lot of fun.

So enjoy ;)

C++
#include <windows.h>
#include <shlwapi.h>
#include <Richedit.h>
#include <dshow.h>
#include <winsock.h>

#pragma comment(lib,"Strmiids.lib")
#pragma comment(lib,"Shlwapi.lib") 
#pragma comment(lib,"wsock32.lib")

// DsHook(a,b,c): redirects slot `b` of the vtable of COM object `a` to the
// replacement function `c`, after saving the original slot value in the
// function pointer `c##_` so the replacement can still call through to it.
//
//   a - pointer to a COM object (its first pointer-sized field is read as the vtable pointer)
//   b - zero-based index of the vtable slot to replace
//   c - replacement function; a pointer variable named `c` followed by `_` must exist
//
// The patch is applied only while `c##_` is still null, i.e. once per process.
// The macro expects a `DWORD no` variable in the calling scope; it receives
// the previous page protection reported by VirtualProtect.
// Sizes use sizeof(INT_PTR) so that a whole slot is covered on 64-bit builds
// as well (the slot is pointer-sized, not always 4 bytes).
#define DsHook(a,b,c) do { if (!c##_) { \
               INT_PTR* dsHookSlot_=(b)+*(INT_PTR**)(a);  VirtualProtect(&c##_,sizeof(INT_PTR),PAGE_EXECUTE_READWRITE,&no); \
                      *(INT_PTR*)&c##_=*dsHookSlot_;  VirtualProtect(dsHookSlot_,sizeof(INT_PTR),PAGE_EXECUTE_READWRITE,&no);  *dsHookSlot_=(INT_PTR)c; } } while (0)

// Original implementation of the hooked vtable slot; filled in by DsHook the
// first time the hook is installed and called by the replacement below.
HRESULT ( __stdcall * SyncReadAlligned_ ) ( void* inst, IMediaSample *smp ) ;

// Handle of the mp3 capture file that every successfully read sample is appended to.
HANDLE out;

// Replacement for the hooked read method: forwards the read to the original
// implementation, then appends the bytes that were delivered into the sample
// to the capture file.
//
//   inst - object the hooked method was invoked on (passed through untouched)
//   smp  - media sample the original method fills
//
// Returns whatever the original method returned, so the caller of the hooked
// method sees no difference.
HRESULT   __stdcall   SyncReadAlligned    ( void* inst, IMediaSample *smp ) {
    HRESULT ret = SyncReadAlligned_( inst, smp );

    // Nothing trustworthy is in the sample when the read itself failed.
    if( FAILED(ret) ) return ret;

    BYTE* buf = 0;
    if( FAILED(smp->GetPointer(&buf)) || !buf ) return ret;

    long len = smp->GetActualDataLength();
    if( len > 0 && out && out != INVALID_HANDLE_VALUE ) {
        DWORD written = 0;
        // Capturing is best effort: a failed write must not disturb playback.
        WriteFile( out, buf, (DWORD)len, &written, 0 );
    }
    return ret;
}

int WINAPI WinMain(HINSTANCE inst,HINSTANCE prev,LPSTR cmd,int show) {
    MSG msg={0}; WSADATA wsa; DWORD no; HRESULT hr; 

    CoInitialize(0);   WSAStartup(MAKEWORD(1,1),&wsa);   LoadLibraryA("RichEd20"); 

    // connect to google translate for text language autodetection
    SOCKET s=socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);  sockaddr_in addr={AF_INET,htons(80)};
    HOSTENT* dns=gethostbyname("translate.google.com"); memcpy(&addr.sin_addr.s_addr,dns->h_addr,4);
 
    if(connect(s,(sockaddr*)&addr,sizeof(addr)) != 0)  return 0;

    HWND hwnd = CreateWindowA("RICHEDIT20W",0,WS_SIZEBOX|ES_MULTILINE|WS_VISIBLE|ES_AUTOVSCROLL|ES_AUTOHSCROLL|WS_SYSMENU|WS_CAPTION|WS_MINIMIZE|WS_HSCROLL|WS_VSCROLL,500,500,500,300,0,0,0,0);

    while ( IsWindowVisible(hwnd) ) {
        if( PeekMessage(&msg,0,0,0,1) ) { TranslateMessage( &msg ); DispatchMessage( &msg ); }
        if( msg.wParam==VK_RETURN && msg.message == WM_KEYDOWN ) {

            DWORD  len =  2+GetWindowTextLength(hwnd)*2; CHARRANGE ch={0,-1}; SendMessage(hwnd,EM_EXSETSEL,0,(LPARAM)&ch);
            WCHAR* Txt =  (WCHAR*)calloc(len,1),*e,*txt=Txt; SendMessage(hwnd,EM_GETSELTEXT,0,(LPARAM)Txt); ch.cpMin=-1; 
                                                             SendMessage(hwnd,EM_EXSETSEL,0,(LPARAM)&ch); 
                   out =  CreateFile("c:/out.mp3",GENERIC_WRITE,FILE_SHARE_READ,0,CREATE_ALWAYS,0,0);

            while(*txt) { 
                // since sended text can not be larger than 100 we try to break sentences
                if((e=wcschr(txt,L'.')))                              *e=0; 
                if(wcslen(txt)>100 &&(e=wcschr(txt,L',')))            *e=0;
                if(wcslen(txt)>100) { e=txt+100; while(*e!=L' ') e--; *e=0; }
                
                // detect language by asking google translate service so we can switch voice language per sentence as needed                
                char utf[1000],esc[1000]={0},*a,*b=utf; WideCharToMultiByte(CP_UTF8,0,txt,-1,utf,1000,0,0); 
                while(*b) sprintf(esc+strlen(esc),"%%%0.2x",*(BYTE*)b++); txt+=wcslen(txt)+1; //escape utf-8 chars

                char buf[1000]; sprintf(buf,"GET /translate_a/t?client=t&sl=auto&text=%s HTTP/1.1\r\nUser-Agent: Mozilla/5.0\r\n\r\n\r\n\r\n",esc);
                send(s,buf,strlen(buf),0);    // we send text sentence to google translate server
                recv(s,buf,sizeof(buf),0);    // and receive detected language
                char lng[3]={"en"}; if((a=strstr(buf,"]],,\""))) memcpy(lng,a+5,2);	

                // This triplet with RenderFile is all you need to play anything with aprropriate codec on windows. 
                IGraphBuilder* graph= 0; CoCreateInstance( CLSID_FilterGraph, 0, CLSCTX_INPROC,IID_IGraphBuilder, (void **)&graph );
                IMediaControl* ctrl = 0; graph->QueryInterface( IID_IMediaControl, (void **)&ctrl );
                IMediaEvent*   event= 0; graph->QueryInterface( IID_IMediaEventEx, (void **)&event ); 

                // This sends text (sentence) encoded in get request  and progressively plays mp3 stream from google as it is received. 
                // So all TTS is done on server and this is only work that client does
                WCHAR url[1000];     wsprintfW(url,L"http://translate.google.com/translate_tts?tl=%S&q=%S",lng,esc); 
                if((hr=ctrl->RenderFile(url))) continue;   

                // we hook the source filter and append to global mp3 file on disk
                IBaseFilter*  filter;  graph->FindFilterByName(url,&filter);
                IPin*         pin;     filter->FindPin(L"Output",&pin); 
                IAsyncReader* reader;  pin->QueryInterface(IID_IAsyncReader,(void**)&reader);
                
                //  redirect  7th member func of IAsyncReader (SyncReadAlligned) to grab mp3 data from output pin of source filter
                DsHook(reader,6,SyncReadAlligned);

                // we run and wait for mp3 to finish before we ask another sentence
                hr=ctrl->Run(); long code=0,c; 
                while( code != EC_COMPLETE ) { 
                    if( PeekMessage(&msg,0,0,0,1) ) { TranslateMessage( &msg ); DispatchMessage( &msg ); } event->GetEvent(&code, &c, &c, 0); 
                    Sleep(1); 
                } 

                ctrl->Release(); event->Release(); graph->Release();
            } 
            free(Txt); 
            CloseHandle(out);
        }
    }
} 

Points of Interest

Notice that we are passing the web address directly to DirectShow. The RenderFile() call actually generates the whole graph, including the stream splitter, the mp3 decoder and the output to the sound device.

GoogleTTS-Ebook-Reader/graph.JPG

This simple trick allows us to, for example, listen to internet radio stations without much work. It requires you to have at least some mp3 codec installed, which most of you probably have. If not, install ffdshow, a free multi-codec that plays pretty much everything you throw at it.

Another thing is that the correct way to grab the received data would be to implement a sample grabber filter and connect it between the source and splitter filters. But that would require you to use the DirectShow SDK, which is pretty complicated to get compiling and hardly beats implementing just one procedure.

Unknown languages are not played, but you can make a lot of substitutions, for example mapping "nl" to "de". Mix sentences in different languages just for fun ;)

History

  • 28.3 first version
  • 31.3 combo box for manual language selection replaced by automatic language detection (per sentence)
  • 3.4 added capturing of received stream to mp3 file on disk
  • 15.5 added info that 29 new languages are synthesized now. Poor quality though.
  • 16.5 changed code to Unicode and uploaded fixed exe so Chinese Hindi etc works now
  • 2.6.2011 uses DNS instead of IP + updated language detection to reflect Google changes. added source to zip

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer (Senior)
Slovakia Slovakia
Past Projects:
[Siemens.sk]Mobile network software: HLR-Inovation for telering.at (Corba)
Medical software: CorRea module for CT scanner
[cauldron.sk]Computer Games:XboxLive/net code for Conan, Knights of the temple II, GeneTroopers, CivilWar, Soldier of fortune II
[www.elveon.com]Computer Games:XboxLive/net code for Elveon game based on Unreal Engine 3
ESET Reasearch.
Looking for job

Comments and Discussions

 
Bugsuspicious Pin
Member 871442123-Oct-14 11:01
Member 871442123-Oct-14 11:01 
GeneralRe: suspicious Pin
Ladislav Nevery30-Oct-15 10:12
Ladislav Nevery30-Oct-15 10:12 
Questioni need the c# ~~~hope you can made one ... Pin
startxp6-Dec-13 15:04
startxp6-Dec-13 15:04 
GeneralMy vote of 5 Pin
Fidel Pérez22-May-13 3:14
Fidel Pérez22-May-13 3:14 
SuggestionAmazing article, Thanks!! Pin
Fidel Pérez21-May-13 23:07
Fidel Pérez21-May-13 23:07 
GeneralRe: Amazing article, Thanks!! Pin
Ladislav Nevery23-May-13 0:55
Ladislav Nevery23-May-13 0:55 
GeneralMy vote of 5 Pin
plagwitz24-Feb-13 15:44
plagwitz24-Feb-13 15:44 
GeneralMy vote of 5 Pin
eslipak12-Feb-13 10:43
professionaleslipak12-Feb-13 10:43 
GeneralRe: My vote of 5 Pin
Ladislav Nevery12-Feb-13 22:37
Ladislav Nevery12-Feb-13 22:37 
GeneralRe: My vote of 5 Pin
eslipak13-Feb-13 2:56
professionaleslipak13-Feb-13 2:56 
GeneralMy vote of 4 Pin
WebMaster29-Dec-12 14:16
WebMaster29-Dec-12 14:16 
QuestionMy vote 5 Pin
serega4673-Apr-12 23:54
serega4673-Apr-12 23:54 
Generalany service for OCR by Google ? Pin
Member 15084529-Jun-11 0:59
Member 15084529-Jun-11 0:59 
GeneralRe: any service for OCR by Google ? Pin
Ladislav Nevery15-Jun-11 8:10
Ladislav Nevery15-Jun-11 8:10 
GeneralArticle fixed Pin
Ladislav Nevery2-Jun-11 10:29
Ladislav Nevery2-Jun-11 10:29 
GeneralMy vote of 4 Pin
Sergey Chepurin17-May-11 23:13
Sergey Chepurin17-May-11 23:13 
GeneralProgram doesn't work with Windows XP!!! Pin
TheEvilGerman23-Jan-11 2:55
TheEvilGerman23-Jan-11 2:55 
GeneralRe: Program doesn't work with Windows XP!!! Pin
Sergey Chepurin17-May-11 23:12
Sergey Chepurin17-May-11 23:12 
GeneralLanguage detection broken Pin
xtract1-Jul-10 4:55
xtract1-Jul-10 4:55 
GeneralCode in C# Pin
Nitin S16-May-10 23:15
professionalNitin S16-May-10 23:15 
QuestionReverse way, STT possible? Pin
TSchind12-Apr-10 22:17
TSchind12-Apr-10 22:17 
AnswerRe: Reverse way, STT possible? Pin
Ladislav Nevery13-Apr-10 1:39
Ladislav Nevery13-Apr-10 1:39 
GeneralRe: Reverse way, STT possible? Pin
phildal22-Jun-10 4:36
phildal22-Jun-10 4:36 
AnswerRe: Reverse way, STT possible? Pin
hobnob5-Jun-11 4:35
hobnob5-Jun-11 4:35 
GeneralNorton Fixed their False Positive Pin
Ladislav Nevery9-Apr-10 20:49
Ladislav Nevery9-Apr-10 20:49 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.