Introduction
After scouring the web for ways to perform voice recognition, I found quite
a few options. After checking out a few different APIs and services, I found
that C# does it right out of the box. The Microsoft SpeechRecognitionEngine
class found in the System.Speech assembly can be used to perform speech recognition natively within your C# apps without having to use a third-party
library or service.
To use it you will need to reference the System.Speech assembly in your project and add a using directive for the System.Speech.Recognition namespace.

And let's skeleton out our class with a return object like this:
/// <summary>
/// Skeleton of the speech recognizer: it holds the recognition engine and
/// exposes the result object that recognition fills in.
/// </summary>
public class SpeechReconizer
{
    // Engine that performs the recognition; it is created in the constructor in a later step.
    private SpeechRecognitionEngine _speechRecognitionEngine;

    /// <summary>Result object that the recognizer populates.</summary>
    public SpeechReconitionResult ReadResult { get; set; }

    /// <summary>Creates the recognizer. Intentionally empty at this stage.</summary>
    public SpeechReconizer()
    {
    }
}
/// <summary>
/// Simple data holder describing the outcome of a speech recognition attempt.
/// </summary>
public class SpeechReconitionResult
{
    // --- status flags ---

    /// <summary>True when text was recognized; false after an error or cancellation.</summary>
    public bool Success { get; set; }

    /// <summary>
    /// Flag for whether recognition has finished (presumably; confirm against
    /// the code that sets it).
    /// </summary>
    public bool Complete { get; set; }

    // --- payload ---

    /// <summary>The recognized text, or a placeholder message when no text is available.</summary>
    public string Text { get; set; }

    /// <summary>Description of the error or cancellation, when there is one.</summary>
    public string ErrorMessage { get; set; }
}
To initialize the SpeechRecognitionEngine we need to new it up and set some
tolerance values.
/// <summary>
/// Creates the recognition engine, loads a DictationGrammar into it and sets
/// all four recognition timeouts to ten seconds.
/// </summary>
public SpeechReconizer()
{
    // Every timeout below uses the same ten-second window.
    TimeSpan timeout = TimeSpan.FromSeconds(10.0);

    Grammar dictation = new DictationGrammar { Name = "Dictation Grammar" };

    _speechRecognitionEngine = new SpeechRecognitionEngine();
    _speechRecognitionEngine.LoadGrammar(dictation);

    _speechRecognitionEngine.BabbleTimeout = timeout;
    _speechRecognitionEngine.EndSilenceTimeout = timeout;
    _speechRecognitionEngine.EndSilenceTimeoutAmbiguous = timeout;
    _speechRecognitionEngine.InitialSilenceTimeout = timeout;
}
The next step involves delegates. There are two events we need to subscribe to: one raised when speech has been recognized, and one raised when the recognition is complete.
First, add the event subscriptions to the constructor, and then use ReSharper to stub out the handler methods.
// Hook up the two engine events: one for recognized speech, one for completion.
_speechRecognitionEngine.SpeechRecognized +=
    new EventHandler<SpeechRecognizedEventArgs>(RecognizerSpeechRecognized);
_speechRecognitionEngine.RecognizeCompleted +=
    new EventHandler<RecognizeCompletedEventArgs>(RecognizerRecognizeCompleted);
// Generated placeholder for the SpeechRecognized handler; the real body is written in the next step.
private void RecognizerSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    throw new NotImplementedException();
}
// Generated placeholder for the RecognizeCompleted handler; the real body is written in a later step.
private void RecognizerRecognizeCompleted(object sender, RecognizeCompletedEventArgs e)
{
    throw new NotImplementedException();
}
Now we're going to capture the result in the RecognizerSpeechRecognized delegate and update our result object.
/// <summary>
/// Handles SpeechRecognized: copies the recognized text into ReadResult, or
/// stores a placeholder message when the event carries no text.
/// </summary>
private void RecognizerSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    bool hasText = e.Result != null && e.Result.Text != null;
    if (!hasText)
    {
        // Success is deliberately left untouched on this path.
        ReadResult.Text = "Recognized text not available.";
        return;
    }

    ReadResult.Success = true;
    ReadResult.Text = e.Result.Text;
}
And add functionality to capture any errors in the RecognizerRecognizeCompleted delegate like so:
/// <summary>
/// Handles RecognizeCompleted: flags the result as complete and records an
/// error or a cancellation on ReadResult.
/// </summary>
/// <remarks>
/// NOTE: per the System.Speech documentation this event is raised for
/// asynchronous recognition (RecognizeAsync); a synchronous Recognize() call
/// does not raise it.
/// </remarks>
private void RecognizerRecognizeCompleted(object sender, RecognizeCompletedEventArgs e)
{
    // The operation is over whatever the outcome, so mark the result as complete.
    ReadResult.Complete = true;

    if (e.Error != null)
    {
        ReadResult.Success = false;
        ReadResult.ErrorMessage = string.Format("{0}: {1}", e.Error.GetType().Name, e.Error.Message);
    }

    // Checked independently of the error case: a cancellation overrides any error text set above.
    if (e.Cancelled)
    {
        ReadResult.Success = false;
        ReadResult.ErrorMessage = "Operation cancelled.";
    }
}
And with that our class is 95% of the way complete – the final piece is to
wire up a method to take in an audio recording and translate.
In my example I’m going to simply wire it up to use a WAV file. You can
overload this method to take a stream, or if you really want to be tricky you
can wire it up to the computer's microphone and read in speech that way.
/// <summary>
/// Runs speech recognition against a WAV file and returns the populated result.
/// </summary>
/// <param name="sourceAudio">Path of the WAV file to read.</param>
/// <returns>The result object, which is also stored in ReadResult.</returns>
public SpeechReconitionResult ReadSpeech(string sourceAudio)
{
    // Start from a fresh result so nothing leaks over from an earlier call.
    ReadResult = new SpeechReconitionResult();
    _speechRecognitionEngine.SetInputToWaveFile(sourceAudio);

    // Recognize() blocks for one recognition operation; the SpeechRecognized
    // handler fills in ReadResult while it runs.
    _speechRecognitionEngine.Recognize();

    // Synchronous Recognize() does not raise RecognizeCompleted, so completion
    // has to be recorded here.
    ReadResult.Complete = true;
    return ReadResult;
}
And with that we're done.
Here is a complete copy of the finished class:
/// <summary>
/// Wraps a SpeechRecognitionEngine loaded with a dictation grammar and turns
/// the speech in a WAV file into text.
/// </summary>
/// <remarks>
/// The engine is disposable, so dispose this object when you are finished with it.
/// </remarks>
public class SpeechReconizer : IDisposable
{
    private readonly SpeechRecognitionEngine _speechRecognitionEngine;
    private bool _disposed;

    /// <summary>Result of the most recent ReadSpeech call.</summary>
    public SpeechReconitionResult ReadResult { get; set; }

    /// <summary>
    /// Creates the engine, loads the dictation grammar, sets all four
    /// recognition timeouts to ten seconds and subscribes to the engine events.
    /// </summary>
    public SpeechReconizer()
    {
        // Every timeout below uses the same ten-second window.
        TimeSpan timeout = TimeSpan.FromSeconds(10.0);

        Grammar dictation = new DictationGrammar();
        dictation.Name = "Dictation Grammar";

        _speechRecognitionEngine = new SpeechRecognitionEngine();
        _speechRecognitionEngine.LoadGrammar(dictation);
        _speechRecognitionEngine.BabbleTimeout = timeout;
        _speechRecognitionEngine.EndSilenceTimeout = timeout;
        _speechRecognitionEngine.EndSilenceTimeoutAmbiguous = timeout;
        _speechRecognitionEngine.InitialSilenceTimeout = timeout;
        _speechRecognitionEngine.SpeechRecognized += RecognizerSpeechRecognized;
        _speechRecognitionEngine.RecognizeCompleted += RecognizerRecognizeCompleted;
    }

    /// <summary>
    /// Runs speech recognition against a WAV file and returns the populated result.
    /// </summary>
    /// <param name="sourceAudio">Path of the WAV file to read.</param>
    /// <returns>The result object, which is also stored in ReadResult.</returns>
    /// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
    public SpeechReconitionResult ReadSpeech(string sourceAudio)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(GetType().Name);
        }

        // Start from a fresh result so nothing leaks over from an earlier call.
        SpeechReconitionResult result = new SpeechReconitionResult();
        ReadResult = result;

        _speechRecognitionEngine.SetInputToWaveFile(sourceAudio);

        // Recognize() blocks for one recognition operation; the SpeechRecognized
        // handler fills in the result while it runs.
        _speechRecognitionEngine.Recognize();

        // Synchronous Recognize() does not raise RecognizeCompleted, so completion
        // has to be recorded here.
        result.Complete = true;
        return result;
    }

    /// <summary>
    /// Handles SpeechRecognized: copies the recognized text into ReadResult, or
    /// stores a placeholder message when the event carries no text.
    /// </summary>
    private void RecognizerSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        if (e.Result != null && e.Result.Text != null)
        {
            ReadResult.Success = true;
            ReadResult.Text = e.Result.Text;
        }
        else
        {
            ReadResult.Text = "Recognized text not available.";
        }
    }

    /// <summary>
    /// Handles RecognizeCompleted: flags the result as complete and records an
    /// error or a cancellation. Per the System.Speech documentation this event is
    /// raised for asynchronous recognition only.
    /// </summary>
    private void RecognizerRecognizeCompleted(object sender, RecognizeCompletedEventArgs e)
    {
        ReadResult.Complete = true;

        if (e.Error != null)
        {
            ReadResult.Success = false;
            ReadResult.ErrorMessage = string.Format("{0}: {1}",
                e.Error.GetType().Name, e.Error.Message);
        }

        // Checked independently of the error case: a cancellation overrides any error text set above.
        if (e.Cancelled)
        {
            ReadResult.Success = false;
            ReadResult.ErrorMessage = "Operation cancelled.";
        }
    }

    /// <summary>Unsubscribes from the engine events and disposes the engine.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Releases the engine; safe to call more than once.</summary>
    /// <param name="disposing">True when called from Dispose().</param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (disposing)
        {
            _speechRecognitionEngine.SpeechRecognized -= RecognizerSpeechRecognized;
            _speechRecognitionEngine.RecognizeCompleted -= RecognizerRecognizeCompleted;
            _speechRecognitionEngine.Dispose();
        }

        _disposed = true;
    }
}
As with all speech recognition engines, it’s not perfect, but it does a pretty good job – in my testing it’s pretty fast, and most of all it’s native to C# and
is a much better choice than using some third-party vendor to try and link this functionality into your application.
If you like this guide, you can find more like it over at my blog: http://www.sympletech.com.