.net - Time offset of "Recognized Phrase" in Microsoft.Speech.Recognition


Using Microsoft's Speech API, I'd like to recognize a specific phrase and have it return the time at which the phrase was detected in the audio.

For example:

phrase: "i apples"

Audio: a 5-minute recording

Say the phrase is detected at 3m30s; I'd like to store that time offset along with the fact that the phrase exists in the audio. Is this possible?

I found an example that uses AudioPosition, which seems to be what I'm looking for:

Source: http://msdn.microsoft.com/en-us/library/microsoft.speech.recognition.speechrecognizedeventargs(v=office.14).aspx

using System;
using System.Collections.Generic;
using Microsoft.Speech.Recognition;

namespace SampleRecognition
{
  class Program
  {
    static void Main(string[] args)
    {
      // Initialize a SpeechRecognitionEngine object.
      using (SpeechRecognitionEngine recognizer =
        new SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US")))
      {
        // Create SemanticResultValue objects that contain cities and airport codes.
        SemanticResultValue chicago = new SemanticResultValue("Chicago", "ORD");
        SemanticResultValue boston = new SemanticResultValue("Boston", "BOS");
        SemanticResultValue miami = new SemanticResultValue("Miami", "MIA");
        SemanticResultValue dallas = new SemanticResultValue("Dallas", "DFW");

        // Create a Choices object and add the SemanticResultValue objects.
        Choices cities = new Choices();
        cities.Add(new Choices(new GrammarBuilder[] { chicago, boston, miami, dallas }));

        // Build the phrase and add the SemanticResultKeys.
        GrammarBuilder chooseCities = new GrammarBuilder();
        chooseCities.Append("I want to fly from");
        chooseCities.Append(new SemanticResultKey("origin", cities));
        chooseCities.Append("to");
        chooseCities.Append(new SemanticResultKey("destination", cities));

        // Build a Grammar object from the GrammarBuilder.
        Grammar bookFlight = new Grammar(chooseCities);
        bookFlight.Name = "Book Flight";

        // Add a handler for the SpeechRecognized event.
        recognizer.SpeechRecognized +=
          new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);

        // Load the Grammar object into the recognizer.
        recognizer.LoadGrammarAsync(bookFlight);

        // Set the input to the recognizer.
        recognizer.SetInputToDefaultAudioDevice();

        // Start recognition.
        recognizer.RecognizeAsync();
        Console.WriteLine("Starting asynchronous recognition...");

        // Keep the console window open.
        Console.ReadLine();
      }
    }

    // Handle the SpeechRecognized event.
    static void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
      Console.WriteLine("Recognition result summary:");
      Console.WriteLine(
        "  Recognized phrase: {0}\n" +
        "  Confidence score: {1}\n" +
        "  Grammar used: {2}\n",
        e.Result.Text, e.Result.Confidence, e.Result.Grammar.Name);

      // Display the semantic values in the recognition result.
      Console.WriteLine("  Semantic results:");
      foreach (KeyValuePair<string, SemanticValue> child in e.Result.Semantics)
      {
        Console.WriteLine("    The {0} city is {1}",
          child.Key, child.Value.Value ?? "null");
      }
      Console.WriteLine();

      // Display information about the words in the recognition result.
      Console.WriteLine("  Word summary: ");
      foreach (RecognizedWordUnit word in e.Result.Words)
      {
        Console.WriteLine(
          "    Lexical form ({1})" +
          " Pronunciation ({0})" +
          " Display form ({2})",
          word.Pronunciation, word.LexicalForm, word.DisplayAttributes);
      }

      // Display information about the audio in the recognition result.
      Console.WriteLine("  Input audio summary:\n" +
        "    Candidate phrase at:       {0} msec\n" +
        "    Phrase length:             {1} msec\n" +
        "    Input state time:          {2}\n" +
        "    Input format:              {3}\n",
        e.Result.Audio.AudioPosition,
        e.Result.Audio.Duration,
        e.Result.Audio.StartTime,
        e.Result.Audio.Format.EncodingFormat);

      // Display information about the alternate recognitions in the recognition result.
      Console.WriteLine("  Alternate phrase collection:");
      foreach (RecognizedPhrase phrase in e.Result.Alternates)
      {
        Console.WriteLine("    Phrase: " + phrase.Text);
        Console.WriteLine("    Confidence score: " + phrase.Confidence);
      }

      // Display information about any text that was replaced during normalization.
      if (e.Result.ReplacementWordUnits.Count != 0)
      {
        Console.WriteLine("  Replacement text:\n");
        foreach (ReplacementText rep in e.Result.ReplacementWordUnits)
        {
          Console.WriteLine("      At index {0} for {1} words. Text: {2}\n",
            rep.FirstWordIndex, rep.CountOfWords, rep.Text);
        }
      }
      else
      {
        Console.WriteLine();
        Console.WriteLine("No text was replaced");
      }
    }
  }
}
