Project

General

Profile

Template
reco.cpp
Go to the documentation of this file.
/*
-----------------------------------------------------------------------------
This source file is part of OpenSpace3D
For the latest info, see http://www.openspace3d.com

Copyright (c) 2012 I-maginer

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
http://www.gnu.org/copyleft/lesser.txt

-----------------------------------------------------------------------------
*/
24
#include "reco.h"

#include <objbase.h> // CoTaskMemFree
35
38{
39 m_bGotReco = FALSE;
40 m_bInSound = FALSE;
41 cpAudio = NULL;
42}
43
44
45Recognition::Recognition(std::string pathtobin, std::string lang)
46{
47 m_bGotReco = FALSE;
48 m_bInSound = FALSE;
49 cpAudio = NULL;
50}
51
52
55{
56 if (cpAudio)
57 cpAudio->SetState(SPAS_CLOSED, 0);
58
59 cpRecoContext.Release();
60 cpRecoGrammar.Release();
61 pRecog.Release();
62}
63
64
66void Recognition::AddWord(std::string s_Rule, std::string s_Word)
67{
68 if (!cpRecoGrammar)
69 return;
70
71 // Declare local identifiers:
72 HRESULT hr = S_OK;
73 SPSTATEHANDLE hStateTravel = NULL;
74
75 wchar_t* w_Rule = convertCharToLPCWSTR((char*)s_Rule.c_str());
76
77 // Retrieve grammar rule's initial state
78 hr = cpRecoGrammar->GetRule(w_Rule, 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateTravel);
79 SAFE_DELETE(w_Rule);
80 if (FAILED(hr))
81 {
82 MMechostr(MSKDEBUG,"Error: Can't add word\n");
83 return;
84 }
85
86 wchar_t* w_Word = convertCharToLPCWSTR((char*)s_Word.c_str());
87
88 // Add a word to the grammar
89 hr = cpRecoGrammar->AddWordTransition(hStateTravel, NULL, w_Word, L" ", SPWT_LEXICAL, 1, NULL);
90 SAFE_DELETE(w_Word);
91
92 if (FAILED(hr))
93 {
94 MMechostr(MSKDEBUG,"Error: Can't add word\n");
95 return;
96 }
97
98 // Update the SR engine's language model
99 hr = cpRecoGrammar->Commit(NULL);
100
101 if (FAILED(hr))
102 {
103 MMechostr(MSKDEBUG,"Error: Can't add word\n");
104 return;
105 }
106
107 // Enable the grammar to let applications receive notifications about recognition
108 hr = cpRecoGrammar->SetGrammarState(SPGS_ENABLED);
109
110 if (FAILED(hr))
111 {
112 MMechostr(MSKDEBUG,"Error: Can't add word\n");
113 return;
114 }
115
116 // Activate a new rule
117 // $AS: A rule should be specified by its name, and the second parameter must always be set to NULL
118 hr = cpRecoGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
119}
120
121
124{
125 HRESULT hr;
126
127 // Initialize a new instance of Speech Recognition engine
128 hr = pRecog.CoCreateInstance(CLSID_SpInprocRecognizer);
129 if (FAILED(hr))
130 {
131 MMechostr(MSKDEBUG,"Error: Can't create SAPI Speech Recognizer (ISpRecognizer)\n") ;
132 return false;
133 }
134
135 // Create a new context of SR engine
136 hr = pRecog->CreateRecoContext(&cpRecoContext);
137 if (hr != S_OK)
138 {
139 MMechostr(MSKDEBUG,"Error: Cannot create SAPI Recognition Context (ISpRecoContext)");
140 return false;
141 }
142
143 // Get an object token of AudioInput type (mic)
144 hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
145 if (!SUCCEEDED(hr))
146 {
147 MMechostr(MSKDEBUG,"Error: Can't create default audio object\n");
148 return false;
149 }
150
151 // Specify the input stream which must be used by the SR engin (in our case, audio input from mic)
152 hr = pRecog->SetInput(cpAudio, TRUE);
153 if (!SUCCEEDED(hr))
154 {
155 MMechostr(MSKDEBUG,"Error: Can't init audio object\n");
156 return false;
157 }
158
159 hr = cpAudio->SetVolumeLevel(1000);
160 if (!SUCCEEDED(hr))
161 {
162 MMechostr(MSKDEBUG,"Error: volume reco\n");
163 return false;
164 }
165
166 hr = cpRecoContext->SetAudioOptions(SPAO_NONE, NULL, NULL);
167 if (!SUCCEEDED(hr))
168 return false;
169
170 // Enable the speech recognition engine
171 hr = pRecog->SetRecoState(SPRST_ACTIVE);
172 if (!SUCCEEDED(hr))
173 {
174 MMechostr(MSKDEBUG,"Error: Can't active recognition\n");
175 return false;
176 }
177
178 // grammar
179 if (SUCCEEDED(hr))
180 {
181 // Create a new grammar object
182 if (hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar) != S_OK)
183 {
184 MMechostr(MSKDEBUG,"Error: Failed to create grammar\n");
185 return false;
186 }
187 else
188 {
189 // Load a dictation topic into the SR engine
190 // The grammar is loaded statically (rules can't be modified in real-time)
191 hr = cpRecoGrammar->LoadDictation(NULL, SPLO_STATIC);
192
193 // Enable the dictation topic to be notified of recognitions
194 if (SUCCEEDED(hr))
195 cpRecoGrammar->SetDictationState(SPRS_ACTIVE);
196 }
197 }
198
199 // Set up the SR instance to send notifications
200 hr = cpRecoContext->SetNotifyCallbackFunction(&sapi_callback, 0, LPARAM(this));
201 if (!SUCCEEDED(hr))
202 {
203 MMechostr(MSKDEBUG,"Error: Cannot set notify callback function. (SetNofifyCallbackFunction)");
204 return false;
205 }
206
207 // Set which type of events the engine must be notified of
208 // If SetInterest() is not called, the SR engine defaults to SPEI_RECOGNITION as the only event interest
209 const ULONGLONG ullInterest = SPFEI(SPEI_SOUND_START) | // Audible sound is available through the input stream
210 SPFEI(SPEI_SOUND_END) | // Audible sound is no longer available, or sound stream has been inactive for a while
211 SPFEI(SPEI_RECOGNITION); // A full recognition has been returned
212 hr = cpRecoContext->SetInterest(ullInterest, ullInterest);
213
214 if (!SUCCEEDED(hr))
215 {
216 MMechostr(MSKDEBUG,"Error: Cannot correctly set notifications for the Speech Recognizer");
217 return false;
218 }
219
220 return (hr == S_OK) ? true : false;
221}
222
223
225void _stdcall Recognition::sapi_callback(WPARAM wParam, LPARAM lParam)
226{
227 Recognition* pThis = (Recognition*)lParam;
228 pThis->callbackEventReco();
229}
230
231
234{
235 USES_CONVERSION;
236 CSpEvent evt;
237
238 // Clear the current instance, and retrieve the next event from the message queue
239 while (evt.GetFrom(cpRecoContext) == S_OK)
240 {
241 switch (evt.eEventId)
242 {
243 case SPEI_SOUND_START:
244 {
245 m_bInSound = TRUE;
246 OBJpostEvent(RECOGNITION_START_CB, SCOL_PTR this, 0);
247 }
248 break;
249
250 case SPEI_SOUND_END:
251 {
252 if (m_bInSound)
253 {
254 m_bInSound = FALSE;
255 OBJpostEvent(RECOGNITION_END_CB, SCOL_PTR this, 0);
256 m_bGotReco = FALSE;
257 }
258 }
259 break;
260
261 case SPEI_RECOGNITION:
262 {
263 HRESULT hr = S_OK;
264 const USHORT MY_MAX_ALTERNATES = 10;
265 CComPtr<ISpPhraseAlt> pcpPhraseAlt[MY_MAX_ALTERNATES];
266 SPPHRASE* pPhrase;
267 std::string betterResult;
268 float ConfidenceMax = 0.0;
269 ULONG ulCount;
270 std::list<pTextRec*> lPhraseRec;
271 //std::list<std::string> lWordsRec;
272
273 // Retrieve information about the recognized phrase
274 hr = evt.RecoResult()->GetPhrase(&pPhrase);
275 if (SUCCEEDED(hr))
276 {
277 // Retrieve a list of MY_MAX_ALTERNATES alternative phrases related to the recognized phrase
278 hr = evt.RecoResult()->GetAlternates(pPhrase->Rule.ulFirstElement,
279 pPhrase->Rule.ulCountOfElements,
280 MY_MAX_ALTERNATES,
281 (ISpPhraseAlt**) pcpPhraseAlt,
282 &ulCount);
283 }
284 if (SUCCEEDED(hr))
285 {
286 // Browse the list of alternative phrases in order of highest likelyhood with the original phrase
287 for (unsigned int i = 0; i < ulCount; i++)
288 {
289 SPPHRASE* pPhraseAlt;
290 CSpDynamicString pwszAlternate;
291
292 // Retrieve information about the current alternative phrase
293 pcpPhraseAlt[i]->GetPhrase(&pPhraseAlt);
294
295 // Get the phrase's entire text string
296 hr = pcpPhraseAlt[i]->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pwszAlternate, NULL);
297 if (SUCCEEDED(hr))
298 {
299 float confidence = pPhraseAlt->pElements->SREngineConfidence;
300 pTextRec* paramTextRec = new pTextRec();
301 std::string content = pwszAlternate.CopyToChar();
302 paramTextRec->altText = content;
303 paramTextRec->altConfidence = confidence;
304 lPhraseRec.push_back(paramTextRec);
305 if (ConfidenceMax < confidence)
306 {
307 ConfidenceMax = confidence;
308 betterResult = content;
309 }
310 }
311 }
312 }
313
314 cbData* cbdataTextAlt = new cbData(lPhraseRec);
315 OBJpostEvent(RECOGNITION_TEXTS_ALT_CB, SCOL_PTR this, SCOL_PTR cbdataTextAlt);
316
317 cbData* cbdataText = new cbData(betterResult);
318 OBJpostEvent(RECOGNITION_TEXT_CB, SCOL_PTR this, SCOL_PTR cbdataText);
319 }
320 break;
321 }
322 }
323}
324
325
328{
329 HRESULT hr = S_FALSE;
330 int volume = -1;
331 hr = cpAudio->GetVolumeLevel((ULONG *)&volume);
332 if(FAILED(hr))
333 return -1;
334 else
335 return volume / 100;
336}
337
338
341{
342 HRESULT hr = S_OK;
343 hr = cpAudio->SetVolumeLevel((ULONG)(volume * 100));
344 if(FAILED(hr )) MMechostr(MSKDEBUG,"Error: volume reco\n") ;
345}
Management of the recognition class.
Definition reco.h:50
void setVolume(int volume)
Set The volume for the recognition.
Definition reco.cpp:340
void callbackEventReco()
Recognition Event Callback.
Definition reco.cpp:233
void AddWord(std::string s_Rule, std::string s_Word)
Add word to recognition.
Definition reco.cpp:66
Recognition()
utils libraries
Definition reco.cpp:37
bool initializeObjects()
Init Recognition Objects.
Definition reco.cpp:123
int getVolume()
Get The volume from recognition.
Definition reco.cpp:327
~Recognition()
Recognition Destructor.
Definition reco.cpp:54
static void _stdcall sapi_callback(WPARAM wParam, LPARAM lParam)
Defines the SAPI callback.
Definition reco.cpp:225
wchar_t * convertCharToLPCWSTR(char *s_text)
Utils Conversions.
int RECOGNITION_TEXT_CB
Definition plugin.cpp:102
int RECOGNITION_START_CB
Definition plugin.cpp:105
int RECOGNITION_TEXTS_ALT_CB
Definition plugin.cpp:108
int RECOGNITION_END_CB
Definition plugin.cpp:99