Project

General

Profile

Template
reco_sphinx.cpp
Go to the documentation of this file.
1/*
2-----------------------------------------------------------------------------
3This source file is part of OpenSpace3D
4For the latest info, see http://www.openspace3d.com
5
6Copyright (c) 2012 I-maginer
7
8This program is free software; you can redistribute it and/or modify it under
9the terms of the GNU Lesser General Public License as published by the Free Software
10Foundation; either version 2 of the License, or (at your option) any later
11version.
12
13This program is distributed in the hope that it will be useful, but WITHOUT
14ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
16
17You should have received a copy of the GNU Lesser General Public License along with
18this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19Place - Suite 330, Boston, MA 02111-1307, USA, or go to
20http://www.gnu.org/copyleft/lesser.txt
21
22-----------------------------------------------------------------------------
23*/
24
34#include "reco_sphinx.h"
35
36#define MAX_BUFFER 262144
37
#include <algorithm>
#include <iostream>
#include <sstream>
#include <vector>
41
42#ifdef _WIN32
43#include <windows.h>
44
45#define P_SLASH "\\"
46
47bool fileExists(const std::string& filePath)
48{
49 DWORD fileAttributes = GetFileAttributes(filePath.c_str());
50 return (fileAttributes != INVALID_FILE_ATTRIBUTES);
51}
52#else
53#include <iostream>
54#include <sys/stat.h>
55
56#define P_SLASH "/"
57
/*! \brief Check whether a filesystem entry exists (POSIX implementation).
 *
 *  \param filePath : path to probe
 *  \return true when stat() succeeds on the path, false otherwise
 */
bool fileExists(const std::string& filePath)
{
  struct stat info;
  const int status = stat(filePath.c_str(), &info);
  return status == 0;
}
63#endif
64
/*! \brief Extract the directory part of a path.
 *
 *  \param filePath : path using '/' and/or '\\' as separators
 *  \return everything before the last separator (the separator itself is
 *          dropped), or an empty string when the path contains no separator
 */
std::string getDirectoryFromFilePath(const std::string& filePath)
{
  const std::string::size_type separatorPos = filePath.find_last_of("/\\");
  return (separatorPos == std::string::npos) ? std::string()
                                             : filePath.substr(0, separatorPos);
}
74
/*! \brief Extract the file name of a path, without its last extension.
 *
 *  \param filePath : path using '/' and/or '\\' as separators
 *  \return the text after the last separator, truncated at the last '.'
 *          when that dot belongs to the file name (a dot located in a
 *          directory component is ignored)
 */
std::string getFileNameWithoutExtension(const std::string& filePath)
{
  const std::string::size_type separatorPos = filePath.find_last_of("/\\");
  const std::string::size_type nameStart =
      (separatorPos == std::string::npos) ? 0 : separatorPos + 1;
  const std::string::size_type dotPos = filePath.rfind('.');

  // No dot at all, or the last dot sits in the directory part: keep the whole name.
  if (dotPos == std::string::npos || dotPos < nameStart)
    return filePath.substr(nameStart);

  return filePath.substr(nameStart, dotPos - nameStart);
}
84
/*! \brief Count the whitespace-separated words of a text.
 *
 *  \param text : text to scan
 *  \return number of tokens obtained by stream extraction (operator>>)
 */
int countWords(const std::string& text)
{
  std::istringstream tokens(text);
  int total = 0;

  for (std::string token; tokens >> token; )
    ++total;

  return total;
}
96
98{
99 mValid = false;
100 mBuffer = 0;
101 mEndPointer = NULL;
102 mDecoder = NULL;
103 mConfig = NULL;
104}
105
107Recognition::Recognition(std::string pathtobin, std::string lang)
108{
109 mValid = false;
110 mBuffer = NULL;
111 mEndPointer = NULL;
112 mDecoder = NULL;
113 mKeySearch = false;
114
115 mConfig = ps_config_init(NULL);
116
117 std::string pathDir = getDirectoryFromFilePath(pathtobin);
118 std::string pathToDict = pathDir + P_SLASH + getFileNameWithoutExtension(pathtobin) + ".dict";
119
120 if (fileExists(pathtobin) && fileExists(pathToDict) && fileExists(pathDir + P_SLASH + lang))
121 {
122 ps_config_set_int(mConfig, "samprate", 16000);
123 //ps_config_set_int(mConfig, "maxwpf", 5);
124 ps_config_set_int(mConfig, "pl_window", 3);
125 ps_config_set_int(mConfig, "topn", 3);
126 ps_config_set_str(mConfig, "hmm", (pathDir + P_SLASH + lang).c_str()); // path to lang
127 ps_config_set_str(mConfig, "lm", pathtobin.c_str());
128 ps_config_set_str(mConfig, "dict", pathToDict.c_str());
129
130 mDecoder = ps_init(mConfig);
131 mEndPointer = ps_endpointer_init(0, 0.0, PS_VAD_MEDIUM_LOOSE, 0, 0);
132 if ((mDecoder != NULL) && (mEndPointer != NULL))
133 {
134 mFrameSize = ps_endpointer_frame_size(mEndPointer);
135 mValid = true;
136 }
137 }
138
139 if (!mValid)
140 {
141 if (mEndPointer != NULL)
142 ps_endpointer_free(mEndPointer);
143 mEndPointer = NULL;
144
145 if (mDecoder != NULL)
146 ps_free(mDecoder);
147 mDecoder = NULL;
148
149 if (mConfig != NULL)
150 ps_config_free(mConfig);
151 mConfig = NULL;
152 }
153 else
154 {
155 mBuffer = new Buffer(MAX_BUFFER);
156 mThread = std::thread(std::bind(&Recognition::cbThread, this));
157 }
158}
159
162{
163 mValid = false;
164 if (mThread.joinable())
165 mThread.join();
166
167 if (mEndPointer != NULL)
168 ps_endpointer_free(mEndPointer);
169
170 if (mDecoder != NULL)
171 ps_free(mDecoder);
172
173 if (mConfig != NULL)
174 ps_config_free(mConfig);
175
176 SAFE_DELETE(mBuffer);
177}
178
179
180void Recognition::fillAudioBuffer(const char* data, size_t lenght)
181{
182 if (mBuffer == NULL)
183 return;
184
185 const std::lock_guard<std::mutex> lock(mMutex);
186 mBuffer->fill(data, lenght);
187}
188
190{
191 //consume buffer and send callbacks
192 const int16 *speech;
193 int16 buffer[MAX_BUFFER];
194
195 while (mValid)
196 {
197 const std::lock_guard<std::mutex> lock(mMutexConfig);
198 int prev_in_speech = ps_endpointer_in_speech(mEndPointer);
199
200 //nothing to consume
201 if (mBuffer->getCount() < mFrameSize)
202 {
203 std::this_thread::sleep_for(std::chrono::milliseconds(10));
204 continue;
205 }
206
207 //todo fill buffer from char * with convertCharToInt16
208 {
209 const std::lock_guard<std::mutex> lock(mMutex);
210 mBuffer->getInt16(buffer, mFrameSize);
211 }
212 speech = ps_endpointer_process(mEndPointer, buffer);
213
214 if (speech != NULL)
215 {
216 const char *hyp;
217 if (!prev_in_speech)
218 {
219 OBJpostEvent(RECOGNITION_START_CB, SCOL_PTR this, 0);
220 ps_start_utt(mDecoder);
221 }
222
223 if (ps_process_raw(mDecoder, speech, mFrameSize, FALSE, FALSE) < 0)
224 {
225 std::this_thread::sleep_for(std::chrono::milliseconds(10));
226 continue;
227 }
228
229 if (!ps_endpointer_in_speech(mEndPointer))
230 {
231 ps_end_utt(mDecoder);
232
233 if ((hyp = ps_get_hyp(mDecoder, NULL)) != NULL)
234 {
235 if (hyp == NULL)
236 continue;
237
238 cbData* cbdataText = new cbData(from_utf8(hyp));
239 OBJpostEvent(RECOGNITION_TEXT_CB, SCOL_PTR this, SCOL_PTR cbdataText);
240 }
241
242 // to get the best 10 results if there are engough
243 std::list<pTextRec*> lPhraseRec;
244 ps_nbest_t* nbest = ps_nbest(mDecoder);
245 int32 score;
246 for (int n = 0; nbest && n < 10; nbest = ps_nbest_next(nbest), n++)
247 {
248 hyp = ps_nbest_hyp(nbest, &score);
249 if (hyp == NULL)
250 continue;
251
252 pTextRec* paramTextRec = new pTextRec();
253 paramTextRec->altText = from_utf8(hyp);
254 paramTextRec->altConfidence = (float)score * 0.01; // ?? what is score value range ?
255 lPhraseRec.push_back(paramTextRec);
256 }
257
258 if (!lPhraseRec.empty())
259 {
260 cbData* cbdataTextAlt = new cbData(lPhraseRec);
261 OBJpostEvent(RECOGNITION_TEXTS_ALT_CB, SCOL_PTR this, SCOL_PTR cbdataTextAlt);
262 }
263
264 OBJpostEvent(RECOGNITION_END_CB, SCOL_PTR this, 0);
265 }
266 }
267
268 std::this_thread::sleep_for(std::chrono::milliseconds(10));
269 }
270}
271
272
274void Recognition::AddWord(std::string s_Rule, std::string s_Word)
275{
276 const std::lock_guard<std::mutex> lock(mMutexConfig);
277 mKeyWords.push_back(s_Word);
278
279 std::string wlist;
280 for (unsigned int i = 0; i < mKeyWords.size(); i++)
281 {
282 std::string w = mKeyWords[i];
283 std::transform(w.begin(), w.end(), w.begin(), ::tolower);
284 int wcnt = countWords(w);
285 wlist = wlist + to_utf8(w) + " /1e-" + std::to_string(std::min(wcnt * 10, 60)) + "/\n";
286 }
287
288 ps_add_keyphrase(mDecoder, "keyword", wlist.c_str());
289 ps_activate_search(mDecoder, "keyword");
290
291 mKeySearch = true;
292}
293
296{
 // NOTE(review): the signature line of this body is missing from this listing.
 // Returns the mValid flag, which the two-argument constructor sets to true
 // only after both the decoder and the endpointer were created.
297 return mValid;
298}
299
302{
 // NOTE(review): the signature line of this body is missing from this listing;
 // presumably the volume getter paired with setVolume - confirm.
 // Always returns the constant -1; no member is read.
303 return -1;
304}
305
306
// Volume setter: currently a no-op. The body is empty and `volume` is ignored.
308void Recognition::setVolume(int volume)
309{
310}
void getInt16(int16_t *int16Array, size_t length)
Definition reco_sphinx.h:85
size_t getCount() const
void fill(const char *newData, size_t newDataSize)
Definition reco_sphinx.h:60
void setVolume(int volume)
Set The volume for the recognition.
Definition reco.cpp:340
void AddWord(std::string s_Rule, std::string s_Word)
Add word to recognition.
Definition reco.cpp:66
void fillAudioBuffer(const char *data, size_t lenght)
Recognition()
utils libraries
Definition reco.cpp:37
bool initializeObjects()
Init Recognition Objects.
Definition reco.cpp:123
int getVolume()
Get The volume from recognition.
Definition reco.cpp:327
~Recognition()
Recognition Destructor.
Definition reco.cpp:54
int RECOGNITION_TEXT_CB
Definition plugin.cpp:102
int RECOGNITION_START_CB
Definition plugin.cpp:105
int RECOGNITION_TEXTS_ALT_CB
Definition plugin.cpp:108
int RECOGNITION_END_CB
Definition plugin.cpp:99
bool fileExists(const std::string &filePath)
std::string getDirectoryFromFilePath(const std::string &filePath)
int countWords(const std::string &text)
#define P_SLASH
std::string getFileNameWithoutExtension(const std::string &filePath)
#define MAX_BUFFER
Definition reco_sphinx.h:49