Project

General

Profile

Template
reco_vosk.cpp
Go to the documentation of this file.
1/*
2-----------------------------------------------------------------------------
3This source file is part of OpenSpace3D
4For the latest info, see http://www.openspace3d.com
5
6Copyright (c) 2012 I-maginer
7
8This program is free software; you can redistribute it and/or modify it under
9the terms of the GNU Lesser General Public License as published by the Free Software
10Foundation; either version 2 of the License, or (at your option) any later
11version.
12
13This program is distributed in the hope that it will be useful, but WITHOUT
14ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
16
17You should have received a copy of the GNU Lesser General Public License along with
18this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19Place - Suite 330, Boston, MA 02111-1307, USA, or go to
20http://www.gnu.org/copyleft/lesser.txt
21
22-----------------------------------------------------------------------------
23*/
24
34#include "reco_vosk.h"
35
// Audio / buffering constants used by the Vosk recognizer in this file.
// DEF_SAMPLES          : sample rate handed to vosk_recognizer_new().
// FRAME_SIZE           : not referenced by the code visible in this file
//                        (the thread loop uses FRAMESIZE) - TODO confirm which one is intended.
// MAX_RECOGNITION_TIME : combined with DEF_SAMPLES to compute mTimeOutSamples
//                        (MAX_RECOGNITION_TIME * DEF_SAMPLES / 1000).
// MAX_BUFFER           : size passed to the Buffer constructor.
// DEF_BUFFER           : size of the local chunk buffer consumed by the recognition thread.
#define DEF_SAMPLES 16000
#define FRAME_SIZE 320
#define MAX_RECOGNITION_TIME 2500
// Parenthesized so the macro stays a single value inside larger expressions
// (e.g. `x / MAX_BUFFER`).
#define MAX_BUFFER (DEF_SAMPLES * 10)
#define DEF_BUFFER DEF_SAMPLES
41
42#include <iostream>
43#include <sstream>
44#include <algorithm>
45
46#include "nlohmann/json.hpp"
47
48using json = nlohmann::json;
49
50#ifdef _WIN32
51#include <windows.h>
52
53#define P_SLASH "\\"
54
55bool fileExists(const std::string& filePath)
56{
57 DWORD fileAttributes = GetFileAttributes(filePath.c_str());
58 return (fileAttributes != INVALID_FILE_ATTRIBUTES);
59}
60#else
61#include <iostream>
62#include <sys/stat.h>
63
// Returns true when stat() succeeds on `filePath`, i.e. the path designates
// an entry that can be queried; false otherwise.
bool fileExists(const std::string& filePath)
{
  struct stat info;
  const int status = stat(filePath.c_str(), &info);
  return status == 0;
}
69#endif
70
// Default path separator. Only defined when the platform-specific branch
// above has not already provided one, so the Windows definition is not
// silently overridden (and no macro-redefinition warning is raised).
#ifndef P_SLASH
#define P_SLASH "/"
#endif
72
73#ifdef ANDROID
74#include <android/asset_manager.h>
75#include <array>
76
77void copyAssetFile(AAssetManager* manager, const std::string& assetFilePath, const std::string& destFilePath)
78{
79 AAsset* asset = AAssetManager_open(manager, assetFilePath.c_str(), AASSET_MODE_STREAMING);
80 if (asset == nullptr)
81 {
82 // Handle error opening the asset file
83 return;
84 }
85
86 // Get the file size
87 off_t assetSize = AAsset_getLength(asset);
88
89 // Open the destination file
90 std::ofstream destFile(destFilePath, std::ios::out | std::ios::binary);
91 if (!destFile.is_open())
92 {
93 // Handle error opening the destination file
94 AAsset_close(asset);
95 return;
96 }
97
98 // Set the buffer size for reading and writing chunks
99 const int bufferSize = 8192; // 8 KB
100 std::array<char, bufferSize> buffer;
101
102 // Copy the asset file in chunks
103 while (true)
104 {
105 int bytesRead = AAsset_read(asset, buffer.data(), bufferSize);
106 if (bytesRead <= 0) {
107 // End of file or error occurred
108 break;
109 }
110
111 destFile.write(buffer.data(), bytesRead);
112 }
113
114 // Close the destination file
115 destFile.close();
116
117 // Close the asset file
118 AAsset_close(asset);
119}
120
121void copyAssetDirectory(AAssetManager* manager, const std::string& assetDir, const std::string& destDir)
122{
123 // Create the destination directory
124 mkdir(destDir.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);
125
126 AAssetDir* assetDirObj = AAssetManager_openDir(manager, assetDir.c_str());
127 if (assetDirObj == nullptr)
128 {
129 // Handle error opening the asset directory
130 return;
131 }
132
133 const char* fileName = nullptr;
134 while ((fileName = AAssetDir_getNextFileName(assetDirObj)) != nullptr)
135 {
136 std::string assetFilePath = assetDir + P_SLASH + std::string(fileName);
137 std::string destFilePath = destDir + P_SLASH + std::string(fileName);
138
139 copyAssetFile(manager, assetFilePath, destFilePath);
140 }
141
142 // Close the asset directory
143 AAssetDir_close(assetDirObj);
144}
145
146
147std::string android_extract_dir(std::string dir)
148{
149 std::string assetDir = dir;
150 std::string destDir;
151
152 std::transform(assetDir.begin(), assetDir.end(), assetDir.begin(), ::tolower);
153 if (assetDir.compare(0, 4, "apk/") != 0)
154 return assetDir;
155
156 assetDir = assetDir.substr(4).c_str();
157
158 struct android_app* mApp = (struct android_app*)SCgetExtra("this_inst");
159
160 const char* internalPath = mApp->activity->internalDataPath;
161 destDir = std::string(internalPath) + P_SLASH + assetDir;
162
163 AAssetManager* manager = mApp->activity->assetManager;
164
165 if (fileExists(destDir))
166 return destDir;
167
168 //Creates destination directories
169 std::string subPath;
170 for (const char& c : destDir)
171 {
172 subPath += c;
173 if (c == '/')
174 mkdir(subPath.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);
175 }
176
177 copyAssetDirectory(manager, assetDir, destDir);
178 copyAssetDirectory(manager, assetDir + P_SLASH + "am", destDir + P_SLASH + "am");
179 copyAssetDirectory(manager, assetDir + P_SLASH + "conf", destDir + P_SLASH + "conf");
180 copyAssetDirectory(manager, assetDir + P_SLASH + "graph", destDir + P_SLASH + "graph");
181 copyAssetDirectory(manager, assetDir + P_SLASH + "ivector", destDir + P_SLASH + "ivector");
182
183 return destDir;
184}
185#endif //ANDROID
186
// Returns everything that precedes the last '/' or '\' of `filePath`
// (the separator itself is excluded), or an empty string when the path
// contains no separator.
std::string getDirectoryFromFilePath(const std::string& filePath)
{
  const std::string::size_type separatorPos = filePath.find_last_of("/\\");
  if (separatorPos == std::string::npos)
    return std::string();

  return std::string(filePath, 0, separatorPos);
}
196
// Returns the last component of `filePath` ('/' and '\' are both separators)
// with its final extension removed.
//
// Only the last ".ext" is stripped ("a.tar.gz" -> "a.tar"). A leading dot is
// part of the name, not an extension, so ".bashrc" is returned unchanged
// instead of producing an empty name; "." and ".." are returned as-is.
std::string getFileNameWithoutExtension(const std::string& filePath)
{
  const size_t lastSlash = filePath.find_last_of("/\\");
  const size_t nameStart = (lastSlash == std::string::npos) ? 0 : lastSlash + 1;
  const std::string name = filePath.substr(nameStart);

  if (name == "." || name == "..")
    return name;

  const size_t lastDot = name.find_last_of('.');

  // No dot, or a dot that only marks a hidden file: nothing to strip
  if (lastDot == std::string::npos || lastDot == 0)
    return name;

  return name.substr(0, lastDot);
}
206
207std::string getValueFromJson(const std::string& jsonString, const std::string& key)
208{
209 try
210 {
211 json jsonData = json::parse(jsonString);
212 if (jsonData.contains(key))
213 {
214 return jsonData[key].get<std::string>();
215 }
216 else
217 {
218 // Key not found
219 return "";
220 }
221 }
222 catch (const json::parse_error&)
223 {
224 // JSON parsing error
225 return "";
226 }
227}
228
// Counts the whitespace-separated tokens of `text`.
// Returns 0 for an empty or all-whitespace string.
int countWords(const std::string& text)
{
  std::istringstream stream(text);
  int total = 0;

  for (std::string token; stream >> token; )
    total += 1;

  return total;
}
240
242{
243 mValid = false;
244 mInSpeech = false;
245 mBuffer = 0;
246 mModel = NULL;
247 mRecognizer = NULL;
248 mTimeOutSamples = MAX_RECOGNITION_TIME * DEF_SAMPLES / 1000;
249}
250
252Recognition::Recognition(std::string pathtobin, std::string lang)
253{
254 mValid = false;
255 mInSpeech = false;
256 mBuffer = NULL;
257 mModel = NULL;
258 mRecognizer = NULL;
259 mKeySearch = false;
260 mTimeOutSamples = MAX_RECOGNITION_TIME * DEF_SAMPLES / 1000;
261
262 std::string pathDir = getDirectoryFromFilePath(pathtobin);
263
264#ifdef ANDROID
265 //Extract the models in a place that kaldi can read on android
266 pathDir = android_extract_dir(pathDir);
267#endif
268
269 mModel = vosk_model_new(pathDir.c_str());
270 if (mModel != NULL)
271 mRecognizer = vosk_recognizer_new(mModel, DEF_SAMPLES);
272
273 if (mRecognizer != NULL)
274 mValid = true;
275
276 if (!mValid)
277 {
278 if (mRecognizer != NULL)
279 vosk_recognizer_free(mRecognizer);
280
281 if (mModel != NULL)
282 vosk_model_free(mModel);
283
284 mModel = NULL;
285 mRecognizer = NULL;
286 }
287 else
288 {
289 mBuffer = new Buffer(MAX_BUFFER);
290 mThread = std::thread(std::bind(&Recognition::cbThread, this));
291 }
292}
293
296{
297 mValid = false;
298 if (mThread.joinable())
299 mThread.join();
300
301 if (mRecognizer != NULL)
302 vosk_recognizer_free(mRecognizer);
303
304 if (mModel != NULL)
305 vosk_model_free(mModel);
306
307 SAFE_DELETE(mBuffer);
308}
309
310
311void Recognition::fillAudioBuffer(const char* data, size_t lenght)
312{
313 if (mBuffer == NULL)
314 return;
315
316 mBuffer->fill(data, lenght);
317}
318
320{
321 int final = 0;
322 //consume buffer and send callbacks
323 char buffer[DEF_BUFFER];
324 int dataSize = 0;
325 int frameSize = 0;
326 int bufferCursor = 0;
327
328 int remainingSamples = mTimeOutSamples;
329 std::string lastPartial;
330
331 while (mValid)
332 {
333 const std::lock_guard<std::mutex> lock(mMutexConfig);
334
335 // Check if recognition has taken too long
336 if (remainingSamples <= 0)
337 {
338 if (mInSpeech)
339 OBJpostEvent(RECOGNITION_END_CB, SCOL_PTR this, 0);
340
341 // Reset recognition
342 vosk_recognizer_reset(mRecognizer);
343
344 mInSpeech = false;
345 lastPartial = "";
346 remainingSamples = mTimeOutSamples;
347 dataSize = 0;
348 }
349
350 //nothing to consume
351 if (!mBuffer->getCount())
352 {
353 std::this_thread::sleep_for(std::chrono::milliseconds(1));
354 continue;
355 }
356
357 //if (mBuffer->getCount() > (DEF_SAMPLES * 5))
358 // mBuffer->consume_safe(DEF_SAMPLES * 4);
359
360 if (dataSize == 0)
361 {
362 dataSize = mBuffer->ProcessBuffer(buffer, (size_t)DEF_BUFFER, 0.01f);
363 bufferCursor = 0;
364 remainingSamples = mTimeOutSamples;
365
366 if (dataSize == 0)
367 {
368 remainingSamples = 0;
369 continue;
370 }
371 }
372
373 frameSize = std::min(dataSize, FRAMESIZE);
374 dataSize -= frameSize;
375 remainingSamples -= frameSize;
376
377 final = vosk_recognizer_accept_waveform(mRecognizer, buffer + bufferCursor, frameSize);
378 bufferCursor += frameSize;
379
380 if (!final)
381 {
382 std::string partial = getValueFromJson(vosk_recognizer_partial_result(mRecognizer), "partial").c_str();
383 if (!partial.empty() && (partial != "[unk]"))
384 {
385 if (!mInSpeech)
386 OBJpostEvent(RECOGNITION_START_CB, SCOL_PTR this, 0);
387
388 if (lastPartial != partial)
389 {
390 cbData* cbdataText = new cbData(from_utf8(partial));
391 OBJpostEvent(RECOGNITION_TEXT_CB, SCOL_PTR this, SCOL_PTR cbdataText);
392 }
393
394 lastPartial = partial;
395 mInSpeech = true;
396 }
397 }
398 else
399 {
400 std::string result = getValueFromJson(vosk_recognizer_result(mRecognizer), "text").c_str();
401
402 if (!result.empty() && (result != "[unk]"))
403 {
404 if (!mInSpeech)
405 OBJpostEvent(RECOGNITION_START_CB, SCOL_PTR this, 0);
406
407 if (lastPartial != result)
408 {
409 cbData* cbdataText = new cbData(from_utf8(result));
410 OBJpostEvent(RECOGNITION_TEXT_CB, SCOL_PTR this, SCOL_PTR cbdataText);
411 }
412
413 OBJpostEvent(RECOGNITION_END_CB, SCOL_PTR this, 0);
414 mInSpeech = false;
415
416 remainingSamples = mTimeOutSamples;
417
418 dataSize = 0;
419 }
420 else
421 {
422 if (mInSpeech)
423 OBJpostEvent(RECOGNITION_END_CB, SCOL_PTR this, 0);
424
425 mInSpeech = false;
426 continue;
427 }
428
429 lastPartial = "";
430 }
431 }
432}
433
434
436void Recognition::AddWord(std::string s_Rule, std::string s_Word)
437{
438 const std::lock_guard<std::mutex> lock(mMutexConfig);
439 mKeyWords.push_back(s_Word);
440
441 std::string wlist = "[";
442 for (unsigned int i = 0; i < mKeyWords.size(); i++)
443 {
444 std::string w = mKeyWords[i];
445 std::transform(w.begin(), w.end(), w.begin(), ::tolower);
446 if (i == 0)
447 wlist += "\"" + to_utf8(w) + "\"";
448 else
449 wlist += ",\"" + to_utf8(w) + "\"";
450 }
451 if (mKeyWords.size() > 0)
452 wlist += ", \"[unk]\"";
453
454 wlist += "]";
455
456 vosk_recognizer_set_grm(mRecognizer, wlist.c_str());
457}
458
461{
462 return mValid;
463}
464
467{
468 return -1;
469}
470
471
473void Recognition::setVolume(int volume)
474{
475}
size_t getCount() const
size_t ProcessBuffer(char *buffer, size_t length, float threshold)
Definition reco_vosk.h:173
void fill(const char *newData, size_t newDataSize)
Definition reco_sphinx.h:60
void setVolume(int volume)
Set The volume for the recognition.
Definition reco.cpp:340
void AddWord(std::string s_Rule, std::string s_Word)
Add word to recognition.
Definition reco.cpp:66
void fillAudioBuffer(const char *data, size_t lenght)
Recognition()
utils libraries
Definition reco.cpp:37
bool initializeObjects()
Init Recognition Objects.
Definition reco.cpp:123
int getVolume()
Get The volume from recognition.
Definition reco.cpp:327
~Recognition()
Recognition Destructor.
Definition reco.cpp:54
basic_json<> json
default specialization
Definition json.hpp:3408
int RECOGNITION_TEXT_CB
Definition plugin.cpp:102
int RECOGNITION_START_CB
Definition plugin.cpp:105
int RECOGNITION_END_CB
Definition plugin.cpp:99
std::string getDirectoryFromFilePath(const std::string &filePath)
#define MAX_BUFFER
Definition reco_sphinx.h:49
#define DEF_BUFFER
Definition reco_vosk.cpp:40
bool fileExists(const std::string &filePath)
Definition reco_vosk.cpp:64
std::string getDirectoryFromFilePath(const std::string &filePath)
int countWords(const std::string &text)
#define DEF_SAMPLES
utils libraries
Definition reco_vosk.cpp:36
#define P_SLASH
Definition reco_vosk.cpp:71
#define MAX_RECOGNITION_TIME
Definition reco_vosk.cpp:38
std::string getValueFromJson(const std::string &jsonString, const std::string &key)
std::string getFileNameWithoutExtension(const std::string &filePath)