xfei_speech_recog.cpp
Go to the documentation of this file.
#include <boost/thread.hpp>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <new>
#include <thread>
#include "common_config.h"
#include "rapidjson/document.h"
#include "rapidjson/writer.h"
#include "asr/xunfei/qisr.h"
12 
13 void XfeiSpeechRecog::setAsrParams(const std::string base_path, const std::string pcm_file, const std::string params,
14  const int channel)
15 {
17  pcm_file_ = pcm_file;
18  asr_params_ = params;
19  channel_ = channel;
20 }
21 
23 {
24  int error_code = 0;
25  session_id_ = QISRSessionBegin(NULL, (const char*)asr_params_.c_str(), &error_code);
26  if (MSP_SUCCESS != error_code)
27  {
28  CommonConfig& xunfei_config = CommonConfig::get_instance();
29  std::cout << "QISRSessionBegin failed : errorcode:" << error_code << std::endl;
30  }
31 }
32 
33 // 从用于语音唤醒的pcm音频文件中读取数据
35 {
36  FileOperation pcm_file;
38 }
39 
40 void XfeiSpeechRecog::writeAudioData(const char* audio_data, unsigned int audio_len)
41 {
42  int err_code;
43  err_code = QISRAudioWrite(session_id_, (const void*)audio_data, audio_len, speech_recog_.audio_status,
45  if (MSP_SUCCESS != err_code)
46  {
47  std::cout << "QIVWAudioWrite failed! error code:" << err_code << std::endl;
49  }
50 }
51 
53 {
54  int ret = 0;
55  const char* qisr_result = NULL;
56  int rss_status = MSP_REC_STATUS_INCOMPLETE;
57  while (MSP_REC_STATUS_COMPLETE != rss_status && MSP_SUCCESS == ret)
58  {
59  qisr_result = QISRGetResult(session_id_, &rss_status, 0, &ret);
60  if (MSP_SUCCESS != ret)
61  {
62  if (MSP_SUCCESS != ret)
63  {
64  std::cout << "QISRGetResult failed ! errorcode:" << ret << std::endl;
65  }
66  }
67  if (qisr_result != NULL)
68  {
69  rec_result_ = (char*)malloc(BUFFER_SIZE);
70  if (rec_result_ == NULL)
71  {
72  std::cout << "\n Malloc failed in rec_result !!!\n";
74  }
75  rec_result_ = strncpy(rec_result_, qisr_result, BUFFER_SIZE);
76  }
77  }
78 }
79 
80 // 从双声道的音频数据中分离出单声道数据
82 {
83  struct DataBuff pcm_two_channel = pcm_data_;
84  pcm_data_ = { NULL, 0 };
85  pcm_data_.size = pcm_two_channel.size / 2;
86 
87  pcm_data_.data = new char[pcm_data_.size];
88  for (int i = 0; i < pcm_data_.size / 2; i++)
89  {
90  memcpy((uint16_t*)pcm_data_.data + i, ((uint32_t*)(pcm_two_channel.data)) + i, 2);
91  }
92  return pcm_data_;
93 }
94 
96 {
97  rec_result_ = NULL;
98  long pcm_index = 0; // pcm数据分段索引,0表示第一段音频的起始位置.
99  bool is_last_audio = false; // 是否已读取到最后一块音频的标志.
100  int ret = 0; // 错误标志位
101  int rss_status = MSP_REC_STATUS_INCOMPLETE;
102 
103  // 对iat中一些变量进行初始化.
107 
108  // 根据音频数据是单声道还是双声道选择接口
109  if (channel_ == 2)
110  {
112  }
113 
114  // 循环写入pcm数据用于语音识别,当ep_stat的状态变为MSP_EP_AFTER_SPEECH表示音频输入结束
115  // 以及pcm_data.size的大小小于0时,break.
116  std::cout << "-----------Start Recognizing--------" << std::endl;
117  while (1)
118  {
119  //定义音频数据长度,单位字节.一秒32k,6400为200ms的数据。
120  const unsigned int default_wave_len = 6400;
121  unsigned int wave_len = default_wave_len;
122 
123  //如果pcm_data.size小于6400,说明已经处理到最后一块音频.
124  if (pcm_data_.size < default_wave_len)
125  {
126  wave_len = pcm_data_.size;
127  is_last_audio = true;
128  }
129 
131  if (0 == pcm_index)
133  writeAudioData(&pcm_data_.data[pcm_index], wave_len);
134 
135  //每写入长度为wave_len的一段音频数据后,数据数组的索引加wave_len,pcm数据的长度-wave_len.
136  pcm_index += wave_len;
137  pcm_data_.size -= wave_len;
138  // 最后一块音频数据处理结束,跳出循环.
139  if (pcm_data_.size <= 0)
140  break;
142  break;
143  }
144  //检测到音频结束,写入NULL空数据主动停止识别.
146  writeAudioData(NULL, 0);
148  return rec_result_;
149 }
151 {
152  pcm_data_ = pcm_buff;
153 }
154 
155 // 一次对话结束后释放资源
157 {
158  // 结束本次语音识别
159  int ret = QISRSessionEnd(session_id_, "normal end");
160  if (MSP_SUCCESS != ret)
161  {
162  std::cout << "QISRSessionEnd failed !errorcode:" << ret << std::endl;
163  }
164  if (rec_result_)
165  {
166  free(rec_result_);
167  rec_result_ = NULL;
168  }
169  final_recog_result_ = "";
170  recog_confidence_ = 0;
171  recog_result_vector[0] = "";
172  recog_result_vector[1] = "";
173  pcm_data_ = { NULL, 0 };
174 }
175 
176 // 从完整的json语音识别结果解析出string类型的关键词和置信度,
177 // 并认为置信度低于40的识别结果属于无效结果
178 std::vector<std::string> XfeiSpeechRecog::resultFromJson()
179 {
181  doc.Parse(rec_result_);
182  if (doc.HasParseError())
183  {
184  rapidjson::ParseErrorCode code = doc.GetParseError();
185  std::cout << "JSON解析错误" << code << std::endl;
187  }
188  else
189  {
190  rapidjson::Value& vConfidence = doc["sc"];
191  recog_confidence_ = vConfidence.GetInt();
192  rapidjson::Value& wordArr = doc["ws"];
193  for (int i = 0; i < wordArr.Size(); ++i)
194  {
195  rapidjson::Value& wordUnit = wordArr[i];
196  if (wordUnit.HasMember("cw"))
197  {
198  rapidjson::Value& contentWord = wordUnit["cw"];
199  rapidjson::Value& word = contentWord[0];
200  final_recog_result_ = word["w"].GetString();
201  }
202  }
203  }
205  recog_result_vector[1] = std::to_string(recog_confidence_);
206 
207  std::cout << "Speech recognition result:" << final_recog_result_ << " |confidence:" << recog_confidence_ << std::endl;
208 
209  return recog_result_vector;
210 }
211 
212 // Fixme:测试用的,确定后记得删除
215 {
216  std::string save_file_name = pcm_file_operation.setFileName("-asr.pcm");
217  std::string save_file = base_path_ + "/cache/pcm" + save_file_name;
218  std::ofstream pcm_file(save_file, std::ofstream::binary);
219  pcm_file.write(pcm_data_.data, pcm_data_.size);
220  pcm_file.close();
221 }
222 
223 struct DataBuff XfeiSpeechRecog::recordThroughMIC(const float record_time, bool enable_audio_save)
224 {
225  int success_code = 0;
226  int errorcode;
227  std::cout << "-----------Start ASR Recording Thread --------" << std::endl;
228  // 采用默认设备获取音频
229  record_dev_id device_id = getDefaultInputDevice();
230  int errcode = 0;
231  // 使用WAVEFORMATEX结构指定pcm数据格式。
232  WAVEFORMATEX wavfmt = { WAVE_FORMAT_PCM, 1, 16000, 32000, 2, 16, sizeof(WAVEFORMATEX) };
233  if (getInputDeviceNum() == 0)
234  {
235  std::cout << "\nNo active record device find! ";
236  }
237  else
238  {
239  std::cout << "The total number of active input devices is : " << getInputDeviceNum() << std::endl;
240  }
241  // 设置myrec用于存储录音信息.
242  asr_record_ = (struct recorder*)malloc(sizeof(struct recorder));
243  if (asr_record_ == NULL)
244  {
245  std::cout << "\n Malloc failed in asr_record_ !!!\n";
246  exit(ASR_ERROR_MALLOC_FAIL);
247  }
248 
249  memset(asr_record_, 0, sizeof(struct recorder));
250 
252 
253  asr_record_->pcm_file_path = base_path_ + "/cache/pcm";
254  record_alsa.initRecord(asr_record_, device_id, &wavfmt);
256  int buf_count = 0; //分段录音计数
257  struct DataBuff record_pcm;
258 
259  // Fixme:测试用的,确定后记得删除
260  //std::ofstream outFile(base_path_ + "/cache/" + std::to_string(test_vda_count) + "test.txt", std::ios::app);
261  test_vda_count++;
262  VDADetection VDA_detec;
263  int speech_count = 0;
264  bool is_speech = false;
265  bool is_speech_end = false;
266  float backgrand_energy = 0;
267  int end_count = 0;
268  while (record_loops_ > 0)
269  {
270  float level_energy = 0;
271  record_pcm = record_alsa.startRecord();
272  if (buf_count < 15)
273  {
274  backgrand_energy = VDA_detec.levelEnergy(record_pcm, 15);
275  //outFile << backgrand_energy << std::endl;
276  }
277  else
278  {
279  level_energy = VDA_detec.levelEnergy(record_pcm, 15);
280  //outFile << level_energy << std::endl;
281  }
282 
283  if ((level_energy - backgrand_energy) > 2.2)
284  {
285  speech_count++;
286  }
287  if (speech_count >= 30)
288  {
289  is_speech = true;
290  }
291  if ((buf_count >= 450) && (is_speech == true) && (buf_count <= 550))
292  {
293  if (level_energy < (backgrand_energy + 1))
294  {
295  end_count++;
296  }
297  if (end_count >= 70)
298  {
299  is_speech_end = true;
300  }
301  }
302  if (is_speech_end == true)
303  {
304  std::cout << "speech done!" << std::endl;
305  break;
306  }
307  record_loops_--;
308  pcm_data_.data = (char*)realloc(pcm_data_.data, record_pcm.size * (buf_count + 1));
309 
310  if (pcm_data_.data == NULL)
311  {
312  std::cout << "ERROR:buf_new realloc error!" << std::endl;
314  }
315  std::memcpy(&pcm_data_.data[record_pcm.size * buf_count], record_pcm.data, record_pcm.size);
316 
317  buf_count += 1;
318  }
319  pcm_data_.size = record_pcm.size * buf_count;
320  //outFile.close();
321  // 根据选项选择是否保存录下的音频数据到文件。
322  if (enable_audio_save)
323  {
324  std::thread save_pcm_to_file(&XfeiSpeechRecog::saveRecordDataToFile, this);
325  save_pcm_to_file.detach();
326  }
327  return pcm_data_;
328 }
329 
331 {
333 }
volatile int state
Definition: linuxrec.h:46
std::string asr_params_
int MSPAPI QISRSessionEnd(const char *sessionID, const char *hints)
struct DataBuff startRecord()
Definition: linuxrec.cpp:188
void closeRecord()
Definition: linuxrec.cpp:215
struct speech_recog speech_recog_
struct recorder * asr_record_
std::string pcm_file_
void initAsr()
科大讯飞识别模块的初始化.
FileOperation pcm_file_operation
static CommonConfig & get_instance()
void uninitAsr()
一次识别结束后释放资源.
char * getRecogResultLoop()
获取完整的json语音识别结果.
unsigned short uint16_t
Definition: stdint.h:125
const char *MSPAPI QISRSessionBegin(const char *grammarList, const char *params, int *errorCode)
科大讯飞语音识别模块接口头文件. TODO: 还需要添加版权、版本等信息
int getInputDeviceNum()
Definition: linuxrec.cpp:693
const char * session_id_
static const int BUFFER_SIZE
Definition: BuildGrammar.h:18
int setRecordDuration(const float duration_time)
Definition: linuxrec.cpp:181
std::string setFileName(std::string file_type)
char * dataLoopRecog()
将全部音频数据循环写入科大讯飞接口进行识别并获取完整的json识别结果.
std::vector< std::string > recog_result_vector
GenericValue< UTF8<> > Value
GenericValue with UTF8 encoding.
Definition: document.h:2915
unsigned int uint32_t
Definition: stdint.h:126
void writeAudioData(const char *audio_data, unsigned int audio_len)
将pcm音频写入科大讯飞QISRAudioWrite接口.
struct DataBuff recordThroughMIC(const float record_time, bool enable_audio_save)
录音接口.
ParseErrorCode
Error code of parsing.
Definition: error.h:64
iFLY Speech Recognizer Header File
void initRecord(struct recorder *rec, record_dev_id dev, WAVEFORMATEX *fmt)
Definition: linuxrec.cpp:80
char * data
Definition: file_operation.h:8
float levelEnergy(struct DataBuff pcm_data, const int call_count)
Definition: linuxrec.cpp:36
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
Definition: document.h:3411
std::string base_path_
void setAsrParams(const std::string base_path, const std::string pcm_file, const std::string params, const int channel)
设置语音模块需外部传入的路径等参数.
record_dev_id getDefaultInputDevice()
Definition: linuxrec.cpp:680
std::string final_recog_result_
const std::string base_path
void stopRecordThroughMIC()
关闭录音设备.
RecordAlsaAPI record_alsa
int test_vda_count
const char *MSPAPI QISRGetResult(const char *sessionID, int *rsltStatus, int waitTime, int *errorCode)
void saveRecordDataToFile()
把录音数据存入/cache/pcm/目录下的pcm文件里,以次序和时间命名.
#define WAVE_FORMAT_PCM
Definition: formats.h:5
struct tWAVEFORMATEX WAVEFORMATEX
int MSPAPI QISRAudioWrite(const char *sessionID, const void *waveData, unsigned int waveLen, int audioStatus, int *epStatus, int *recogStatus)
std::string pcm_file_path
Definition: linuxrec.h:60
struct DataBuff getOneChannelData()
从双声道数据中分离出单声道数据.
std::vector< std::string > resultFromJson()
从完整的json语音识别结果中解析出需要的字符串结果和置信度值.
void getPcmFileData()
读取pcm文件里的音频数据,并将数据的内容和数据大小存入pcm_data_.
void getPCMData(struct DataBuff pcm_buff)
获取pcm数据接口.
struct DataBuff pcm_data_
struct DataBuff readFileAsDatabuffer(const std::string file_path)


xbot_talker
Author(s): wangxiaoyun
autogenerated on Sat Oct 10 2020 03:27:54