asr_online.cpp
Go to the documentation of this file.
1 #include "asr/baidu/asr_online.h"
2 #include <memory.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <thread>
6 #include "rapidjson/document.h"
7 #include "rapidjson/rapidjson.h"
9 #include "rapidjson/writer.h"
// praseToken: extracts the access token from the JSON text held in url_response_.
// On a JSON parse error the rapidjson error code is printed; otherwise the
// "access_token" member is copied into token_ and echoed to stdout.
// NOTE(review): the signature line, the declaration of `doc` and listing line 18
// are absent from this dump (numbering jumps 9 -> 11 -> 13 and 17 -> 19);
// presumably `doc` is a rapidjson::Document - confirm against the real file.
11 {
13  doc.Parse(url_response_);
14  if (doc.HasParseError())
15  {
16  rapidjson::ParseErrorCode code = doc.GetParseError();
17  std::cout << "JSON解析错误" << code << std::endl;
19  }
20  else
21  {
  // NOTE(review): "access_token" is read without a HasMember()/IsString() check,
  // so a well-formed JSON reply that lacks this member is not handled here.
22  rapidjson::Value& vConfidence = doc["access_token"];
23  token_ = vConfidence.GetString();
24  std::cout << "token from url_response json is :" << token_ << std::endl;
25  }
26 }
27 
// Stores the externally supplied ASR parameters on the object.
// @param base_path  base directory passed in by the caller
// @param pcm_file   PCM file name/path, stored in pcm_file_
// @param channel    channel count, stored in channel_
// NOTE(review): listing line 30 is missing from this dump; presumably it is
// `base_path_ = base_path;` - confirm against the real file.
28 void BaiduAsrOnline::setAsrParams(const std::string base_path, const std::string pcm_file, const int channel)
29 {
31  pcm_file_ = pcm_file;
32  channel_ = channel;
33 }
// initAndConfigAsr: initialises libcurl globally and fills asr_config with the
// settings used for the Baidu online recognition requests.
// NOTE(review): the signature line is absent from this dump.
// NOTE(review): api_key and secret_key are hard-coded below; consider loading
// them from configuration instead of shipping them in source.
35 {
36  // Initialise libcurl (global state).
37  curl_global_init(CURL_GLOBAL_ALL);
38  // Configure the ASR parameters.
39  // API key obtained from the web console, e.g. g_api_key="g8eBUMSokVB1BHGmgxxxxxx"
40  asr_config.api_key = "kVcnfD9iW2XVZSMaLMrtLYIz";
41  // App secret obtained from the web console, e.g.
42  // $secretKey="94dc99566550d87f8fa8ece112xxxxx"
43  asr_config.secret_key = "O9o1O213UgG5LFn0bDGNtoRN3VWl2du6";
44  // Only pcm/wav/amr are supported; the express edition additionally supports m4a.
45  asr_config.format = "pcm";
46  asr_config.url = "http://vop.baidu.com/server_api"; // may be changed to https
47  // 1537: Mandarin using the input-method model; 1536: Mandarin using the search model.
48  asr_config.dev_pid = 1537;
49  // This scope indicates ASR capability; if missing, enable it in the web console (very old applications may lack it).
50  asr_config.scope = "audio_voice_assistant_get";
51  // Sample rate, fixed value.
52  asr_config.rate = 16000;
53  asr_config.cuid = "1234567C";
54 }
// speechGetToken: requests an access token from API_TOKEN_URL using the
// client-credentials parameters built from asr_config, collects the HTTP reply in
// url_response_ through writefunc, and on success hands it to praseToken().
// NOTE(review): the signature line and listing line 72 are absent from this dump.
56 {
57  std::string url = API_TOKEN_URL + "?grant_type=client_credentials&client_id=" + asr_config.api_key +
58  "&client_secret=" + asr_config.secret_key;
  // NOTE(review): this prints the full URL, including client_secret, to stdout.
59  std::cout << "URL is : " << url << std::endl;
  // NOTE(review): the handle returned by curl_easy_init() is used without a NULL check.
60  CURL* curl = curl_easy_init();
61 
62  curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); // NOTE: the return value should be checked
63  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 5);
64  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 60); // 60 s timeout
65  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writefunc);
66  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &url_response_);
67  CURLcode res_curl = curl_easy_perform(curl);
68  int res = ASR_RETURN_OK;
69  if (res_curl != CURLE_OK)
70  {
  // `res` still holds ASR_RETURN_OK at this point, so only the strerror text is informative.
71  std::cout << "perform curl error: " << res << "," << curl_easy_strerror(res_curl) << std::endl;
73  }
74  else
75  {
76  // Parse the token; the result is stored in token_.
77  praseToken();
78  }
  // NOTE(review): url_response_ is freed but not reset to NULL in the visible code;
  // writefunc treats a non-NULL *result as an existing buffer, so a second call
  // of this function would append to freed memory.
79  free(url_response_);
80  curl_easy_cleanup(curl);
81 }
82 
// getPcmFileData: per the generated index, reads the audio data of a PCM file and
// stores its content and size in pcm_data_.
// NOTE(review): the signature line and the only body statement (listing line 85)
// are absent from this dump - consult the real file.
84 {
86 }
87 
// runAsrAndRecog: POSTs the audio held in pcm_data_ to asr_config.url and returns
// the raw reply accumulated in rec_result_ by writefunc. The generated index
// gives the return type as char*.
// NOTE(review): the signature line and listing line 93 (the body of the
// two-channel branch) are absent from this dump.
89 {
90  // Choose the handling depending on whether the audio data is mono or stereo.
91  if (channel_ == 2)
92  {
94  }
95  CURL* curl = curl_easy_init(); // must be released
96  char* cuid = curl_easy_escape(curl, asr_config.cuid.c_str(), std::strlen(asr_config.cuid.c_str())); // must be released
97 
98  std::string url =
99  asr_config.url + "?cuid=" + cuid + "&token=" + token_ + "&dev_pid=" + std::to_string(asr_config.dev_pid);
100  std::cout << "runAsrAndRecog:: url = :" << url << std::endl;
101 
  // NOTE(review): libcurl documents curl_free() as the release function for
  // strings returned by curl_easy_escape(); free() is used here instead.
102  free(cuid);
103 
104  struct curl_slist* headerlist = NULL;
105  std::string header;
106  header = "Content-Type: audio/" + asr_config.format + "; rate=" + std::to_string(asr_config.rate);
107  headerlist = curl_slist_append(headerlist, header.c_str()); // must be released
108  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
109  curl_easy_setopt(curl, CURLOPT_POST, 1);
110  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 5); // 5 s connect timeout
111  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 60); // 60 s overall request timeout
112  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerlist); // add the Content-Type HTTP header
113  curl_easy_setopt(curl, CURLOPT_POSTFIELDS, pcm_data_.data); // audio data
114  curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, pcm_data_.size); // audio data length
115  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writefunc);
116  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &rec_result_); // must be released
117 
  // NOTE(review): res_curl is never inspected; if the transfer fails before any
  // data arrives, rec_result_ may still be NULL when it is streamed to std::cout
  // on the next line.
118  CURLcode res_curl = curl_easy_perform(curl);
119  std::cout << "Baidu online speech recognition result is :" << rec_result_ << std::endl;
120  curl_slist_free_all(headerlist);
  // The buffer pointer is dropped without being released here.
  // NOTE(review): confirm who owns pcm_data_.data, otherwise this leaks.
121  pcm_data_.data = NULL;
122  pcm_data_.size = 0;
123  curl_easy_cleanup(curl);
124  return rec_result_;
125 }
126 
127 // libcurl 返回回调
128 size_t BaiduAsrOnline::writefunc(void* ptr, size_t size, size_t nmemb, char** result)
129 {
130  size_t result_len = size * nmemb;
131  int is_new = (*result == NULL);
132  if (is_new)
133  {
134  *result = (char*)malloc(result_len + 1);
135  if (*result == NULL)
136  {
137  std::cout << "realloc failure!\n";
138  return 1;
139  }
140  memcpy(*result, ptr, result_len);
141  (*result)[result_len] = '\0';
142  }
143  else
144  {
145  size_t old_size = strlen(*result);
146  *result = (char*)realloc(*result, result_len + old_size);
147  if (*result == NULL)
148  {
149  std::cout << "realloc failure!\n";
150  return 1;
151  }
152  memcpy(*result + old_size, ptr, result_len);
153  (*result)[result_len + old_size] = '\0';
154  }
155  return result_len;
156 }
157 
// getPCMData: takes over an externally supplied PCM buffer by assigning pcm_buff
// to pcm_data_.
// NOTE(review): the signature line is absent from this dump. DataBuff holds a raw
// char* data member according to the index, so this presumably copies the pointer
// rather than the audio bytes - confirm ownership with the caller.
159 {
160  pcm_data_ = pcm_buff;
161 }
162 
163 // Extracts single-channel data from the two-channel audio held in pcm_data_.
// The result replaces pcm_data_ and is also returned; it is half the size of the
// input. Each iteration copies the first 2 bytes of a 4-byte input frame
// (presumably the first channel of interleaved 16-bit stereo - TODO confirm).
// NOTE(review): the signature line is absent from this dump.
// NOTE(review): the new buffer comes from new[] while other code in this file
// uses realloc()/free() on pcm_data_.data, and the original two-channel buffer
// is not released here - confirm ownership.
165 {
166  struct DataBuff pcm_two_channel = pcm_data_;
167  pcm_data_ = { NULL, 0 };
168  pcm_data_.size = pcm_two_channel.size / 2;
169 
170  pcm_data_.data = new char[pcm_data_.size];
  // pcm_data_.size / 2 is the number of 16-bit samples in the output buffer.
171  for (int i = 0; i < pcm_data_.size / 2; i++)
172  {
173  memcpy((uint16_t*)pcm_data_.data + i, ((uint32_t*)(pcm_two_channel.data)) + i, 2);
174  }
175  return pcm_data_;
176 }
177 
// saveRecordDataToFile: writes the bytes of pcm_data_ to a binary file whose name
// is produced by pcm_file_operation.setFileName("-asr.pcm") and whose path is
// base_path_ + "/cache/pcm" + that name.
// NOTE(review): the signature line is absent from this dump.
// NOTE(review): no path separator is inserted after "/cache/pcm"; presumably
// setFileName() returns a name starting with '/' - confirm.
// NOTE(review): the stream state is not checked, so a failed open or write goes
// unreported.
179 {
180  std::string save_file_name = pcm_file_operation.setFileName("-asr.pcm");
181  std::string save_file = base_path_ + "/cache/pcm" + save_file_name;
182  std::ofstream pcm_file(save_file, std::ofstream::binary);
183  pcm_file.write(pcm_data_.data, pcm_data_.size);
184  pcm_file.close();
185 }
186 
// Records audio from the default input device in segments and accumulates them in
// pcm_data_, which is also returned.
// @param record_time        recording duration; not referenced in the visible lines
//                           (presumably used on a listing line missing from this
//                           dump - confirm)
// @param enable_audio_save  when true, saveRecordDataToFile() is started on a
//                           detached thread after recording
// @return pcm_data_ holding the concatenated segments
// NOTE(review): listing lines 210, 214 and 227 are absent from this dump.
187 struct DataBuff BaiduAsrOnline::recordThroughMIC(const float record_time, bool enable_audio_save)
188 {
  // success_code, errorcode and errcode are not used in the visible lines.
189  int success_code = 0;
190  int errorcode;
191  std::cout << "-----------Start ASR Recording Thread --------" << std::endl;
192  // Capture audio from the default device.
193  record_dev_id device_id = getDefaultInputDevice();
194  int errcode = 0;
195  // Describe the PCM data format with a WAVEFORMATEX structure.
196  WAVEFORMATEX wavfmt = { WAVE_FORMAT_PCM, 1, 16000, 32000, 2, 16, sizeof(WAVEFORMATEX) };
  // Having no input device is only reported; execution continues in the visible code.
197  if (getInputDeviceNum() == 0)
198  {
199  std::cout << "\nNo active record device find! " << std::endl;
200  }
201  else
202  {
203  std::cout << "The total number of active input devices is : " << getInputDeviceNum() << std::endl;
204  }
205  // Allocate asr_record_, which stores the recording state.
  // NOTE(review): the malloc() result is passed to memset() without a NULL check,
  // and struct recorder has a std::string member (pcm_file_path) that is assigned
  // below without ever being constructed.
206  asr_record_ = (struct recorder*)malloc(sizeof(struct recorder));
207 
208  memset(asr_record_, 0, sizeof(struct recorder));
209 
211 
212  asr_record_->pcm_file_path = base_path_ + "/cache/pcm";
213  record_alsa.initRecord(asr_record_, device_id, &wavfmt);
215 
216  int buf_count = 0; // counter of recorded segments
217  struct DataBuff record_pcm;
218  while (record_loops_ > 0)
219  {
220  record_pcm = record_alsa.startRecord();
221  record_loops_--;
  // The buffer is sized as (segments so far + 1) * current segment size.
  // NOTE(review): assumes every segment has the same size - confirm.
222  pcm_data_.data = (char*)realloc(pcm_data_.data, record_pcm.size * (buf_count + 1));
223 
224  if (pcm_data_.data == NULL)
225  {
226  std::cout << "ERROR:buf_new realloc error!" << std::endl;
228  }
  // NOTE(review): unless the missing line 227 leaves the function, this memcpy
  // runs with a NULL destination after a failed realloc.
229  std::memcpy(&pcm_data_.data[record_pcm.size * buf_count], record_pcm.data, record_pcm.size);
230  buf_count += 1;
231  }
  // NOTE(review): if the loop body never ran, record_pcm is uninitialised when
  // its size is read here.
232  pcm_data_.size = record_pcm.size * buf_count;
233  // Optionally save the recorded audio data to a file.
234  if (enable_audio_save)
235  {
  // The detached thread reads pcm_data_ through `this`.
  // NOTE(review): nothing visible here synchronises it with later changes to
  // pcm_data_ or with the lifetime of this object.
236  std::thread save_pcm_to_file(&BaiduAsrOnline::saveRecordDataToFile, this);
237  save_pcm_to_file.detach();
238  }
239  return pcm_data_;
240 }
// resultFromJson: parses the JSON reply in rec_result_ and, when it has a "result"
// member, stores the string at index 0 of it in online_final_result, which is
// then returned.
// NOTE(review): the signature line, the declaration of `doc` (listing line 243)
// and listing line 249 are absent from this dump.
242 {
244  doc.Parse(rec_result_);
245  if (doc.HasParseError())
246  {
247  rapidjson::ParseErrorCode code = doc.GetParseError();
248  std::cout << "JSON解析错误" << code << std::endl;
250  }
  // NOTE(review): unless the missing line 249 leaves the function, HasMember() is
  // called on a document that failed to parse.
251  if (doc.HasMember("result"))
252  {
253  rapidjson::Value& result = doc["result"];
254 
  // NOTE(review): index 0 is read without checking that "result" is a non-empty
  // array of strings.
255  online_final_result = result[0].GetString();
256  std::cout << "online_final_result is :" << online_final_result << std::endl;
257  }
258  return online_final_result;
259 }
260 
// stopRecordThroughMIC: sets record_loops_ to 0, which ends the
// `while (record_loops_ > 0)` loop in recordThroughMIC().
// NOTE(review): the signature line and listing line 264 are absent from this
// dump; the index describes this function as closing the recording device, so
// the missing line presumably does that - confirm.
262 {
263  record_loops_ = 0;
265 }
266 
267 // Releases per-dialogue resources once a conversation round has finished.
// When rec_result_ is non-NULL it is freed and reset to NULL, and
// online_final_result is reset to a single space; otherwise nothing is changed.
// NOTE(review): the signature line is absent from this dump.
269 {
270  if (rec_result_)
271  {
272  free(rec_result_);
273  rec_result_ = NULL;
274  online_final_result = " ";
275  }
276 }
static size_t writefunc(void *ptr, size_t size, size_t nmemb, char **result)
libcurl 返回回调函数.
Definition: asr_online.cpp:128
std::string format
Definition: asr_online.h:24
volatile int state
Definition: linuxrec.h:46
FileOperation pcm_file_operation
Definition: asr_online.h:147
struct DataBuff getOneChannelData()
从双声道数据中分离出单声道数据.
Definition: asr_online.cpp:164
struct DataBuff startRecord()
Definition: linuxrec.cpp:188
void closeRecord()
Definition: linuxrec.cpp:215
struct AsrConfig asr_config
Definition: asr_online.h:149
void praseToken()
从json中解析出token值.
Definition: asr_online.cpp:10
void initAndConfigAsr()
百度语音识别模块的初始化,设置asr相关参数.
Definition: asr_online.cpp:34
unsigned short uint16_t
Definition: stdint.h:125
std_msgs::Header * header(M &m)
int getInputDeviceNum()
Definition: linuxrec.cpp:693
int rate
Definition: asr_online.h:26
std::string pcm_file_
Definition: asr_online.h:140
struct recorder * asr_record_
Definition: asr_online.h:146
void setAsrParams(const std::string base_path, const std::string pcm_file, const int channel)
设置语音模块需外部传入的路径等参数.
Definition: asr_online.cpp:28
int setRecordDuration(const float duration_time)
Definition: linuxrec.cpp:181
std::string setFileName(std::string file_type)
std::string url
Definition: asr_online.h:34
void uninitAsr()
一次识别结束后释放资源.
Definition: asr_online.cpp:268
void stopRecordThroughMIC()
关闭录音设备.
Definition: asr_online.cpp:261
百度语音识别模块接口头文件. TODO: 还需要添加版权、版本等信息
char * rec_result_
Definition: asr_online.h:137
const std::string API_TOKEN_URL
Definition: asr_online.h:145
GenericValue< UTF8<> > Value
GenericValue with UTF8 encoding.
Definition: document.h:2915
unsigned int uint32_t
Definition: stdint.h:126
struct DataBuff pcm_data_
Definition: asr_online.h:136
std::string resultFromJson()
从完整的json语音识别结果中解析出需要的字符串结果.
Definition: asr_online.cpp:241
ParseErrorCode
Error code of parsing.
Definition: error.h:64
void initRecord(struct recorder *rec, record_dev_id dev, WAVEFORMATEX *fmt)
Definition: linuxrec.cpp:80
char * data
Definition: file_operation.h:8
char * url_response_
Definition: asr_online.h:144
std::string cuid
Definition: asr_online.h:30
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
Definition: document.h:3411
record_dev_id getDefaultInputDevice()
Definition: linuxrec.cpp:680
std::string token_
Definition: asr_online.h:143
char * runAsrAndRecog()
将pcm_data_结构体里存储的音频用于百度在线语音识别并获取识别结果.
Definition: asr_online.cpp:88
std::string base_path_
Definition: asr_online.h:139
void speechGetToken()
获取百度在线语音识别的Token.
Definition: asr_online.cpp:55
const std::string base_path
struct DataBuff recordThroughMIC(const float record_time, bool enable_audio_save)
录音接口.
Definition: asr_online.cpp:187
std::string api_key
Definition: asr_online.h:20
std::string online_final_result
Definition: asr_online.h:138
std::string scope
Definition: asr_online.h:32
int dev_pid
Definition: asr_online.h:28
void saveRecordDataToFile()
把录音数据存入/cache/pcm/目录下的pcm文件里,以次序和时间命名.
Definition: asr_online.cpp:178
common definitions and configuration
#define WAVE_FORMAT_PCM
Definition: formats.h:5
struct tWAVEFORMATEX WAVEFORMATEX
std::string pcm_file_path
Definition: linuxrec.h:60
void getPcmFileData()
读取pcm文件里的音频数据,并将数据的内容和数据大小存入pcm_data_结构体.
Definition: asr_online.cpp:83
RecordAlsaAPI record_alsa
Definition: asr_online.h:148
std::string secret_key
Definition: asr_online.h:22
void getPCMData(struct DataBuff pcm_buff)
获取pcm数据接口.
Definition: asr_online.cpp:158
struct DataBuff readFileAsDatabuffer(const std::string file_path)


xbot_talker
Author(s): wangxiaoyun
autogenerated on Sat Oct 10 2020 03:27:53