text_to_speech.cpp
Go to the documentation of this file.
1 #include "tts/text_to_speech.h"
2 #include <unistd.h>
3 #include <cstdlib>
4 #include <fstream>
5 #include <iostream>
6 #include "alsa/asoundlib.h"
7 #include "common_config.h"
8 // Initialization of the static map member text_audio_map
10 
12 {
 // Assigns fixed default values to basePath, src_text and audioFile.
 // NOTE(review): the signature line of this body is not visible in this
 // listing — presumably the default constructor; confirm against the header.
13  // Temporarily hard-coded to a local absolute path (machine-specific).
14  basePath = "/home/chenying/catkin_ws/src/xbot_talker";
15  src_text = "你好,欢迎来到重德智能";
16  audioFile = "greet_visitor.wav";
17 }
18 
20 {
 // NOTE(review): the signature line of this body is not visible in this
 // listing. If src_text names a parameter of this function, the statement
 // below assigns the parameter to itself and leaves any same-named member
 // untouched — confirm, and use this->src_text if the member was intended.
21  src_text = src_text;
22 }
23 
24 TextToSpeech::TextToSpeech(const string& basePath, const char* src_text, const string& audioFile)
25 {
27  src_text = src_text;
29  std::cout << "TextToSpeech::TextToSpeech(string basePath, const char* "
30  "src_text, string audioFile) is called."
31  << std::endl;
32  std::cout << basePath << std::endl;
33 }
34 
36 {
37 }
38 
39 map<string, string> TextToSpeech::createMap()
40 {
41  map<string, string> text_audio_tempmap;
42  return text_audio_tempmap;
43 }
44 
50 string TextToSpeech::getAudioFile(map<string, string> current_map)
51 {
52  return basePath + "/" + std::to_string(current_map.size()) + ".wav";
53 }
54 
61 bool TextToSpeech::audioConverter(const std::string base_path, const char* src_text)
62 {
64  int ret = MSP_ERROR_FAIL;
65  const char* sessionID = NULL;
66  unsigned int audio_len = 0;
67  wave_pcm_hdr wav_hdr = default_wav_hdr;
68  int synth_status = MSP_TTS_FLAG_STILL_HAVE_DATA;
69 
70  //判断src_text是否在text_audio_map中,
71  //若存在,则直接播放对应的音频文件
72  //不存在,则调用讯飞tts接口生成相应的音频文件并保存到相应路径
73  map<string, string>::iterator filter = text_audio_map.find(src_text);
74  if (filter != text_audio_map.end())
75  {
76  //播放已存在的缓存文件
77  system(string("play " + text_audio_map[src_text]).c_str());
78  std::cout << "Playing the file cached done. " << std::endl;
79  }
80  else
81  {
82  std::cout << "需要调用xunfei tts函数生成相应音频文件并保存" << std::endl;
83 
84  string tmp_file = getAudioFile(text_audio_map);
85  std::cout << tmp_file << std::endl;
86  const char* tts_begin_params = "engine_type = local,voice_name=xiaoyan, text_encoding = UTF8, "
87  "tts_res_path = fo|res/tts/xiaoyan.jet;fo|res/tts/common.jet, "
88  "sample_rate = 44100, speed = 50, volume = 50, pitch = 50, rdn = 2";
89  if (NULL == src_text)
90  {
91  std::cout << "param is error!" << std::endl;
92  return false;
93  }
94  std::ofstream outfile(tmp_file.c_str(), std::ios::out | std::ios::binary);
95  if (!outfile.is_open())
96  {
97  std::cout << "open file path error " << tmp_file << std::endl;
98  return false;
99  }
100  // 开始一次语音合成,分配语音合成资源。
101  // 返回:函数调用成功返回字符串格式的sessionID,失败返回NULL
102  // 参数:params[in]
103  // 传入的参数列表:
104  // http://mscdoc.xfyun.cn/windows/api/iFlytekMSCReferenceManual/qtts_8h.html#a3fba4ad9599445073335851cc9479542
105  // errorCode[out] 函数调用成功则其值为 MSP_SUCCESS,否则返回错误代码
106  sessionID = QTTSSessionBegin(tts_begin_params, &ret);
107  if (MSP_SUCCESS != ret)
108  {
109  std::cout << "QTTSSessionBegin failed, error code: " << ret << std::endl;
110  outfile.close();
111  return false;
112  }
113  // 写入要合成的文本。
114  // 返回:函数调用成功则其值为 MSP_SUCCESS,否则返回错误代码
115  // 参数:sessionID[in] 由QTTSSessionBegin返回的句柄。
116  // textString[in] 字符串指针。指向待合成的文本字符串。
117  // textLen[in] 合成文本长度,最大支持8192个字节(不含’\0’)。
118  // params[in] 本次合成所用的参数,只对本次合成的文本有效。目前为空
119  // 备注:本接口不支持连续被调用。调用本接口写入合成文本后,
120  // 用户需要反复调用QTTSAudioGet 接口来获取音频。
121  ret = QTTSTextPut(sessionID, src_text, (unsigned int)strlen(src_text), NULL);
122  if (MSP_SUCCESS != ret)
123  {
124  std::cout << "QTTSTextPut failed, error code:" << ret << std::endl;
125  QTTSSessionEnd(sessionID, "TextPutError");
126 
127  return false;
128  }
129 
130  // 添加wav音频头,使用采样率为16000
131  outfile.write(reinterpret_cast<char*>(&wav_hdr), sizeof(wav_hdr));
132  // 获取合成音频
133  // 返回:函数调用成功且有音频数据时返回非空指针。调用失败或无音频数据时,返回NULL。
134  // 参数:sessionID[in] 由QTTSSessionBegin返回的句柄。
135  // audioLen[out] 合成音频长度,单位字节。
136  // synthStatus[out] 合成音频状态
137  // http://mscdoc.xfyun.cn/windows/api/iFlytekMSCReferenceManual/qtts_8h.html#a4e4f6bed4b9e4ea553aa00ccf539c22a
138  // errorCode[out] 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码
139  // 备注:用户需要反复获取音频,直到音频获取完毕或函数调用失败。在重复获取音频时,如果暂未获得音频数据,需要将当前线程sleep一段时间,以防频繁调用浪费CPU资源。
140  while (1)
141  {
142  const void* data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
143  if (MSP_SUCCESS != ret)
144  break;
145  if (NULL != data)
146  {
147  outfile.write(static_cast<const char*>(const_cast<void*>(data)), audio_len);
148  //计算data_size大小
149  wav_hdr.data_size += audio_len;
150  }
151  if (MSP_TTS_FLAG_DATA_END == synth_status)
152  break;
153  }
154 
155  if (MSP_SUCCESS != ret)
156  {
157  std::cout << "QTTSAudioGet failed, error code: " << ret << std::endl;
158  QTTSSessionEnd(sessionID, "AudioGetError");
159 
160  return false;
161  }
162  // 修正wav文件头数据的大小
163  wav_hdr.size_8 += wav_hdr.data_size + (sizeof(wav_hdr) - 8);
164 
165  // 将修正过的数据写回文件头部,音频文件为wav格式
166  outfile.seekp(4, std::ios::beg);
167  // 写入size_8的值
168  outfile.write(reinterpret_cast<char*>(&wav_hdr.size_8), sizeof(wav_hdr.size_8));
169  // 将文件指针偏移到存储data_size值的位置
170  outfile.seekp(40, std::ios::beg);
171  // 写入data_size的值
172  outfile.write(reinterpret_cast<char*>(&wav_hdr.data_size), sizeof(wav_hdr.data_size));
173  outfile.close();
174  // 合成完毕,播放音频
175  ret = QTTSSessionEnd(sessionID, "Normal");
176  if (MSP_SUCCESS != ret)
177  {
178  std::cout << "QTTSSessionEnd failed, error code: " << ret << std::endl;
179  }
180  std::cout << "Synthesize completed." << std::endl;
181 
182  //保存相应文本和语音文件到text_audio_map
183  text_audio_map.insert(std::make_pair(src_text, tmp_file));
184 
185  std::cout << "Start talking....... " << tmp_file << std::endl;
186  play_wav(tmp_file.c_str());
187  std::cout << "Playing done. " << std::endl;
188  }
189 
190  return true;
191 }
192 int TextToSpeech::set_pcm_play(FILE* fp, wave_pcm_hdr* wav_header)
193 {
194  int rc;
195  int ret;
196  int size;
197  snd_pcm_t* handle; // PCI设备句柄
198  snd_pcm_hw_params_t* params; //硬件信息和PCM流配置
199  unsigned int val;
200  int dir = 0;
201  snd_pcm_uframes_t frames;
202  char* buffer;
203  int channels = wav_header->channels;
204  int frequency = wav_header->samples_per_sec;
205  int bit = wav_header->bits_per_sample;
206  int datablock = wav_header->block_align;
207 
208  rc = snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, 0);
209  if (rc < 0)
210  {
211  perror("\nopen PCM device failed:");
213  }
214 
215  snd_pcm_hw_params_alloca(&params); //分配params结构体
216  if (rc < 0)
217  {
218  perror("\nsnd_pcm_hw_params_alloca:");
220  }
221 
222  rc = snd_pcm_hw_params_any(handle, params); //初始化params
223  if (rc < 0)
224  {
225  perror("\nsnd_pcm_hw_params_any:");
227  }
228  rc = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED); //初始化访问权限
229  if (rc < 0)
230  {
231  perror("\nsed_pcm_hw_set_access:");
233  }
234 
235  //采样位数
236  switch (bit / 8)
237  {
238  case 1:
239  snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_U8);
240  break;
241  case 2:
242  snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
243  break;
244  case 3:
245  snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S24_LE);
246  break;
247  }
248  rc = snd_pcm_hw_params_set_channels(handle, params, channels); //设置声道,1表示单声>道,2表示立体声
249  if (rc < 0)
250  {
251  perror("\nsnd_pcm_hw_params_set_channels:");
253  }
254  val = frequency;
255  rc = snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir); //设置>频率
256  if (rc < 0)
257  {
258  perror("\nsnd_pcm_hw_params_set_rate_near:");
260  }
261 
262  rc = snd_pcm_hw_params(handle, params);
263  if (rc < 0)
264  {
265  perror("\nsnd_pcm_hw_params: ");
267  }
268 
269  rc = snd_pcm_hw_params_get_period_size(params, &frames, &dir); /*获取周期长度*/
270  if (rc < 0)
271  {
272  perror("\nsnd_pcm_hw_params_get_period_size:");
274  }
275 
276  size = frames * datablock; /*4 代表数据块长度*/
277 
278  buffer = (char*)malloc(size);
279  if (buffer == NULL)
280  {
281  std::cout << "\n Malloc failed in tts module::buffer !!!\n";
282  exit(TTS_ERROR_MALLOC_FAIL);
283  }
284  fseek(fp, 58, SEEK_SET); //定位歌曲到数据区,从起始位置偏移量为58.
285 
286  while (true)
287  {
288  memset(buffer, 0, sizeof(buffer)); //初始化新申请的内存
289 
290  ret = fread(buffer, 1, size, fp);
291  if (ret == 0)
292  {
293  break;
294  }
295  else if (ret != size)
296  {
297  break;
298  }
299  // 写音频数据到PCM设备
300  snd_pcm_writei(handle, buffer, frames);
301 
302  if (ret == -EPIPE)
303  {
304  // EPIPE means underrun
305  fprintf(stderr, "underrun occurred\n");
306  //完成硬件参数设置,使设备准备好
307  snd_pcm_prepare(handle);
308  }
309  else if (ret < 0)
310  {
311  fprintf(stderr, "error from writei: %s\n", snd_strerror(ret));
312  }
313  usleep(2 * 1000);
314  }
315 
316  snd_pcm_drain(handle);
317  snd_pcm_close(handle);
318  free(buffer);
319  return 0;
320 }
321 
322 int TextToSpeech::play_wav(const string& file_path)
323 {
324  FILE* fp;
325  wave_pcm_hdr wav_header;
326  fp = fopen(file_path.c_str(), "rb");
327  if (fp == NULL)
328  {
330  }
331  int nread = fread(&wav_header, 1, sizeof(wav_header), fp);
332  set_pcm_play(fp, &wav_header);
333 }
const char *MSPAPI QTTSSessionBegin(const char *params, int *errorCode)
int set_pcm_play(FILE *fp, wave_pcm_hdr *wav_header)
const char * src_text
static map< string, string > text_audio_map
static map< string, string > createMap()
const void *MSPAPI QTTSAudioGet(const char *sessionID, unsigned int *audioLen, int *synthStatus, int *errorCode)
string audioFile
short int bits_per_sample
static wave_pcm_hdr default_wav_hdr
int MSPAPI QTTSSessionEnd(const char *sessionID, const char *hints)
string getAudioFile(map< string, string > current_map)
short int block_align
short int channels
bool audioConverter(const std::string base_path, const char *src_text)
const std::string base_path
int play_wav(const string &file_path)
int MSPAPI QTTSTextPut(const char *sessionID, const char *textString, unsigned int textLen, const char *params)


xbot_talker
Author(s): wangxiaoyun
autogenerated on Sat Oct 10 2020 03:27:54