simple_voice: voice_node.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: utf-8 -*-
00003 
00004 """Copyright (c) 2016 Xu Zhihao (Howe).  All rights reserved.
00005 This program is free software; you can redistribute it and/or modify
00006 This program is tested on a Kobuki-base TurtleBot."""
00007 
00008 from pyaudio import PyAudio, paInt16
00009 import json
00010 import base64
00011 import os
00012 import sys
00013 import requests
00014 import wave
00015 import rospy
00016 import numpy as np 
00017 import array
00018 import chunk
00019 
00020 
class recoder():
    """Record an utterance from the microphone and have it recognised online.

    One cycle consists of ``define()`` (load the ROS parameters),
    ``recode()`` (capture audio with PyAudio) and ``reg()`` (send the audio
    to the HTTP speech-recognition service configured through
    ``~REG_Token_url`` / ``~REG_Reg_url``).  The constructor runs that cycle
    interactively until the operator types something other than ENTER.
    """

    # Seconds to wait for each HTTP request before giving up.
    _HTTP_TIMEOUT = 10

    # (attribute name, default value) for every private ROS parameter.  The
    # parameter is called ``~REG_<attribute name>``.
    # NOTE(review): the API key / secret defaults are credentials committed to
    # source control; they are kept for backward compatibility but should be
    # supplied through the parameter server (and rotated) instead.
    # NOTE(review): the default Reg_url is plain http, so the access token
    # and the audio travel unencrypted - confirm whether https can be used.
    _PARAM_DEFAULTS = (
        ('NUM_SAMPLES', 2000),      # frames per PyAudio buffer (one chunk)
        ('SAMPLING_RATE', 8000),    # sampling rate in Hz
        ('UPPER_LEVEL', 5000),      # a peak above this resets the silence counter
        ('LOWER_LEVEL', 500),       # samples above this count as "loud"
        ('COUNT_NUM', 20),          # loud samples per chunk needed to start saving
        ('SAVE_LENGTH', 8),         # chunks kept after the last loud chunk
        ('TIME_OUT', 60),           # maximum number of chunks read per recording
        ('NO_WORDS', 6),            # quiet chunks in a row that end the recording
        ('Api_Key', "pmUzrWcsA3Ce7RB5rSqsvQt2"),
        ('Secrect_Key', "d39ec848d016a8474c7c25e308b310c3"),
        ('Grant_type', "client_credentials"),
        ('Token_url', "https://openapi.baidu.com/oauth/2.0/token"),
        ('Reg_url', "http://vop.baidu.com/server_api"),
        ('USER_ID', "8168466"),
        ('FORMAT', "wav"),
        ('LAN', "zh"),
        ('nchannel', 1),
    )

    def __init__(self):
        """Run record -> recognise cycles until the operator stops them.

        After every cycle the operator is prompted; an empty line (ENTER)
        starts another cycle, any other input ends the loop, as does ROS
        shutdown or a closed stdin.
        """
        try:
            read_line = raw_input   # Python 2
        except NameError:
            read_line = input       # Python 3

        if_continue = ''
        while not rospy.is_shutdown() and if_continue == '':
            self.define()
            self.recode()
            self.reg()
            # For debugging the captured audio: self.savewav("testing")
            try:
                if_continue = read_line('pls input ＥＮＴＥＲ to continue')
            except EOFError:
                # stdin is closed (e.g. not started from a terminal): there is
                # nobody to ask, so stop instead of crashing.
                break

    def reg(self):
        """Send the recorded audio to the recognition service.

        Returns:
            The recognised text (a trailing full-width comma is stripped), or
            a human-readable error message when the service reports a
            failure, or ``None`` when an HTTP request could not be completed.

        NOTE(review): the audio sent is the raw captured sample data without
        any container header, yet it is labelled with ``self.FORMAT``
        (default "wav") - confirm that the service accepts this.
        """
        str_voice = self.conventor(self.Voice_String)
        if not str_voice:
            # Nothing was captured, so there is nothing worth uploading.
            rospy.loginfo("音频文件不存在或格式错误\n")
            return '音频文件不存在或格式错误'

        token = self._request_token()
        if token is None:
            # 3302 is the "verification failed" entry of error_reason.
            return self.error_reason[3302]

        RegData = {"format": self.FORMAT,
                   "rate": self.SAMPLING_RATE,
                   "channel": self.nchannel,
                   "cuid": self.USER_ID,
                   "token": token,
                   "len": len(str_voice),
                   # decode() keeps the payload JSON-serialisable on Python 3
                   "speech": base64.b64encode(str_voice).decode('ascii'),
                   "lan": self.LAN}

        # The body is JSON, so declare it as such; requests computes the
        # Content-Length header itself.
        answer = self._post_for_json(self.Reg_url,
                                     data=json.dumps(RegData, sort_keys=True),
                                     headers={'Content-Type': 'application/json'})
        if answer is None:
            return None
        r, result = answer

        rospy.loginfo('response')
        self.Print_Response(r.headers)
        self.Print_Response(result)
        rospy.loginfo('result: %s \n' % result.get('err_msg'))
        rospy.loginfo('response\n')

        if result.get('err_msg') == 'success.':
            candidates = result.get('result') or ['']
            word = self._to_native(candidates[0])
            if word == '':
                rospy.loginfo("音频文件不存在或格式错误\n")
                return '音频文件不存在或格式错误'
            comma = '，'
            if word.endswith(comma):
                # len(comma) is 3 bytes on Python 2 and 1 character on Python 3
                word = word[:-len(comma)]
                rospy.loginfo('cog. result:　%s \n' % word)
            else:
                rospy.loginfo(word)
            return word

        err_no = result.get('err_no')
        reason = self.error_reason.get(err_no)
        if reason is None:
            # Error code missing from the table: fall back to the server text.
            reason = self._to_native(
                u'%s (err_no %s)' % (result.get('err_msg'), err_no))
        rospy.loginfo(reason)
        return reason

    def _request_token(self):
        """Fetch an access token from ``self.Token_url``; None on failure."""
        requestData = {"grant_type": self.Grant_type,
                       "client_id": self.Api_Key,
                       "client_secret": self.Secrect_Key}
        answer = self._post_for_json(self.Token_url, data=requestData)
        token = None if answer is None else answer[1].get('access_token')
        if token is None:
            rospy.loginfo('token failed\n')
            return None
        rospy.loginfo('token success\n')
        return token

    def _post_for_json(self, url, **post_args):
        """POST to *url*; return (response, decoded JSON dict) or None on failure."""
        try:
            response = requests.post(url=url, timeout=self._HTTP_TIMEOUT,
                                     **post_args)
            payload = json.loads(response.text)
        except (requests.RequestException, ValueError) as err:
            rospy.logerr('request to %s failed: %s' % (url, err))
            return None
        if not isinstance(payload, dict):
            rospy.logerr('unexpected reply from %s' % url)
            return None
        return response, payload

    @staticmethod
    def _to_native(text):
        """Return *text* as the interpreter's ``str`` (UTF-8 bytes on Python 2)."""
        if isinstance(text, str):
            return text
        return text.encode('utf-8')

    def define(self):
        """Load the configuration from the ROS parameter server.

        Every ``~REG_<name>`` parameter that is not set yet is first written
        with its default from ``_PARAM_DEFAULTS``; its value is then stored
        on ``self.<name>``.  Also (re)creates the error-code table and clears
        ``self.Voice_String``.
        """
        self.error_reason = {3300: '输入参数不正确',
                             3301: '识别错误',
                             3302: '验证失败',
                             3303: '语音服务器后端问题',
                             3304: '请求 QPS 过大，超过限额',
                             3305: '产品线当前日请求数超过限额'}

        for name, default in self._PARAM_DEFAULTS:
            param = '~REG_' + name
            if not rospy.has_param(param):
                rospy.set_param(param, default)
            setattr(self, name, rospy.get_param(param))

        # Chunks captured by the most recent recode() call.
        self.Voice_String = []

    def Print_Response(self, data):
        """Print every key of the mapping *data* together with its value."""
        for i in data:
            print('  %s :  %s' % (i, data[i]))

    def recode(self):
        """Record from the default input device into ``self.Voice_String``.

        Opens a 16-bit PyAudio input stream, captures chunks (see
        ``_capture``) and always releases the stream and the PyAudio
        instance afterwards, even if capturing raises.
        """
        pa = PyAudio()
        stream = None
        try:
            stream = pa.open(format=paInt16, channels=self.nchannel,
                             rate=self.SAMPLING_RATE, input=True,
                             frames_per_buffer=self.NUM_SAMPLES)
            self._capture(stream)
        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            pa.terminate()

    def _capture(self, stream):
        """Read chunks from *stream* until silence, time-out or ROS shutdown.

        A chunk is ``NUM_SAMPLES`` frames.  Recording ends after ``NO_WORDS``
        chunks in a row without a sample above ``UPPER_LEVEL``, or after
        ``TIME_OUT`` chunks in total.  Chunks are kept while the hold-over
        counter is positive; it is re-armed to ``SAVE_LENGTH`` by every chunk
        containing more than ``COUNT_NUM`` samples above ``LOWER_LEVEL``.
        """
        save_count = 0
        save_buffer = []
        time_out = self.TIME_OUT
        NO_WORDS = self.NO_WORDS

        while NO_WORDS and not rospy.is_shutdown():
            time_out -= 1
            print('time_out in %s' % time_out)
            # Read NUM_SAMPLES frames and view them as 16-bit samples.
            string_audio_data = stream.read(self.NUM_SAMPLES)
            audio_data = np.frombuffer(string_audio_data, dtype=np.short)
            peak = np.max(audio_data)

            # Count down towards "no speech"; any loud peak restarts the count.
            NO_WORDS -= 1
            if peak > self.UPPER_LEVEL:
                NO_WORDS = self.NO_WORDS
            print('self.NO_WORDS  %s' % NO_WORDS)
            print('np.max(audio_data)  %s' % peak)

            # Number of samples above LOWER_LEVEL in this chunk.
            large_sample_count = np.sum(audio_data > self.LOWER_LEVEL)

            # A loud chunk re-arms the hold-over; a quiet one uses it up.
            if large_sample_count > self.COUNT_NUM:
                save_count = self.SAVE_LENGTH
            else:
                save_count = max(save_count - 1, 0)

            if save_count > 0:
                save_buffer.append(string_audio_data)

            # Finish on silence or once the chunk budget is exhausted.
            if NO_WORDS == 0 or time_out <= 0:
                if save_buffer:
                    self.Voice_String = save_buffer
                    rospy.loginfo("Recode a piece of voice successfully!")
                break

    def conventor(self, Data_to_String):
        """Concatenate the recorded chunks into one byte string.

        Args:
            Data_to_String: iterable of byte strings (the captured chunks).

        Returns:
            All chunks joined in order; an empty byte string for no chunks.
            The bytes themselves are left untouched.
        """
        return b"".join(Data_to_String)

    def print_data_len(self, data):
        """Debug helper: print ``len(data)`` and then a running item count."""
        print(len(data))
        for n, _ in enumerate(data, 1):
            print(n)

    def savewav(self, filename,
                file_path='/home/turtlebot/xu_slam/src/simple_voice/src'):
        """Debug helper: write ``self.Voice_String`` to ``<file_path>/<filename>.wav``.

        Args:
            filename: file name without the ``.wav`` extension.
            file_path: target directory.
        """
        rospy.loginfo('存储音频')
        WAVE_FILE = os.path.join(file_path, '%s.wav' % filename)
        wf = wave.open(WAVE_FILE, 'wb')
        try:
            wf.setnchannels(self.nchannel)  # same channel count as the recording
            wf.setsampwidth(2)              # paInt16 -> 2 bytes per sample
            wf.setframerate(self.SAMPLING_RATE)
            wf.writeframes(b"".join(self.Voice_String))
        finally:
            wf.close()
        rospy.loginfo('音频数据已存')
00353       
# Script entry point: bring up the ROS node, then run the interactive
# record/recognise loop (it lives in the recoder constructor).
if "__main__" == __name__:
    rospy.init_node("simple_voice")
    rospy.loginfo('initialization system')
    recoder()  # returns once the operator ends the loop or ROS shuts down
    rospy.loginfo('process done and quit')