6 import speech_recognition
as SR
7 from speech_recognition
import *
def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, show_all=False, user_config=None):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API.

    This function requires a Google Cloud Platform account; see the `Google Cloud Speech API Quickstart <https://cloud.google.com/speech/docs/getting-started>`__ for details and instructions. Basically, create a project, enable billing for the project, enable the Google Cloud Speech API for the project, and set up Service Account Key credentials for the project. The result is a JSON file containing the API credentials. The text content of this JSON file is specified by ``credentials_json``. If not specified, the library will try to automatically `find the default API credentials JSON file <https://developers.google.com/identity/protocols/application-default-credentials>`__.

    The recognition language is determined by ``language``, which is a BCP-47 language tag like ``"en-US"`` (US English). A list of supported language tags can be found in the `Google Cloud Speech API documentation <https://cloud.google.com/speech/docs/languages>`__.

    If ``preferred_phrases`` is an iterable of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings <https://cloud.google.com/speech/limits#content>`__.

    ``user_config`` is an optional dictionary of extra recognition-config fields. It is merged over the default ``encoding``/``sampleRateHertz``/``languageCode`` entries, so it may override them; ``preferred_phrases`` is applied afterwards. If the resulting config contains a ``diarizationConfig`` whose ``enableSpeakerDiarization`` is set, the returned transcription is assembled word by word and a ``[<speakerTag>]`` marker is inserted whenever the speaker tag changes.

    Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.

    Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the credentials aren't valid, or if there is no Internet connection. Invalid arguments are rejected with ``AssertionError``.
    """
    # Argument validation is assertion-based so that callers keep seeing AssertionError.
    assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data"
    if credentials_json is not None:
        try:
            json.loads(credentials_json)
        except Exception:
            raise AssertionError("``credentials_json`` must be ``None`` or a valid JSON string")
    assert isinstance(language, str), "``language`` must be a string"
    # ``type(u"")`` additionally accepts unicode strings on Python 2.
    assert preferred_phrases is None or all(
        isinstance(phrase, (type(""), type(u""))) for phrase in preferred_phrases
    ), "``preferred_phrases`` must be a list of strings"

    # Clamp the sample rate into the 8 kHz - 48 kHz range; None leaves an in-range rate untouched.
    flac_data = audio_data.get_flac_data(
        convert_rate=None if 8000 <= audio_data.sample_rate <= 48000 else max(8000, min(audio_data.sample_rate, 48000)),
        convert_width=2,  # 16-bit samples
    )

    try:
        from oauth2client.client import GoogleCredentials
        from googleapiclient.discovery import build
        import googleapiclient.errors

        import socket
        import googleapiclient.http
        if self.operation_timeout and socket.getdefaulttimeout() is None:
            # Only override the client's module-level timeout when no global socket timeout is set.
            googleapiclient.http.DEFAULT_HTTP_TIMEOUT_SEC = self.operation_timeout

        if credentials_json is None:
            api_credentials = GoogleCredentials.get_application_default()
        else:
            # The credentials are loaded by file name, so write them to a temporary file first.
            with PortableNamedTemporaryFile("w") as f:
                f.write(credentials_json)
                f.flush()  # the content must be on disk before it is re-read through f.name
                api_credentials = GoogleCredentials.from_stream(f.name)

        speech_service = build("speech", "v1", credentials=api_credentials, cache_discovery=False)
    except ImportError:
        raise RequestError("missing google-api-python-client module: ensure that google-api-python-client is set up correctly.")

    speech_config = {"encoding": "FLAC", "sampleRateHertz": audio_data.sample_rate, "languageCode": language}
    if user_config:
        # Caller-supplied fields win over the defaults above.
        speech_config.update(user_config)
    if preferred_phrases is not None:
        speech_config["speechContext"] = {"phrases": preferred_phrases}
    if show_all:
        # Extra per-word detail for callers that asked for the raw response.
        speech_config["enableWordTimeOffsets"] = True
    request = speech_service.speech().recognize(body={"audio": {"content": base64.b64encode(flac_data).decode("utf8")}, "config": speech_config})

    try:
        response = request.execute()
    except googleapiclient.errors.HttpError as e:
        raise RequestError(e)
    except URLError as e:
        raise RequestError("recognition connection failed: {0}".format(e.reason))

    if show_all:
        return response
    if not response.get("results"):
        raise UnknownValueError()

    # ``in``/``.get`` instead of ``dict.has_key``, which does not exist on Python 3.
    diarization_config = speech_config.get("diarizationConfig") or {}
    diarization_enabled = bool(diarization_config.get("enableSpeakerDiarization"))

    pieces = []
    for result in response["results"]:
        best_alternative = result["alternatives"][0]
        if diarization_enabled:
            current_speaker = None  # reset per result so each result starts with a speaker marker
            for word in best_alternative.get("words", ()):
                if "speakerTag" in word and word["speakerTag"] != current_speaker:
                    current_speaker = word["speakerTag"]
                    pieces.append("[{}]".format(current_speaker))
                pieces.append(" " + word["word"])
        elif "transcript" in best_alternative:
            pieces.append(best_alternative["transcript"].strip() + " ")

    return "".join(pieces)
def recognize_google_cloud(self, audio_data, credentials_json=None, language="en-US", preferred_phrases=None, show_all=False, user_config={})