13 from collections
import namedtuple
68 Python binding for Rhino Speech-to-Intent engine. It directly infers the user's intent from spoken commands in
69 real-time. Rhino processes incoming audio in consecutive frames and indicates if the inference is finalized. When
70 finalized, the inferred intent can be retrieved as structured data in the form of an intent string and pairs of
71 slots and values. The number of samples per frame can be attained by calling `.frame_length`. The incoming audio
72 needs to have a sample rate equal to `.sample_rate` and be 16-bit linearly-encoded. Rhino operates on single-channel
86 ACTIVATION_LIMIT_REACHED = 9
87 ACTIVATION_THROTTLED = 10
88 ACTIVATION_REFUSED = 11
90 _PICOVOICE_STATUS_TO_EXCEPTION = {
91 PicovoiceStatuses.OUT_OF_MEMORY: RhinoMemoryError,
92 PicovoiceStatuses.IO_ERROR: RhinoIOError,
93 PicovoiceStatuses.INVALID_ARGUMENT: RhinoInvalidArgumentError,
94 PicovoiceStatuses.STOP_ITERATION: RhinoStopIterationError,
95 PicovoiceStatuses.KEY_ERROR: RhinoKeyError,
96 PicovoiceStatuses.INVALID_STATE: RhinoInvalidStateError,
97 PicovoiceStatuses.RUNTIME_ERROR: RhinoRuntimeError,
98 PicovoiceStatuses.ACTIVATION_ERROR: RhinoActivationError,
99 PicovoiceStatuses.ACTIVATION_LIMIT_REACHED: RhinoActivationLimitError,
100 PicovoiceStatuses.ACTIVATION_THROTTLED: RhinoActivationThrottledError,
101 PicovoiceStatuses.ACTIVATION_REFUSED: RhinoActivationRefusedError
# Result record built by the inference call; fields are read-only because namedtuples are immutable.
Inference = namedtuple('Inference', ['is_understood', 'intent', 'slots'])
# `namedtuple()` generates a generic docstring, so a descriptive one is attached afterwards.
# The literal opens with exactly three quotes followed by a line continuation so that the
# text begins at "Immutable"; a fourth quote would become a literal `"` at the start of `__doc__`.
Inference.__doc__ = """\
Immutable object with `.is_understood`, `.intent` , and `.slots` getters.

:param is_understood: Indicates whether the intent was understood by Rhino.
:param intent: Name of intent that was inferred
:param slots: Dictionary of the slot keys and values extracted from the utterance.
"""
116 def __init__(self, access_key, library_path, model_path, context_path, sensitivity=0.5, require_endpoint=True):
120 :param access_key: AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).
121 :param library_path: Absolute path to Rhino's dynamic library.
122 :param model_path: Absolute path to file containing model parameters.
123 :param context_path: Absolute path to file containing context parameters. A context represents the set of
124 expressions (spoken commands), intents, and intent arguments (slots) within a domain of interest.
125 :param sensitivity: Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value
126 results in fewer misses at the cost of (potentially) increasing the erroneous inference rate.
127 :param require_endpoint: If set to `False`, Rhino does not require an endpoint (chunk of silence) before
132 raise ValueError(
"access_key should be a non-empty string.")
134 if not os.path.exists(library_path):
135 raise IOError(
"Couldn't find Rhino's dynamic library at '%s'." % library_path)
137 library = cdll.LoadLibrary(library_path)
139 if not os.path.exists(model_path):
140 raise IOError(
"Couldn't find model file at '%s'." % model_path)
142 if not os.path.exists(context_path):
143 raise IOError(
"Couldn't find context file at '%s'." % context_path)
145 if not 0 <= sensitivity <= 1:
146 raise ValueError(
"Sensitivity should be within [0, 1].")
148 init_func = library.pv_rhino_init
149 init_func.argtypes = [
155 POINTER(POINTER(self.CRhino))]
156 init_func.restype = self.PicovoiceStatuses
161 access_key.encode(
'utf-8'),
162 model_path.encode(
'utf-8'),
163 context_path.encode(
'utf-8'),
187 POINTER(POINTER(c_char_p)),
188 POINTER(POINTER(c_char_p))]
199 context_info_func = library.pv_rhino_context_info
200 context_info_func.argtypes = [POINTER(self.
CRhino), POINTER(c_char_p)]
203 context_info = c_char_p()
204 status = context_info_func(self.
_handle, byref(context_info))
210 version_func = library.pv_rhino_version
211 version_func.argtypes = []
212 version_func.restype = c_char_p
220 """Releases resources acquired."""
226 Processes a frame of audio and emits a flag indicating if the inference is finalized. When finalized,
227 `.get_inference()` should be called to retrieve the intent and slots, if the spoken command is considered valid.
229 :param pcm: A frame of audio samples. The number of samples per frame can be attained by calling
230 `.frame_length`. The incoming audio needs to have a sample rate equal to `.sample_rate` and be 16-bit
231 linearly-encoded. Rhino operates on single-channel audio.
232 :return: Flag indicating if the inference is finalized.
236 raise ValueError(
"Invalid frame length. expected %d but received %d" % (self.
frame_length, len(pcm)))
238 is_finalized = c_bool()
243 return is_finalized.value
247 Gets inference results from Rhino. If the spoken command was understood, it includes the specific intent name
248 that was inferred, and (if applicable) slot keys and specific slot values. Should only be called after the
249 process function returns true, otherwise Rhino has not yet reached an inference conclusion.
250 :return: Inference object with `.is_understood`, `.intent`, and `.slots` getters.
253 is_understood = c_bool()
257 is_understood = is_understood.value
262 slot_keys = POINTER(c_char_p)()
263 slot_values = POINTER(c_char_p)()
273 intent = intent.value.decode(
'utf-8')
276 for i
in range(num_slots.value):
277 slots[slot_keys[i].decode(
'utf-8')] = slot_values[i].decode(
'utf-8')
290 return self.
Inference(is_understood=is_understood, intent=intent, slots=slots)
294 """Context information."""
306 """Number of audio samples per frame."""
312 """Audio sample rate accepted by Picovoice."""