13 package ai.picovoice.rhinodemo;
17 import org.apache.commons.cli.*;
19 import javax.sound.sampled.AudioFormat;
20 import javax.sound.sampled.AudioInputStream;
21 import javax.sound.sampled.AudioSystem;
22 import javax.sound.sampled.UnsupportedAudioFileException;
24 import java.io.IOException;
25 import java.nio.ByteBuffer;
26 import java.nio.ByteOrder;
/**
 * Runs the Rhino file demo against a single audio file.
 *
 * <p>The portion of the method visible here opens {@code inputAudioFile} with
 * {@link AudioSystem#getAudioInputStream} and writes a diagnostic to {@code System.err}
 * when the container format is unsupported or the file cannot be read.
 *
 * <p>NOTE(review): this excerpt is incomplete - the statements that follow each error
 * message and the code that consumes most parameters are not visible, so the parameter
 * notes below are taken from the command-line option descriptions and should be confirmed.
 *
 * @param accessKey       AccessKey obtained from Picovoice Console
 * @param inputAudioFile  audio file to run inference on
 * @param libraryPath     path to the Rhino native runtime library
 * @param modelPath       path to the file containing model parameters
 * @param contextPath     path to the Rhino context file
 * @param sensitivity     inference sensitivity; callers validate it to lie within [0, 1]
 * @param requireEndpoint presumably whether Rhino waits for an endpoint (chunk of silence)
 *                        before finishing inference - confirm against the Rhino builder
 */
31 public static void runDemo(String accessKey, File inputAudioFile, String libraryPath, String modelPath,
32 String contextPath,
float sensitivity,
boolean requireEndpoint) {
34 AudioInputStream audioInputStream;
// Let the Java Sound API open the file and detect its audio format.
36 audioInputStream = AudioSystem.getAudioInputStream(inputAudioFile);
37 }
catch (UnsupportedAudioFileException e) {
// The file was readable but is not in a format Java Sound recognises.
38 System.err.println(
"Audio format not supported. Please provide an input file of .au, .aiff or .wav format");
40 }
catch (IOException e) {
// Any I/O failure while opening the file is reported as "could not find".
41 System.err.println(
"Could not find input audio file at " + inputAudioFile);
56 AudioFormat audioFormat = audioInputStream.getFormat();
58 if (audioFormat.getSampleRate() != 16000.0f || audioFormat.getSampleSizeInBits() != 16) {
59 throw new IllegalArgumentException(String.format(
"Invalid input audio file format. " +
60 "Input file must be a %dkHz, 16-bit audio file.", rhino.
getSampleRate()));
// Multi-channel input is accepted, but the user is told that only one channel is used.
63 if (audioFormat.getChannels() > 1) {
64 System.out.println(
"Picovoice processes single-channel audio, but a multi-channel file was provided. " +
65 "Processing leftmost channel only.");
// Scratch buffer holding exactly one audio frame (getFrameSize() bytes), read little-endian.
71 ByteBuffer sampleBuffer = ByteBuffer.allocate(audioFormat.getFrameSize());
72 sampleBuffer.order(ByteOrder.LITTLE_ENDIAN);
// Consume the stream one audio frame at a time until no bytes remain available.
73 while (audioInputStream.available() != 0) {
75 int numBytesRead = audioInputStream.read(sampleBuffer.array());
// Fewer than 2 bytes cannot hold a 16-bit sample.
// NOTE(review): the handling of this case is not visible in this excerpt.
76 if (numBytesRead < 2) {
// Only the first 16-bit sample of the frame (offset 0) is kept and appended to the
// frame being assembled for Rhino; any further channels in the buffer are ignored.
80 rhinoFrame[frameIndex++] = sampleBuffer.getShort(0);
// A full Rhino frame has been collected: hand it to the engine.
82 if (frameIndex == rhinoFrame.length) {
84 boolean isFinalized = rhino.
process(rhinoFrame);
// Print the inference as a brace-delimited block: the intent, then each slot as key : 'value'.
// NOTE(review): the conditions selecting between this output and the
// "Didn't understand the command." message are not visible in this excerpt.
90 System.out.println(
"{");
91 System.out.println(String.format(
" intent : '%s'", inference.
getIntent()));
92 System.out.println(
" slots : {");
93 for (Map.Entry<String, String> slot : inference.
getSlots().entrySet()) {
94 System.out.println(String.format(
" %s : '%s'", slot.getKey(), slot.getValue()));
96 System.out.println(
" }");
97 System.out.println(
"}");
99 System.out.println(
"Didn't understand the command.");
// Reported when the audio ran out before Rhino produced an inference.
106 System.out.println(
"Reached end of audio file before Rhino returned an inference.");
107 }
catch (Exception e) {
// Catch-all: the exception is printed via toString() on stdout, without a stack trace.
108 System.out.println(e.toString());
// Command-line handling for the demo.
// NOTE(review): the enclosing method declaration and the declarations of `options`
// and `cmd` are not visible in this excerpt.
119 CommandLineParser parser =
new DefaultParser();
120 HelpFormatter formatter =
new HelpFormatter();
// Parse the raw arguments against the option set.
124 cmd = parser.parse(options,
args);
125 }
catch (ParseException e) {
// On a parse failure, show the parser's message followed by the usage text.
126 System.out.println(e.getMessage());
127 formatter.printHelp(
"rhinofiledemo", options);
// An explicit help request prints the same usage text.
132 if (
cmd.hasOption(
"help")) {
133 formatter.printHelp(
"rhinofiledemo", options);
// Fetch the raw option values; each is null-checked before use further down,
// so any of them may be null at this point.
137 String accessKey =
cmd.getOptionValue(
"access_key");
138 String inputAudioPath =
cmd.getOptionValue(
"input_audio_path");
139 String libraryPath =
cmd.getOptionValue(
"library_path");
140 String modelPath =
cmd.getOptionValue(
"model_path");
141 String contextPath =
cmd.getOptionValue(
"context_path");
142 String sensitivityStr =
cmd.getOptionValue(
"sensitivity");
143 String requireEndpointValue =
cmd.getOptionValue(
"require_endpoint");
// The access key is mandatory: reject both a missing and an empty value.
145 if (accessKey ==
null || accessKey.length() == 0) {
146 throw new IllegalArgumentException(
"AccessKey is required for Rhino.");
// Sensitivity defaults to 0.5 and is overridden only when the option was supplied.
149 float sensitivity = 0.5f;
150 if (sensitivityStr !=
null) {
152 sensitivity = Float.parseFloat(sensitivityStr);
153 }
catch (Exception e) {
// Any parse failure is reported as an invalid argument; the original exception
// is not attached as the cause.
154 throw new IllegalArgumentException(
"Failed to parse sensitivity value. " +
155 "Must be a floating-point number between [0,1].");
// A value that parsed successfully must still lie inside the closed range [0, 1].
158 if (sensitivity < 0 || sensitivity > 1) {
159 throw new IllegalArgumentException(String.format(
"Failed to parse sensitivity value (%s). " +
160 "Must be a floating-point number between [0,1].", sensitivity));
// The input audio path is a required argument and must point at an existing file.
if (inputAudioPath == null) {
    throw new IllegalArgumentException("No input audio file provided. This is a required argument.");
}
File inputAudioFile = new File(inputAudioPath);
if (!inputAudioFile.exists()) {
    // Message typo fixed: it previously read "does not exits."
    throw new IllegalArgumentException(String.format(
            "Audio file at path %s does not exist.", inputAudioPath));
}
// The context path is a required argument and must point at an existing file.
172 if(contextPath ==
null){
173 throw new IllegalArgumentException(
"No context file provided. This is a required argument.");
175 File contextFile =
new File(contextPath);
176 if (!contextFile.exists()) {
177 throw new IllegalArgumentException(String.format(
"Context file at path '%s' does not exist", contextPath));
// Library and model paths are optional.
// NOTE(review): the fallback values assigned when they are null are not visible in this excerpt.
180 if (libraryPath ==
null) {
184 if (modelPath ==
null) {
// Endpoint detection stays enabled unless the option value is "false" (compared case-insensitively).
188 boolean requireEndpoint =
true;
189 if (requireEndpointValue !=
null && requireEndpointValue.toLowerCase().equals(
"false")) {
190 requireEndpoint =
false;
// All arguments have been validated: run the demo.
193 runDemo(accessKey, inputAudioFile, libraryPath, modelPath, contextPath, sensitivity, requireEndpoint);
// Assembles the Apache Commons CLI option set used by the demo: one short/long option
// pair per setting, plus a help flag.
// NOTE(review): the enclosing method declaration and parts of each builder chain
// (between longOpt/desc and after desc) are not visible in this excerpt.
197 Options options =
new Options();
// -a / --access_key
199 options.addOption(Option.builder(
"a")
200 .longOpt(
"access_key")
202 .desc(
"AccessKey obtained from Picovoice Console (https://picovoice.ai/console/).")
// -i / --input_audio_path
205 options.addOption(Option.builder(
"i")
206 .longOpt(
"input_audio_path")
208 .desc(
"Absolute path to input audio file.")
// -c / --context_path
211 options.addOption(Option.builder(
"c")
212 .longOpt(
"context_path")
214 .desc(
"Absolute path to context file.")
// -l / --library_path
217 options.addOption(Option.builder(
"l")
218 .longOpt(
"library_path")
220 .desc(
"Absolute path to the Rhino native runtime library.")
// -m / --model_path
223 options.addOption(Option.builder(
"m")
224 .longOpt(
"model_path")
226 .desc(
"Absolute path to the file containing model parameters.")
// -s / --sensitivity
229 options.addOption(Option.builder(
"s")
230 .longOpt(
"sensitivity")
232 .desc(
"Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in " +
233 "fewer misses at the cost of (potentially) increasing the erroneous inference rate. " +
234 "If not set 0.5 will be used.")
// -e / --require_endpoint
237 options.addOption(Option.builder(
"e")
238 .longOpt(
"require_endpoint")
240 .desc(
"If set to `false`, Rhino does not require an endpoint (chunk of silence) before " +
241 "finishing inference.")
// -h / --help: a flag that takes no argument (hasArg is false) and has an empty description.
244 options.addOption(
new Option(
"h",
"help",
false,
""));