/*
 * $Rev: 1028 $: Revision of last commit
 * $Author: tgutwin $: Author of last commit
 * $Date: 2015-11-01 15:32:08 -0800 (Sun, 01 Nov 2015) $: Date of last commit
 * $URL: svn://svn.webarts.bc.ca/open/trunk/projects/WebARTS/ca/bc/webarts/tools/sphinx/DialogDemo.java $
 *
 * refactored by Tom Gutwin
 * Author Alexander Solovets
 *
 * Copyright 1999-2015 Carnegie Mellon University.
 * Portions Copyright 2002-2008 Sun Microsystems, Inc.
 * Portions Copyright 2002-2008 Mitsubishi Electric Research Laboratories.
 * Portions Copyright 2013-2015 Alpha Cephei, Inc.
 *
 * All Rights Reserved. Use is subject to license terms.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * 3. Original authors' names are not deleted.
 *
 * 4. The authors' names are not used to endorse or promote products
 * derived from this software without specific prior written
 * permission.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, the CMU Sphinx Speech Consortium, and
 * Sun Microsystems, Inc.
 *
 * CARNEGIE MELLON UNIVERSITY, SUN MICROSYSTEMS, INC., MITSUBISHI
 * ELECTRONIC RESEARCH LABORATORIES AND THE CONTRIBUTORS TO THIS WORK
 * DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
 * CARNEGIE MELLON UNIVERSITY, SUN MICROSYSTEMS, INC., MITSUBISHI
 * ELECTRONIC RESEARCH LABORATORIES NOR THE CONTRIBUTORS BE LIABLE FOR
 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */
package ca.bc.webarts.tools.sphinx;

import java.util.HashMap;
import java.util.Map;

import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.LiveSpeechRecognizer;
import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.result.WordResult;


/**
 * Console speech-dialog demo built on three {@link LiveSpeechRecognizer}s.
 * <p>
 * {@link #main(String[])} listens for a spoken menu command using the
 * "dialog" grammar and dispatches to one of three sub-dialogs: digits
 * recognition (GrXML grammar), a toy bank-account menu (the same "dialog"
 * grammar) and free-form weather forecasts (language model).
 * <p>
 * Every loop treats a {@code null} recognition result as "no more input" and
 * ends, rather than dereferencing it or spinning forever.
 */
public class DialogDemo
{

  private static final String ACOUSTIC_MODEL = "resource:/edu/cmu/sphinx/models/en-us/en-us";
  // Stock dictionary, kept for reference:
  //   "resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict"
  private static final String DICTIONARY_PATH = "resource:/ca/bc/webarts/tools/sphinx/cmudict-en-us_ca.dict";
  private static final String GRAMMAR_PATH = "resource:/ca/bc/webarts/tools/sphinx/";
  private static final String LANGUAGE_MODEL = "resource:/ca/bc/webarts/tools/sphinx/weather.lm";

  /** The grammar file to use for digits recognition (alternative: "digits.grxml"). **/
  private static final String DIGITS_GRAMMAR_NAME = "number.grxml";

  /**
   * Spoken number word to numeric value.
   * <p>
   * NOTE(review): {@link #parseNumber(String[])} looks up one whitespace
   * separated token at a time, so the two-word "... hundred" keys can never
   * match there; they are kept unchanged in case other code relies on them.
   */
  private static final Map<String, Integer> DIGITS = new HashMap<String, Integer>();
  static
  {
    DIGITS.put("oh", 0);
    DIGITS.put("zero", 0);
    DIGITS.put("one", 1);
    DIGITS.put("two", 2);
    DIGITS.put("three", 3);
    DIGITS.put("four", 4);
    DIGITS.put("five", 5);
    DIGITS.put("six", 6);
    DIGITS.put("seven", 7);
    DIGITS.put("eight", 8);
    DIGITS.put("nine", 9);
    DIGITS.put("ten", 10);
    DIGITS.put("twenty", 20);
    DIGITS.put("thirty", 30);
    DIGITS.put("forty", 40);
    // Historical misspelling, kept so a grammar/dictionary using it still works.
    DIGITS.put("fourty", 40);
    DIGITS.put("fifty", 50);
    DIGITS.put("sixty", 60);
    DIGITS.put("seventy", 70);
    DIGITS.put("eighty", 80);
    DIGITS.put("ninety", 90);
    DIGITS.put("one hundred", 100);
    DIGITS.put("two hundred", 200);
    DIGITS.put("three hundred", 300);
    DIGITS.put("four hundred", 400);
    DIGITS.put("five hundred", 500);
    DIGITS.put("six hundred", 600);
    DIGITS.put("seven hundred", 700);
    DIGITS.put("eight hundred", 800);
    DIGITS.put("nine hundred", 900);
  }


  /**
   * Converts the number words of a spoken command into a number.
   * <p>
   * The value of each word is appended textually, so "one two three" becomes
   * 123 and "zero point five" becomes 0.5. The words "point", "decimal" and
   * "dot" all produce a decimal point.
   *
   * @param tokens the words of the utterance; {@code tokens[0]} is the command
   *        word (for example "deposit") and is skipped
   * @return the parsed amount
   * @throws NumberFormatException if a word is not a known number word, if no
   *         number words follow the command, or if the assembled text is not
   *         a valid number (for example two decimal points)
   */
  private static double parseNumber(String[] tokens)
  {
    StringBuilder digits = new StringBuilder();

    for (int i = 1; i < tokens.length; ++i)
    {
      String word = tokens[i];
      if (word.length() == 0)
      {
        // Consecutive whitespace in the utterance yields empty tokens.
        continue;
      }

      if (word.equals("point") || word.equals("decimal") || word.equals("dot"))
      {
        digits.append('.');
      }
      else
      {
        Integer value = DIGITS.get(word);
        if (value == null)
        {
          // Fail explicitly instead of appending the text "null".
          throw new NumberFormatException("Unrecognized number word: \"" + word + "\"");
        }
        digits.append(value.intValue());
      }
    }

    if (digits.length() == 0)
    {
      throw new NumberFormatException("No number words found");
    }
    return Double.parseDouble(digits.toString());
  }


  /**
   * Prints every recognized word of a result on the current output line,
   * each one preceded by a space and followed by ", ".
   *
   * @param result the non-null recognition result to print
   */
  private static void printWords(SpeechResult result)
  {
    for (WordResult word : result.getWords())
    {
      System.out.print(" " + word + ", ");
    }
  }


  /**
   * Runs the digits sub-dialog: echoes each recognized utterance until the
   * user says "one zero one" / "one oh one" or no further result is available.
   *
   * @param recognizer the recognizer to use; it is started on entry and
   *        stopped before returning
   */
  private static void recognizeDigits(LiveSpeechRecognizer recognizer)
  {
    System.out.println("Digits recognition (using GrXML)");
    System.out.println("--------------------------------");
    System.out.println("Example: one two three");
    System.out.println("Say \"101\" to exit");
    System.out.println("--------------------------------");

    recognizer.startRecognition(true);
    while (true)
    {
      SpeechResult speechResult = recognizer.getResult();
      System.out.print("Got A Result... ");
      if (speechResult == null)
      {
        // Retrying on null would loop forever, so treat it as end of input.
        System.out.println();
        System.out.println("No recognition result available; leaving digits recognition.");
        break;
      }

      printWords(speechResult);

      String utterance = speechResult.getHypothesis();
      if ("one zero one".equals(utterance) || "one oh one".equals(utterance))
      {
        System.out.print("Got A 101................. EXIT Digits ");
        System.out.println();
        break;
      }

      System.out.print(utterance);
      System.out.println();
    }
    recognizer.stopRecognition();
  }


  /**
   * Runs the bank-account sub-dialog. Understands "balance",
   * "deposit &lt;number&gt;", "withdraw &lt;number&gt;" and "back" (which
   * returns to the caller), and prints the running savings after each command.
   * An amount that cannot be parsed is reported and leaves the savings
   * unchanged.
   *
   * @param recognizer the recognizer to use; it is started on entry and
   *        stopped before returning
   */
  private static void recognizerBankAccount(LiveSpeechRecognizer recognizer)
  {
    System.out.println("This is bank account voice menu");
    System.out.println("-------------------------------");
    System.out.println("Example: balance");
    System.out.println("Example: withdraw zero point five");
    System.out.println("Example: deposit one two three");
    System.out.println("Example: back");
    System.out.println("-------------------------------");

    double savings = .0;
    recognizer.startRecognition(true);

    while (true)
    {
      SpeechResult speechResult = recognizer.getResult();
      if (speechResult == null)
      {
        System.out.println("No recognition result available; leaving the bank account menu.");
        break;
      }

      String utterance = speechResult.getHypothesis();
      if (utterance.endsWith("back"))
      {
        break;
      }
      else if (utterance.startsWith("deposit"))
      {
        try
        {
          double deposit = parseNumber(utterance.split("\\s"));
          savings += deposit;
          System.out.format("Deposited: $%.2f\n", deposit);
        }
        catch (NumberFormatException e)
        {
          System.out.println("Could not understand the amount in: " + utterance
                             + " (" + e.getMessage() + ")");
        }
      }
      else if (utterance.startsWith("withdraw"))
      {
        try
        {
          double withdraw = parseNumber(utterance.split("\\s"));
          savings -= withdraw;
          System.out.format("Withdrawn: $%.2f\n", withdraw);
        }
        catch (NumberFormatException e)
        {
          System.out.println("Could not understand the amount in: " + utterance
                             + " (" + e.getMessage() + ")");
        }
      }
      else if (!utterance.endsWith("balance"))
      {
        System.out.println("Unrecognized command: " + utterance);
      }

      System.out.format("Your savings: $%.2f\n", savings);
    }

    recognizer.stopRecognition();
  }


  /**
   * Runs the weather sub-dialog: echoes each recognized utterance until the
   * user says "the end" or no further result is available.
   *
   * @param recognizer the recognizer to use; it is started on entry and
   *        stopped before returning
   */
  private static void recognizeWeather(LiveSpeechRecognizer recognizer)
  {
    System.out.println("Try some forecast. End with \"the end\"");
    System.out.println("-------------------------------------");
    System.out.println("Example: mostly dry some fog patches tonight");
    System.out.println("Example: sunny spells on wednesday");
    System.out.println("-------------------------------------");

    recognizer.startRecognition(true);
    while (true)
    {
      SpeechResult speechResult = recognizer.getResult();
      if (speechResult == null)
      {
        System.out.println("No recognition result available; leaving the weather dialog.");
        break;
      }

      String utterance = speechResult.getHypothesis();
      if (utterance.equals("the end"))
      {
        break;
      }
      System.out.println(utterance);
    }
    recognizer.stopRecognition();
  }


  /**
   * Entry point: builds the three recognizers and runs the spoken main menu
   * until the user says a command starting with "exit" or no further
   * recognition result is available.
   *
   * @param args ignored
   * @throws Exception if a recognizer cannot be created
   */
  public static void main(String[] args) throws Exception
  {
    // NOTE(review): one Configuration instance is mutated between the three
    // constructions below; this ordering assumes each recognizer captures its
    // settings when it is constructed.
    Configuration configuration = new Configuration();
    configuration.setAcousticModelPath(ACOUSTIC_MODEL);
    configuration.setDictionaryPath(DICTIONARY_PATH);
    configuration.setGrammarPath(GRAMMAR_PATH);

    configuration.setUseGrammar(true); // use the fixed grammar instead of language model
    configuration.setGrammarName("dialog");
    LiveSpeechRecognizer jsgfRecognizer = new LiveSpeechRecognizer(configuration);

    configuration.setUseGrammar(true); // use the fixed grammar instead of language model
    configuration.setGrammarName(DIGITS_GRAMMAR_NAME);
    LiveSpeechRecognizer grxmlDigitsRecognizer = new LiveSpeechRecognizer(configuration);

    configuration.setUseGrammar(false);
    configuration.setLanguageModelPath(LANGUAGE_MODEL);
    LiveSpeechRecognizer lmWeatherRecognizer = new LiveSpeechRecognizer(configuration);

    jsgfRecognizer.startRecognition(true);
    boolean running = true;
    while (running)
    {
      System.out.println("Choose menu item:");
      System.out.println("Example: go to the bank account");
      System.out.println("Example: exit the program");
      System.out.println("Example: weather forecast");
      System.out.println("Example: digits\n");

      // this blocks until a result is returned
      SpeechResult speechResult = jsgfRecognizer.getResult();
      System.out.print("Got A Result... ");
      if (speechResult == null)
      {
        // Check for null BEFORE touching the result; previously the word
        // list was read first, which failed on a null result.
        System.out.println();
        System.out.println("No recognition result available; leaving the menu.");
        break;
      }

      printWords(speechResult);
      System.out.println();

      String utterance = speechResult.getHypothesis();
      if (utterance == null)
      {
        System.out.println("Utterance was null");
      }
      else if (utterance.startsWith("exit"))
      {
        running = false;
      }
      else if (utterance.equals("digits"))
      {
        // Each sub-dialog starts and stops its own recognizer, so the menu
        // recognizer is stopped around the call.
        jsgfRecognizer.stopRecognition();
        recognizeDigits(grxmlDigitsRecognizer);
        jsgfRecognizer.startRecognition(true);
      }
      else if (utterance.endsWith("bank account"))
      {
        // endsWith (not equals) so the advertised phrase
        // "go to the bank account" is accepted as well as "bank account".
        jsgfRecognizer.stopRecognition();
        recognizerBankAccount(jsgfRecognizer);
        jsgfRecognizer.startRecognition(true);
      }
      else if (utterance.endsWith("weather forecast"))
      {
        jsgfRecognizer.stopRecognition();
        recognizeWeather(lmWeatherRecognizer);
        jsgfRecognizer.startRecognition(true);
      }
      else
      {
        System.out.println("Utterance did not match (" + utterance + ")");
      }
    }

    jsgfRecognizer.stopRecognition();
  }

}