/**
 * Portions Copyright 2004 DFKI GmbH.
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute,
 * Carnegie Mellon University.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */
package de.dfki.lt.freetts;

import java.io.IOException;
import java.net.URL;
import java.util.Locale;

import com.sun.speech.freetts.Age;
import com.sun.speech.freetts.Gender;
import com.sun.speech.freetts.PartOfSpeech;
import com.sun.speech.freetts.PartOfSpeechImpl;
import com.sun.speech.freetts.PhoneSet;
import com.sun.speech.freetts.PhoneSetImpl;
import com.sun.speech.freetts.Tokenizer;
import com.sun.speech.freetts.UtteranceProcessor;
import com.sun.speech.freetts.Voice;
import com.sun.speech.freetts.diphone.DiphonePitchmarkGenerator;
import com.sun.speech.freetts.diphone.DiphoneUnitSelector;
import com.sun.speech.freetts.en.us.CMULexicon;
import com.sun.speech.freetts.en.us.FeatureProcessors;
import com.sun.speech.freetts.lexicon.Lexicon;
import com.sun.speech.freetts.relp.AudioOutput;
import com.sun.speech.freetts.relp.SampleInfo;
import com.sun.speech.freetts.relp.UnitConcatenator;


/**
 * A simple dummy voice as a starting point for non-US-English
 * diphone voices. All NLP stuff would need to be implemented
 * in order for this to become a full TTS voice.
 */
public class DiphoneVoice extends Voice implements ConcatenativeVoice {

    /**
     * Phone set loaded from {@link #phonesetURL} by
     * {@link #setupFeatureProcessors()}; stays <code>null</code> when no
     * phoneset URL was supplied.
     */
    private PhoneSet phoneSet;

    /** Location of the unit database of this voice. */
    protected URL database;

    /** Location of the phoneset definition (may be <code>null</code>). */
    protected URL phonesetURL;

    /** Location of the part-of-speech text file (may be <code>null</code>). */
    protected URL partOfSpeechURL;

    /**
     * Unit selector created from the database in the constructor;
     * <code>null</code> if that creation failed with an IOException.
     */
    protected DiphoneUnitSelector unitSelector;

    /**
     * The IOException that prevented the unit selector from being created
     * in the constructor, or <code>null</code> if creation succeeded.
     * Kept so that later failures can report the real cause.
     */
    private IOException unitSelectorFailure;

    /**
     * Creates a DiphoneVoice without phoneset and part-of-speech
     * resources. Delegates to the full constructor, passing
     * <code>null</code> for both URLs.
     *
     * @param name the name, passed on to the Voice constructor
     * @param gender the gender, passed on to the Voice constructor
     * @param age the age, passed on to the Voice constructor
     * @param description the description, passed on to the Voice
     *   constructor
     * @param locale the locale, passed on to the Voice constructor
     * @param domain the domain, passed on to the Voice constructor
     * @param organization the organization, passed on to the Voice
     *   constructor
     * @param lexicon the lexicon to use (if null, a small dummy lexicon
     *   is used instead)
     * @param database the database of the voice
     */
    public DiphoneVoice(String name, Gender gender, Age age,
            String description, Locale locale, String domain,
            String organization, Lexicon lexicon, URL database) {
        this(name, gender, age, description, locale, domain,
             organization, lexicon, database, null, null);
    }

    /**
     * Creates a DiphoneVoice.
     *
     * <p>The unit selector is created from the database right away. If
     * that fails with an IOException, the stack trace is printed and
     * construction still completes, but {@link #unitSelector} remains
     * <code>null</code>.
     *
     * @param name the name, passed on to the Voice constructor
     * @param gender the gender, passed on to the Voice constructor
     * @param age the age, passed on to the Voice constructor
     * @param description the description, passed on to the Voice
     *   constructor
     * @param locale the locale, passed on to the Voice constructor
     * @param domain the domain, passed on to the Voice constructor
     * @param organization the organization, passed on to the Voice
     *   constructor
     * @param lexicon the lexicon to use (if null, a small dummy lexicon
     *   is used instead)
     * @param database the database of the voice
     * @param phonesetURL leads to the phoneset, which will be used
     *   for the FeatureProcessors (can be null)
     * @param partOfSpeechURL leads to the pos-textfile which will be used
     *   for the FeatureProcessors (can be null)
     */
    public DiphoneVoice(String name, Gender gender, Age age,
            String description, Locale locale, String domain,
            String organization, Lexicon lexicon, URL database,
            URL phonesetURL, URL partOfSpeechURL) {
        super(name, gender, age, description, locale,
              domain, organization);
        //TODO: do something useful with the lexicon

        // Set default prosody values:
        setRate(150f);
        setPitch(100F);
        setPitchRange(12F);

        if (lexicon != null) {
            setLexicon(lexicon);
        } else {
            // Use a small dummy lexicon
            setLexicon(new CMULexicon("cmutimelex"));
        }

        this.database = database;
        this.phonesetURL = phonesetURL;
        this.partOfSpeechURL = partOfSpeechURL;

        try {
            this.unitSelector = new DiphoneUnitSelector(getDatabase());
        } catch (IOException ioe) {
            // Construction is deliberately best-effort: report the problem
            // but remember it, so that getSampleInfo() can name the real
            // cause instead of failing with an anonymous
            // NullPointerException.
            this.unitSelectorFailure = ioe;
            ioe.printStackTrace();
        }
    }

    /**
     * Returns the tokenizer of this voice. This dummy voice has none.
     *
     * @return always <code>null</code>
     */
    public Tokenizer getTokenizer() {
        return null;
    }


    /**
     * Loads this voice; the only work done here is registering the
     * feature processors.
     *
     * @throws IOException if setting up the feature processors fails
     */
    protected void loader() throws IOException {
        setupFeatureProcessors();
    }

    /**
     * Get the sample info for the underlying database.
     *
     * @return the sample info object
     *
     * @throws IllegalStateException if no unit selector is available,
     *   e.g. because the database could not be read when this voice was
     *   constructed; the original IOException, if any, is the cause
     */
    public SampleInfo getSampleInfo() {
        if (unitSelector == null) {
            IllegalStateException noSelector = new IllegalStateException(
                "No unit selector available for database "
                + getDatabase());
            if (unitSelectorFailure != null) {
                // initCause rather than the (message, cause) constructor,
                // which older Java versions do not provide.
                noSelector.initCause(unitSelectorFailure);
            }
            throw noSelector;
        }
        return unitSelector.getSampleInfo();
    }

    /**
     * Returns the audio output processor to be used by this voice.
     *
     * @return a new AudioOutput
     *
     * @throws IOException if an IO error occurs while getting
     *   processor
     */
    protected UtteranceProcessor getAudioOutput() throws IOException {
        return new AudioOutput();
    }

    /**
     * Gets the url to the database that defines the unit data for this
     * voice.
     *
     * @return a url to the database
     */
    public URL getDatabase() {
        return database;
    }

    /**
     * Returns the unit selector to be used by this voice.
     * Derived voices typically override this to customize behaviors.
     * This voice uses a diphone selector as the unit selector.
     *
     * @return the unit selector; <code>null</code> if it could not be
     *   created from the database when this voice was constructed
     *
     * @throws IOException if an IO error occurs while getting
     *   processor
     */
    public UtteranceProcessor getUnitSelector() throws IOException {
        return unitSelector;
    }

    /**
     * Returns the pitch mark generator to be used by this voice.
     * Derived voices typically override this to customize behaviors.
     * This voice uses a diphone pitchmark generator.
     *
     * @return a new DiphonePitchmarkGenerator
     *
     * @throws IOException if an IO error occurs while getting
     *   processor
     */
    public UtteranceProcessor getPitchmarkGenerator() throws IOException {
        return new DiphonePitchmarkGenerator();
    }

    /**
     * Returns the unit concatenator to be used by this voice.
     * Derived voices typically override this to customize behaviors.
     *
     * @return a new UnitConcatenator
     *
     * @throws IOException if an IO error occurs while getting
     *   processor
     */
    public UtteranceProcessor getUnitConcatenator() throws IOException {
        return new UnitConcatenator();
    }

    /**
     * Loads the optional phoneset and part-of-speech resources and
     * registers the feature processors of this voice.
     *
     * <p>The phoneset is only loaded when a phoneset URL was given, and
     * the "gpos" processor is only registered when a part-of-speech URL
     * was given. All other processors are registered unconditionally.
     *
     * @throws IOException if an IO error occurs while loading the
     *   phoneset or the part-of-speech data
     */
    protected void setupFeatureProcessors() throws IOException {
        if (phonesetURL != null) {
            phoneSet = new PhoneSetImpl(phonesetURL);
        }
        if (partOfSpeechURL != null) {
            PartOfSpeech pos =
                new PartOfSpeechImpl(partOfSpeechURL, "content");
            addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos));
        }

        addFeatureProcessor("word_break",
                new FeatureProcessors.WordBreak());
        addFeatureProcessor("word_punc",
                new FeatureProcessors.WordPunc());
        addFeatureProcessor("word_numsyls",
                new FeatureProcessors.WordNumSyls());
        addFeatureProcessor("ssyl_in",
                new FeatureProcessors.StressedSylIn());
        addFeatureProcessor("syl_in",
                new FeatureProcessors.SylIn());
        addFeatureProcessor("syl_out",
                new FeatureProcessors.SylOut());
        addFeatureProcessor("ssyl_out",
                new FeatureProcessors.StressedSylOut());
        addFeatureProcessor("syl_break",
                new FeatureProcessors.SylBreak());
        // "old_syl_break" is registered with the same processor type as
        // "syl_break".
        addFeatureProcessor("old_syl_break",
                new FeatureProcessors.SylBreak());
        addFeatureProcessor("num_digits",
                new FeatureProcessors.NumDigits());
        addFeatureProcessor("month_range",
                new FeatureProcessors.MonthRange());
        addFeatureProcessor("token_pos_guess",
                new FeatureProcessors.TokenPosGuess());
        addFeatureProcessor("segment_duration",
                new FeatureProcessors.SegmentDuration());
        addFeatureProcessor("sub_phrases",
                new FeatureProcessors.SubPhrases());
        addFeatureProcessor("asyl_in",
                new FeatureProcessors.AccentedSylIn());
        addFeatureProcessor("last_accent",
                new FeatureProcessors.LastAccent());
        addFeatureProcessor("pos_in_syl",
                new FeatureProcessors.PosInSyl());
        addFeatureProcessor("position_type",
                new FeatureProcessors.PositionType());

        addFeatureProcessor("ph_cplace",
                new FeatureProcessors.PH_CPlace());
        addFeatureProcessor("ph_ctype",
                new FeatureProcessors.PH_CType());
        addFeatureProcessor("ph_cvox",
                new FeatureProcessors.PH_CVox());
        addFeatureProcessor("ph_vc",
                new FeatureProcessors.PH_VC());
        addFeatureProcessor("ph_vfront",
                new FeatureProcessors.PH_VFront());
        addFeatureProcessor("ph_vheight",
                new FeatureProcessors.PH_VHeight());
        addFeatureProcessor("ph_vlng",
                new FeatureProcessors.PH_VLength());
        addFeatureProcessor("ph_vrnd",
                new FeatureProcessors.PH_VRnd());

        addFeatureProcessor("seg_coda_fric",
                new FeatureProcessors.SegCodaFric());
        addFeatureProcessor("seg_onset_fric",
                new FeatureProcessors.SegOnsetFric());

        addFeatureProcessor("seg_coda_stop",
                new FeatureProcessors.SegCodaStop());
        addFeatureProcessor("seg_onset_stop",
                new FeatureProcessors.SegOnsetStop());

        addFeatureProcessor("seg_coda_nasal",
                new FeatureProcessors.SegCodaNasal());
        addFeatureProcessor("seg_onset_nasal",
                new FeatureProcessors.SegOnsetNasal());

        addFeatureProcessor("seg_coda_glide",
                new FeatureProcessors.SegCodaGlide());
        addFeatureProcessor("seg_onset_glide",
                new FeatureProcessors.SegOnsetGlide());

        addFeatureProcessor("seg_onsetcoda",
                new FeatureProcessors.SegOnsetCoda());
        addFeatureProcessor("syl_codasize",
                new FeatureProcessors.SylCodaSize());
        addFeatureProcessor("syl_onsetsize",
                new FeatureProcessors.SylOnsetSize());
        addFeatureProcessor("accented",
                new FeatureProcessors.Accented());
    }

    /**
     * Given a phoneme and a feature name, return the feature
     *
     * @param phone the phoneme of interest
     * @param featureName the name of the feature of interest
     *
     * @return the feature with the given name, or <code>null</code> if
     *   no phoneset has been loaded for this voice
     */
    public String getPhoneFeature(String phone, String featureName) {
        if (phoneSet != null) {
            return phoneSet.getPhoneFeature(phone, featureName);
        } else {
            return null;
        }
    }

}