Source code

001/**
002 * Portions Copyright 2004 DFKI GmbH.
003 * Portions Copyright 2001 Sun Microsystems, Inc.
004 * Portions Copyright 1999-2001 Language Technologies Institute, 
005 * Carnegie Mellon University.
006 * All Rights Reserved.  Use is subject to license terms.
007 * 
008 * See the file "license.terms" for information on usage and
009 * redistribution of this file, and for a DISCLAIMER OF ALL 
010 * WARRANTIES.
011 */
012package de.dfki.lt.freetts;
013
014import java.io.IOException;
015import java.net.URL;
016import java.util.Locale;
017
018import com.sun.speech.freetts.Age;
019import com.sun.speech.freetts.Gender;
020import com.sun.speech.freetts.PartOfSpeech;
021import com.sun.speech.freetts.PartOfSpeechImpl;
022import com.sun.speech.freetts.PhoneSet;
023import com.sun.speech.freetts.PhoneSetImpl;
024import com.sun.speech.freetts.Tokenizer;
025import com.sun.speech.freetts.UtteranceProcessor;
026import com.sun.speech.freetts.Voice;
027import com.sun.speech.freetts.diphone.DiphonePitchmarkGenerator;
028import com.sun.speech.freetts.diphone.DiphoneUnitSelector;
029import com.sun.speech.freetts.en.us.CMULexicon;
030import com.sun.speech.freetts.en.us.FeatureProcessors;
031import com.sun.speech.freetts.lexicon.Lexicon;
032import com.sun.speech.freetts.relp.AudioOutput;
033import com.sun.speech.freetts.relp.SampleInfo;
034import com.sun.speech.freetts.relp.UnitConcatenator;
035
036
037/**
038 * A simple dummy voice as a starting point for non-US-English
039 * cluster unit voices. All NLP stuff would need to be implemented
040 * in order for this to become a full TTS voice.
041 */
042public class DiphoneVoice extends Voice implements ConcatenativeVoice {
043    private PhoneSet phoneSet;
044        protected URL database;
045        protected URL phonesetURL;
046        protected URL partOfSpeechURL;
047    protected DiphoneUnitSelector unitSelector;
048    
049        public DiphoneVoice(String name, Gender gender, Age age,
050                        String description, Locale locale, String domain,
051                        String organization, Lexicon lexicon, URL database) {
052                this(name, gender, age, description, locale, domain,
053                                organization, lexicon, database, null, null);
054        }
055        
056         /**
057     * Creates a ClusterUnitVoice
058     * 
059     * @param database the database of the voice
060     * @param unitNamer specifies the name of the Units (if null, an 
061     * ldom naming scheme will be used: 'ae_afternoon')
062     * @param phonesetURL leads to the phoneset, which will be used 
063     * for the FeatureProcessors (can be null)
064     * @param partOfSpeechURL leads to the pos-textfile which will be used
065     * for the FeatureProcessors (can be null)
066     */
067        public DiphoneVoice(String name, Gender gender, Age age,
068                        String description, Locale locale, String domain,
069                        String organization, Lexicon lexicon, URL database,
070            URL phonesetURL, URL partOfSpeechURL) {
071                
072                //TODO: do something useful with the lexicon
073                super(name, gender, age, description, locale,
074                                domain, organization);
075                // Set default prosody values:
076        setRate(150f);
077                setPitch(100F);
078                setPitchRange(12F);
079        if (lexicon != null) {
080            setLexicon(lexicon);
081        } else {
082            // Use a small dummy lexicon
083            setLexicon(new CMULexicon("cmutimelex"));
084        }
085                this.database = database;
086                this.phonesetURL = phonesetURL;
087                this.partOfSpeechURL = partOfSpeechURL;
088        try {
089            this.unitSelector = new DiphoneUnitSelector(getDatabase());
090        } catch (IOException ioe) {
091            ioe.printStackTrace();
092        }
093        }
094        
095        public Tokenizer getTokenizer() {
096                return null;
097        }
098        
099        
100        protected void loader() throws IOException {
101                setupFeatureProcessors();
102        }
103        
104    /**
105     * Get the sample info for the underlying database.
106     * @return the sample info object
107     */
108    public SampleInfo getSampleInfo() {
109        return unitSelector.getSampleInfo();
110    }
111        
112        protected UtteranceProcessor getAudioOutput() throws IOException {
113                return new AudioOutput();
114        }
115        
116        /**
117         * Gets the url to the database that defines the unit data for this
118         * voice.
119         *
120         * @return a url to the database
121         */
122        public URL getDatabase() {
123                return database;
124        }
125        
126        /**
127         * Returns the unit selector to be used by this voice.
128         * Derived voices typically override this to customize behaviors.
129         * This voice uses  a diphone selector as the unit selector.
130         * 
131         * @return the post lexical processor
132         * 
133         * @throws IOException if an IO error occurs while getting
134         *     processor
135         */
136        public UtteranceProcessor getUnitSelector() throws IOException {
137        return unitSelector;
138    }
139    
140    /**
141     * Returns the pitch mark generator to be used by this voice.
142     * Derived voices typically override this to customize behaviors.
143     * There is no default unit selector
144     * 
145     * @return the post lexical processor
146     * 
147     * @throws IOException if an IO error occurs while getting
148     *     processor
149     */
150    public UtteranceProcessor getPitchmarkGenerator() throws IOException {
151        return new DiphonePitchmarkGenerator();
152    }
153    
154    /**
155     * Returns the unit concatenator to be used by this voice.
156     * Derived voices typically override this to customize behaviors.
157     * There is no default unit selector
158     * 
159     * @return the post lexical processor
160     * 
161     * @throws IOException if an IO error occurs while getting
162     *     processor
163     */
164    public UtteranceProcessor getUnitConcatenator() throws IOException {
165        return new UnitConcatenator();
166    }
167
168    protected void setupFeatureProcessors() throws IOException {
169        if (phonesetURL != null) {
170            phoneSet = new PhoneSetImpl(phonesetURL);
171        }
172        if (partOfSpeechURL != null) {
173            PartOfSpeech pos = new PartOfSpeechImpl(partOfSpeechURL, "content");
174            addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos));
175        }
176        addFeatureProcessor("word_break", new FeatureProcessors.WordBreak());
177        addFeatureProcessor("word_punc", new FeatureProcessors.WordPunc());
178        addFeatureProcessor("word_numsyls",new FeatureProcessors.WordNumSyls());
179        addFeatureProcessor("ssyl_in", new FeatureProcessors.StressedSylIn());
180        addFeatureProcessor("syl_in", new FeatureProcessors.SylIn());
181        addFeatureProcessor("syl_out", new FeatureProcessors.SylOut());
182        addFeatureProcessor("ssyl_out", new
183                FeatureProcessors.StressedSylOut());
184        addFeatureProcessor("syl_break", new FeatureProcessors.SylBreak());
185        addFeatureProcessor("old_syl_break", new FeatureProcessors.SylBreak());
186        addFeatureProcessor("num_digits", new FeatureProcessors.NumDigits());
187        addFeatureProcessor("month_range", new FeatureProcessors.MonthRange());
188        addFeatureProcessor("token_pos_guess", 
189                new FeatureProcessors.TokenPosGuess());
190        addFeatureProcessor("segment_duration", 
191                new FeatureProcessors.SegmentDuration());
192        addFeatureProcessor("sub_phrases", new FeatureProcessors.SubPhrases());
193        addFeatureProcessor("asyl_in", new FeatureProcessors.AccentedSylIn());
194        addFeatureProcessor("last_accent", new FeatureProcessors.LastAccent());
195        addFeatureProcessor("pos_in_syl", new FeatureProcessors.PosInSyl());
196        addFeatureProcessor("position_type", new
197                FeatureProcessors.PositionType());
198
199        addFeatureProcessor("ph_cplace", new FeatureProcessors.PH_CPlace());
200        addFeatureProcessor("ph_ctype", new FeatureProcessors.PH_CType());
201        addFeatureProcessor("ph_cvox", new FeatureProcessors.PH_CVox());
202        addFeatureProcessor("ph_vc", new FeatureProcessors.PH_VC());
203        addFeatureProcessor("ph_vfront", new FeatureProcessors.PH_VFront());
204        addFeatureProcessor("ph_vheight", new FeatureProcessors.PH_VHeight());
205        addFeatureProcessor("ph_vlng", new FeatureProcessors.PH_VLength());
206        addFeatureProcessor("ph_vrnd", new FeatureProcessors.PH_VRnd());
207
208        addFeatureProcessor("seg_coda_fric", new
209                FeatureProcessors.SegCodaFric());
210        addFeatureProcessor("seg_onset_fric", new
211                FeatureProcessors.SegOnsetFric());
212
213        addFeatureProcessor("seg_coda_stop", new
214                FeatureProcessors.SegCodaStop());
215        addFeatureProcessor("seg_onset_stop", new
216                FeatureProcessors.SegOnsetStop());
217
218        addFeatureProcessor("seg_coda_nasal", new
219                FeatureProcessors.SegCodaNasal());
220        addFeatureProcessor("seg_onset_nasal", new
221                FeatureProcessors.SegOnsetNasal());
222
223        addFeatureProcessor("seg_coda_glide", new
224                FeatureProcessors.SegCodaGlide());
225        addFeatureProcessor("seg_onset_glide", new
226                FeatureProcessors.SegOnsetGlide());
227
228        addFeatureProcessor("seg_onsetcoda", new
229                FeatureProcessors.SegOnsetCoda());
230        addFeatureProcessor("syl_codasize", new
231                FeatureProcessors.SylCodaSize());
232        addFeatureProcessor("syl_onsetsize", new
233                FeatureProcessors.SylOnsetSize());
234        addFeatureProcessor("accented", new FeatureProcessors.Accented());
235    }
236
237    /**
238     * Given a phoneme and a feature name, return the feature
239     *
240     * @param phone the phoneme of interest
241     * @param featureName the name of the feature of interest
242     *
243     * @return the feature with the given name
244     */
245    public String getPhoneFeature(String phone, String featureName) {
246        if (phoneSet != null)
247            return phoneSet.getPhoneFeature(phone, featureName);
248        else
249            return null;
250    }
251
252}