1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.voicedialer;
18
19import android.app.Activity;
20import android.content.Intent;
21import android.speech.srec.MicrophoneInputStream;
22import android.speech.srec.Recognizer;
23import android.speech.srec.WaveHeader;
24import android.util.Log;
25import java.io.File;
26import java.io.FileInputStream;
27import java.io.IOException;
28import java.io.InputStream;
29import java.util.ArrayList;
30
31/**
32 * This class is a framework for recognizing speech.  It must be extended to use.
33 * The child class must timplement setupGrammar and onRecognitionSuccess.
34 * A usage cycle is as follows:
35 * <ul>
36 * <li>Create with a reference to the {@link VoiceDialerActivity}.
37 * <li>Signal the user to start speaking with the Vibrator or beep.
38 * <li>Start audio input by creating a {@link MicrophoneInputStream}.
39 * <li>Create and configure a {@link Recognizer}.
40 * <li>Set up the grammar using setupGrammar.
41 * <li>Start the {@link Recognizer} running using data already being
42 * collected by the microphone.
43 * <li>Wait for the {@link Recognizer} to complete.
44 * <li>Process the results using onRecognitionSuccess, which will pass
45 * a list of intents to the {@RecogizerClient}.
46 * <li>Shut down and clean up.
47 * </ul>
48 * Notes:
49 * <ul>
50 * <li>Audio many be read from a file.
51 * <li>A directory tree of audio files may be stepped through.
52 * <li>A contact list may be read from a file.
53 * <li>A {@link RecognizerLogger} may generate a set of log files from
54 * a recognition session.
55 * <li>A static instance of this class is held and reused by the
56 * {@link VoiceDialerActivity}, which saves setup time.
57 * </ul>
58 */
59abstract public class RecognizerEngine {
60
61    protected static final String TAG = "RecognizerEngine";
62
63    protected static final String ACTION_RECOGNIZER_RESULT =
64            "com.android.voicedialer.ACTION_RECOGNIZER_RESULT";
65    public static final String SENTENCE_EXTRA = "sentence";
66    public static final String SEMANTIC_EXTRA = "semantic";
67
68    protected final String SREC_DIR = Recognizer.getConfigDir(null);
69
70    protected static final String OPEN_ENTRIES = "openentries.txt";
71
72    protected static final int RESULT_LIMIT = 5;
73
74    protected Activity mActivity;
75    protected Recognizer mSrec;
76    protected Recognizer.Grammar mSrecGrammar;
77    protected RecognizerLogger mLogger;
78    protected int mSampleRate;
79
80    /**
81     * Constructor.
82     */
83    public RecognizerEngine() {
84        mSampleRate = 0;
85    }
86
87    abstract protected void setupGrammar() throws IOException, InterruptedException;
88
89    abstract protected void onRecognitionSuccess(RecognizerClient recognizerClient)
90            throws InterruptedException;
91
92    /**
93     * Start the recognition process.
94     *
95     * <ul>
96     * <li>Create and start the microphone.
97     * <li>Create a Recognizer.
98     * <li>set up the grammar (implementation is in child class)
99     * <li>Start the Recognizer.
100     * <li>Feed the Recognizer audio until it provides a result.
101     * <li>Build a list of Intents corresponding to the results. (implementation
102     * is in child class)
103     * <li>Stop the microphone.
104     * <li>Stop the Recognizer.
105     * </ul>
106     *
107     * @param recognizerClient client to be given the results
108     * @param activity the Activity this recognition is being run from.
109     * @param micFile optional audio input from this file, or directory tree.
110     * @param sampleRate the same rate coming from the mic or micFile
111     */
112    public void recognize(RecognizerClient recognizerClient, Activity activity,
113            File micFile, int sampleRate) {
114        InputStream mic = null;
115        boolean recognizerStarted = false;
116        try {
117            mActivity = activity;
118            // set up logger
119            mLogger = null;
120            if (RecognizerLogger.isEnabled(mActivity)) {
121                mLogger = new RecognizerLogger(mActivity);
122            }
123
124            if (mSampleRate != sampleRate) {
125                // sample rate has changed since we last used this recognizerEngine.
126                // destroy the grammar and regenerate.
127                if (mSrecGrammar != null) {
128                    mSrecGrammar.destroy();
129                }
130                mSrecGrammar = null;
131                mSampleRate = sampleRate;
132            }
133
134            // create a new recognizer
135            if (false) Log.d(TAG, "start new Recognizer");
136            if (mSrec == null) {
137                String parFilePath = SREC_DIR + "/baseline11k.par";
138                if (sampleRate == 8000) {
139                    parFilePath = SREC_DIR + "/baseline8k.par";
140                }
141                mSrec = new Recognizer(parFilePath);
142            }
143
144            // start audio input
145            if (micFile != null) {
146                if (false) Log.d(TAG, "using mic file");
147                mic = new FileInputStream(micFile);
148                WaveHeader hdr = new WaveHeader();
149                hdr.read(mic);
150            } else {
151                if (false) Log.d(TAG, "start new MicrophoneInputStream");
152                mic = new MicrophoneInputStream(sampleRate, sampleRate * 15);
153            }
154
155            // notify UI
156            recognizerClient.onMicrophoneStart(mic);
157
158            // log audio if requested
159            if (mLogger != null) mic = mLogger.logInputStream(mic, sampleRate);
160
161            setupGrammar();
162
163            // start the recognition process
164            if (false) Log.d(TAG, "start mSrec.start");
165            mSrec.start();
166            recognizerStarted = true;
167
168            // recognize
169            while (true) {
170                if (Thread.interrupted()) throw new InterruptedException();
171                int event = mSrec.advance();
172                if (event != Recognizer.EVENT_INCOMPLETE &&
173                        event != Recognizer.EVENT_NEED_MORE_AUDIO) {
174                    Log.d(TAG, "start advance()=" +
175                            Recognizer.eventToString(event) +
176                            " avail " + mic.available());
177                }
178                switch (event) {
179                case Recognizer.EVENT_INCOMPLETE:
180                case Recognizer.EVENT_STARTED:
181                case Recognizer.EVENT_START_OF_VOICING:
182                case Recognizer.EVENT_END_OF_VOICING:
183                    continue;
184                case Recognizer.EVENT_RECOGNITION_RESULT:
185                    onRecognitionSuccess(recognizerClient);
186                    break;
187                case Recognizer.EVENT_NEED_MORE_AUDIO:
188                    mSrec.putAudio(mic);
189                    continue;
190                default:
191                    Log.d(TAG, "unknown event " + event);
192                    recognizerClient.onRecognitionFailure(Recognizer.eventToString(event));
193                    break;
194                }
195                break;
196            }
197
198        } catch (InterruptedException e) {
199            if (false) Log.d(TAG, "start interrupted " + e);
200            recognizerClient.onRecognitionError(e.toString());
201        } catch (IOException e) {
202            if (false) Log.d(TAG, "start new Srec failed " + e);
203            recognizerClient.onRecognitionError(e.toString());
204        } catch (Exception e) {
205            if (false) Log.d(TAG, "exception " + e);
206            recognizerClient.onRecognitionError(e.toString());
207        } finally {
208            if (false) Log.d(TAG, "start mSrec.stop");
209            if (mSrec != null && recognizerStarted) mSrec.stop();
210
211            // stop microphone
212            try {
213                if (mic != null) mic.close();
214            }
215            catch (IOException ex) {
216                if (false) Log.d(TAG, "start - mic.close failed - " + ex);
217            }
218            mic = null;
219
220            // close logger
221            try {
222                if (mLogger != null) mLogger.close();
223            }
224            catch (IOException ex) {
225                if (false) Log.d(TAG, "start - mLoggger.close failed - " + ex);
226            }
227            mLogger = null;
228        }
229        if (false) Log.d(TAG, "start bye");
230    }
231
232    protected static void addIntent(ArrayList<Intent> intents, Intent intent) {
233        for (Intent in : intents) {
234            if (in.getAction() != null &&
235                    in.getAction().equals(intent.getAction()) &&
236                    in.getData() != null &&
237                    in.getData().equals(intent.getData())) {
238                return;
239            }
240        }
241        intent.setFlags(intent.getFlags() | Intent.FLAG_ACTIVITY_NEW_TASK);
242        intents.add(intent);
243    }
244}
245