AlwaysOnHotwordDetector.java revision 6daae9622672e0b38fc2efed29f68061d749cacc
/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.IRecognitionStatusCallback;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.SoundTrigger;
24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
25import android.hardware.soundtrigger.SoundTrigger.Keyphrase;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30import android.os.Handler;
31import android.os.Message;
32import android.os.RemoteException;
33import android.util.Slog;
34
35import com.android.internal.app.IVoiceInteractionManagerService;
36
37import java.util.List;
38
39/**
40 * A class that lets a VoiceInteractionService implementation interact with
41 * always-on keyphrase detection APIs.
42 */
43public class AlwaysOnHotwordDetector {
44    //---- States of Keyphrase availability. Return codes for getAvailability() ----//
45    /**
46     * Indicates that this hotword detector is no longer valid for any recognition
47     * and should not be used anymore.
48     */
49    public static final int STATE_INVALID = -3;
50    /**
51     * Indicates that recognition for the given keyphrase is not available on the system
52     * because of the hardware configuration.
53     */
54    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
55    /**
56     * Indicates that recognition for the given keyphrase is not supported.
57     */
58    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
59    /**
60     * Indicates that the given keyphrase is not enrolled.
61     */
62    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
63    /**
64     * Indicates that the given keyphrase is currently enrolled and it's possible to start
65     * recognition for it.
66     */
67    public static final int STATE_KEYPHRASE_ENROLLED = 2;
68
69    // Keyphrase management actions. Used in getManageIntent() ----//
70    /** Indicates that we need to enroll. */
71    public static final int MANAGE_ACTION_ENROLL = 0;
72    /** Indicates that we need to re-enroll. */
73    public static final int MANAGE_ACTION_RE_ENROLL = 1;
74    /** Indicates that we need to un-enroll. */
75    public static final int MANAGE_ACTION_UN_ENROLL = 2;
76
77    /**
78     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
79     */
80    public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
81    public static final int STATUS_OK = SoundTrigger.STATUS_OK;
82
83    //-- Flags for startRecogntion    ----//
84    /** Empty flag for {@link #startRecognition(int)}. */
85    public static final int RECOGNITION_FLAG_NONE = 0;
86    /**
87     * Recognition flag for {@link #startRecognition(int)} that indicates
88     * whether the trigger audio for hotword needs to be captured.
89     */
90    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
91
92    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
93    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
94
95    /**
96     * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()}
97     */
98    public static final int RECOGNITION_MODE_VOICE_TRIGGER
99            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
100    /**
101     * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()}
102     */
103    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
104            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
105
106    static final String TAG = "AlwaysOnHotwordDetector";
107
108    private static final int MSG_HOTWORD_DETECTED = 1;
109    private static final int MSG_DETECTION_STOPPED = 2;
110
111    private final String mText;
112    private final String mLocale;
113    /**
114     * The metadata of the Keyphrase, derived from the enrollment application.
115     * This may be null if this keyphrase isn't supported by the enrollment application.
116     */
117    private final KeyphraseMetadata mKeyphraseMetadata;
118    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
119    private final IVoiceInteractionService mVoiceInteractionService;
120    private final IVoiceInteractionManagerService mModelManagementService;
121    private final SoundTriggerListener mInternalCallback;
122    private final Callback mExternalCallback;
123    private final boolean mDisabled;
124    private final Object mLock = new Object();
125
126    /**
127     * The sound model for the keyphrase, derived from the model management service
128     * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet.
129     */
130    private KeyphraseSoundModel mEnrolledSoundModel;
131    private boolean mInvalidated;
132
133    /**
134     * Callbacks for always-on hotword detection.
135     */
136    public interface Callback {
137        /**
138         * Called when the keyphrase is spoken.
139         *
140         * @param data Optional trigger audio data, if it was requested during
141         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
142         */
143        void onDetected(byte[] data);
144        /**
145         * Called when the detection for the associated keyphrase stops.
146         */
147        void onDetectionStopped();
148    }
149
150    /**
151     * @param text The keyphrase text to get the detector for.
152     * @param locale The java locale for the detector.
153     * @param callback A non-null Callback for receiving the recognition events.
154     * @param voiceInteractionService The current voice interaction service.
155     * @param modelManagementService A service that allows management of sound models.
156     *
157     * @hide
158     */
159    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
160            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
161            IVoiceInteractionService voiceInteractionService,
162            IVoiceInteractionManagerService modelManagementService) {
163        mInvalidated = false;
164        mText = text;
165        mLocale = locale;
166        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
167        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
168        mExternalCallback = callback;
169        mInternalCallback = new SoundTriggerListener(new MyHandler());
170        mVoiceInteractionService = voiceInteractionService;
171        mModelManagementService = modelManagementService;
172        if (mKeyphraseMetadata != null) {
173            mEnrolledSoundModel = internalGetKeyphraseSoundModelLocked(mKeyphraseMetadata.id);
174        }
175        int initialAvailability = internalGetAvailabilityLocked();
176        mDisabled = (initialAvailability == STATE_HARDWARE_UNAVAILABLE)
177                || (initialAvailability == STATE_KEYPHRASE_UNSUPPORTED);
178    }
179
180    /**
181     * Gets the state of always-on hotword detection for the given keyphrase and locale
182     * on this system.
183     * Availability implies that the hardware on this system is capable of listening for
184     * the given keyphrase or not. <p/>
185     * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or
186     * {@link #STATE_KEYPHRASE_UNSUPPORTED}, no further interaction should be performed with this
187     * detector. <br/>
188     * If the state is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin
189     * an enrollment flow for the keyphrase. <br/>
190     * For {@value #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <br/>
191     * If the return code is {@link #STATE_INVALID}, this detector is stale and must not be used.
192     * A new detector should be obtained and used.
193     *
194     * @return Indicates if always-on hotword detection is available for the given keyphrase.
195     *         The return code is one of {@link #STATE_HARDWARE_UNAVAILABLE},
196     *         {@link #STATE_KEYPHRASE_UNSUPPORTED}, {@link #STATE_KEYPHRASE_UNENROLLED},
197     *         {@link #STATE_KEYPHRASE_ENROLLED}, or {@link #STATE_INVALID}.
198     */
199    public int getAvailability() {
200        synchronized (mLock) {
201            return internalGetAvailabilityLocked();
202        }
203    }
204
205    /**
206     * Gets the recognition modes supported by the associated keyphrase.
207     *
208     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
209     *         Callers should check the availability by calling {@link #getAvailability()}
210     *         before calling this method to avoid this exception.
211     */
212    public int getSupportedRecognitionModes() {
213        synchronized (mLock) {
214            return getSupportedRecognitionModesLocked();
215        }
216    }
217
218    private int getSupportedRecognitionModesLocked() {
219        if (mDisabled) {
220            throw new UnsupportedOperationException(
221                    "Getting supported recognition modes for the keyphrase is not supported");
222        }
223
224        return mKeyphraseMetadata.recognitionModeFlags;
225    }
226
227    /**
228     * Starts recognition for the associated keyphrase.
229     *
230     * @param recognitionFlags The flags to control the recognition properties.
231     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
232     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
233     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
234     * @throws UnsupportedOperationException if the recognition isn't supported.
235     *         Callers should check the availability by calling {@link #getAvailability()}
236     *         before calling this method to avoid this exception.
237     */
238    public int startRecognition(int recognitionFlags) {
239        synchronized (mLock) {
240            return startRecognitionLocked(recognitionFlags);
241        }
242    }
243
244    private int startRecognitionLocked(int recognitionFlags) {
245        if (internalGetAvailabilityLocked() != STATE_KEYPHRASE_ENROLLED) {
246            throw new UnsupportedOperationException(
247                    "Recognition for the given keyphrase is not supported");
248        }
249
250        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
251        // TODO: Do we need to do something about the confidence level here?
252        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
253                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
254        boolean captureTriggerAudio =
255                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
256        int code = STATUS_ERROR;
257        try {
258            code = mModelManagementService.startRecognition(mVoiceInteractionService,
259                    mKeyphraseMetadata.id, mEnrolledSoundModel, mInternalCallback,
260                    new RecognitionConfig(
261                            captureTriggerAudio, recognitionExtra, null /* additional data */));
262        } catch (RemoteException e) {
263            Slog.w(TAG, "RemoteException in startRecognition!");
264        }
265        if (code != STATUS_OK) {
266            Slog.w(TAG, "startRecognition() failed with error code " + code);
267        }
268        return code;
269    }
270
271    /**
272     * Stops recognition for the associated keyphrase.
273     *
274     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
275     * @throws UnsupportedOperationException if the recognition isn't supported.
276     *         Callers should check the availability by calling {@link #getAvailability()}
277     *         before calling this method to avoid this exception.
278     */
279    public int stopRecognition() {
280        synchronized (mLock) {
281            return stopRecognitionLocked();
282        }
283    }
284
285    private int stopRecognitionLocked() {
286        if (internalGetAvailabilityLocked() != STATE_KEYPHRASE_ENROLLED) {
287            throw new UnsupportedOperationException(
288                    "Recognition for the given keyphrase is not supported");
289        }
290
291        int code = STATUS_ERROR;
292        try {
293            code = mModelManagementService.stopRecognition(
294                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
295        } catch (RemoteException e) {
296            Slog.w(TAG, "RemoteException in stopRecognition!");
297        }
298
299        if (code != STATUS_OK) {
300            Slog.w(TAG, "stopRecognition() failed with error code " + code);
301        }
302        return code;
303    }
304
305    /**
306     * Gets an intent to manage the associated keyphrase.
307     *
308     * @param action The manage action that needs to be performed.
309     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
310     *        {@link #MANAGE_ACTION_UN_ENROLL}.
311     * @return An {@link Intent} to manage the given keyphrase.
312     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
313     *         Callers should check the availability by calling {@link #getAvailability()}
314     *         before calling this method to avoid this exception.
315     */
316    public Intent getManageIntent(int action) {
317        if (mDisabled) {
318            throw new UnsupportedOperationException(
319                    "Managing the given keyphrase is not supported");
320        }
321        if (action != MANAGE_ACTION_ENROLL
322                && action != MANAGE_ACTION_RE_ENROLL
323                && action != MANAGE_ACTION_UN_ENROLL) {
324            throw new IllegalArgumentException("Invalid action specified " + action);
325        }
326
327        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
328    }
329
330    private int internalGetAvailabilityLocked() {
331        if (mInvalidated) {
332            return STATE_INVALID;
333        }
334
335        ModuleProperties dspModuleProperties = null;
336        try {
337            dspModuleProperties =
338                    mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
339        } catch (RemoteException e) {
340            Slog.w(TAG, "RemoteException in getDspProperties!");
341        }
342        // No DSP available
343        if (dspModuleProperties == null) {
344            return STATE_HARDWARE_UNAVAILABLE;
345        }
346        // No enrollment application supports this keyphrase/locale
347        if (mKeyphraseMetadata == null) {
348            return STATE_KEYPHRASE_UNSUPPORTED;
349        }
350
351        // This keyphrase hasn't been enrolled.
352        if (mEnrolledSoundModel == null) {
353            return STATE_KEYPHRASE_UNENROLLED;
354        }
355        return STATE_KEYPHRASE_ENROLLED;
356    }
357
358    /**
359     * Invalidates this hotword detector so that any future calls to this result
360     * in an IllegalStateException.
361     *
362     * @hide
363     */
364    void invalidate() {
365        synchronized (mLock) {
366            mInvalidated = true;
367        }
368    }
369
370    /**
371     * Reloads the sound models from the service.
372     *
373     * @hide
374     */
375    void onSoundModelsChanged() {
376        synchronized (mLock) {
377            // TODO: This should stop the recognition if it was using an enrolled sound model
378            // that's no longer available.
379            if (mKeyphraseMetadata != null) {
380                mEnrolledSoundModel = internalGetKeyphraseSoundModelLocked(mKeyphraseMetadata.id);
381            }
382        }
383    }
384
385    /**
386     * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
387     */
388    private KeyphraseSoundModel internalGetKeyphraseSoundModelLocked(int keyphraseId) {
389        List<KeyphraseSoundModel> soundModels;
390        try {
391            soundModels = mModelManagementService
392                    .listRegisteredKeyphraseSoundModels(mVoiceInteractionService);
393            if (soundModels == null || soundModels.isEmpty()) {
394                Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId);
395                return null;
396            }
397            for (KeyphraseSoundModel soundModel : soundModels) {
398                if (soundModel.keyphrases == null) {
399                    continue;
400                }
401                for (Keyphrase keyphrase : soundModel.keyphrases) {
402                    if (keyphrase.id == keyphraseId) {
403                        return soundModel;
404                    }
405                }
406            }
407        } catch (RemoteException e) {
408            Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
409        }
410        return null;
411    }
412
413    /** @hide */
414    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
415        private final Handler mHandler;
416
417        public SoundTriggerListener(Handler handler) {
418            mHandler = handler;
419        }
420
421        @Override
422        public void onDetected(byte[] data) {
423            Slog.i(TAG, "onDetected");
424            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
425            message.obj = data;
426            message.sendToTarget();
427        }
428
429        @Override
430        public void onDetectionStopped() {
431            Slog.i(TAG, "onDetectionStopped");
432            mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED);
433        }
434    }
435
436    class MyHandler extends Handler {
437        @Override
438        public void handleMessage(Message msg) {
439            switch (msg.what) {
440                case MSG_HOTWORD_DETECTED:
441                    mExternalCallback.onDetected((byte[]) msg.obj);
442                    break;
443                case MSG_DETECTION_STOPPED:
444                    mExternalCallback.onDetectionStopped();
445                default:
446                    super.handleMessage(msg);
447            }
448        }
449    }
450}
451