AlwaysOnHotwordDetector.java revision 6817337118655d5792e36e954b123e6daa4174a6
/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.IRecognitionStatusCallback;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.SoundTrigger;
24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
25import android.hardware.soundtrigger.SoundTrigger.Keyphrase;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent;
27import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
28import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
29import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
30import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
31import android.hardware.soundtrigger.SoundTrigger.RecognitionEvent;
32import android.os.AsyncTask;
33import android.os.Handler;
34import android.os.Message;
35import android.os.RemoteException;
36import android.util.Slog;
37
38import com.android.internal.app.IVoiceInteractionManagerService;
39
40import java.util.List;
41
42/**
43 * A class that lets a VoiceInteractionService implementation interact with
44 * always-on keyphrase detection APIs.
45 */
46public class AlwaysOnHotwordDetector {
47    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
48    /**
49     * Indicates that this hotword detector is no longer valid for any recognition
50     * and should not be used anymore.
51     */
52    public static final int STATE_INVALID = -3;
53    /**
54     * Indicates that recognition for the given keyphrase is not available on the system
55     * because of the hardware configuration.
56     */
57    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
58    /**
59     * Indicates that recognition for the given keyphrase is not supported.
60     */
61    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
62    /**
63     * Indicates that the given keyphrase is not enrolled.
64     */
65    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
66    /**
67     * Indicates that the given keyphrase is currently enrolled and it's possible to start
68     * recognition for it.
69     */
70    public static final int STATE_KEYPHRASE_ENROLLED = 2;
71
72    /**
73     * Indicates that the detector isn't ready currently.
74     */
75    private static final int STATE_NOT_READY = 0;
76
77    // Keyphrase management actions. Used in getManageIntent() ----//
78    /** Indicates that we need to enroll. */
79    public static final int MANAGE_ACTION_ENROLL = 0;
80    /** Indicates that we need to re-enroll. */
81    public static final int MANAGE_ACTION_RE_ENROLL = 1;
82    /** Indicates that we need to un-enroll. */
83    public static final int MANAGE_ACTION_UN_ENROLL = 2;
84
85    /**
86     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
87     */
88    public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
89    public static final int STATUS_OK = SoundTrigger.STATUS_OK;
90
91    //-- Flags for startRecogntion    ----//
92    /** Empty flag for {@link #startRecognition(int)}. */
93    public static final int RECOGNITION_FLAG_NONE = 0;
94    /**
95     * Recognition flag for {@link #startRecognition(int)} that indicates
96     * whether the trigger audio for hotword needs to be captured.
97     */
98    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
99
100    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
101    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
102
103    /**
104     * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()}
105     */
106    public static final int RECOGNITION_MODE_VOICE_TRIGGER
107            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
108    /**
109     * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()}
110     */
111    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
112            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
113
114    static final String TAG = "AlwaysOnHotwordDetector";
115    // TODO: Set to false.
116    static final boolean DBG = true;
117
118    private static final int MSG_STATE_CHANGED = 1;
119    private static final int MSG_HOTWORD_DETECTED = 2;
120    private static final int MSG_DETECTION_STOPPED = 3;
121
122    private final String mText;
123    private final String mLocale;
124    /**
125     * The metadata of the Keyphrase, derived from the enrollment application.
126     * This may be null if this keyphrase isn't supported by the enrollment application.
127     */
128    private final KeyphraseMetadata mKeyphraseMetadata;
129    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
130    private final IVoiceInteractionService mVoiceInteractionService;
131    private final IVoiceInteractionManagerService mModelManagementService;
132    private final SoundTriggerListener mInternalCallback;
133    private final Callback mExternalCallback;
134    private final Object mLock = new Object();
135    private final Handler mHandler;
136
137    private int mAvailability = STATE_NOT_READY;
138
139    /**
140     * Callbacks for always-on hotword detection.
141     */
142    public interface Callback {
143        /**
144         * Called when the hotword availability changes.
145         * This indicates a change in the availability of recognition for the given keyphrase.
146         * It's called at least once with the initial availability.<p/>
147         *
148         * Availability implies whether the hardware on this system is capable of listening for
149         * the given keyphrase or not. <p/>
150         * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or
151         * {@link #STATE_KEYPHRASE_UNSUPPORTED},
152         * detection is not possible and no further interaction should be
153         * performed with this detector. <br/>
154         * If it is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin
155         * an enrollment flow for the keyphrase. <br/>
156         * and for {@link #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <p/>
157         *
158         * If the return code is {@link #STATE_INVALID}, this detector is stale.
159         * A new detector should be obtained for use in the future.
160         */
161        void onAvailabilityChanged(int status);
162        /**
163         * Called when the keyphrase is spoken.
164         *
165         * @param data Optional trigger audio data, if it was requested during
166         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
167         */
168        void onDetected(byte[] data);
169        /**
170         * Called when the detection for the associated keyphrase stops.
171         */
172        void onDetectionStopped();
173    }
174
175    /**
176     * @param text The keyphrase text to get the detector for.
177     * @param locale The java locale for the detector.
178     * @param callback A non-null Callback for receiving the recognition events.
179     * @param voiceInteractionService The current voice interaction service.
180     * @param modelManagementService A service that allows management of sound models.
181     *
182     * @hide
183     */
184    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
185            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
186            IVoiceInteractionService voiceInteractionService,
187            IVoiceInteractionManagerService modelManagementService) {
188        mText = text;
189        mLocale = locale;
190        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
191        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
192        mExternalCallback = callback;
193        mHandler = new MyHandler();
194        mInternalCallback = new SoundTriggerListener(mHandler);
195        mVoiceInteractionService = voiceInteractionService;
196        mModelManagementService = modelManagementService;
197        new RefreshAvailabiltyTask().execute();
198    }
199
200    /**
201     * Gets the recognition modes supported by the associated keyphrase.
202     *
203     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
204     *         Callers should only call this method after a supported state callback on
205     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
206     */
207    public int getSupportedRecognitionModes() {
208        synchronized (mLock) {
209            return getSupportedRecognitionModesLocked();
210        }
211    }
212
213    private int getSupportedRecognitionModesLocked() {
214        // This method only makes sense if we can actually support a recognition.
215        if (mAvailability != STATE_KEYPHRASE_ENROLLED
216                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
217            throw new UnsupportedOperationException(
218                    "Getting supported recognition modes for the keyphrase is not supported");
219        }
220
221        return mKeyphraseMetadata.recognitionModeFlags;
222    }
223
224    /**
225     * Starts recognition for the associated keyphrase.
226     *
227     * @param recognitionFlags The flags to control the recognition properties.
228     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
229     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
230     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
231     * @throws UnsupportedOperationException if the recognition isn't supported.
232     *         Callers should only call this method after a supported state callback on
233     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
234     */
235    public int startRecognition(int recognitionFlags) {
236        synchronized (mLock) {
237            return startRecognitionLocked(recognitionFlags);
238        }
239    }
240
241    private int startRecognitionLocked(int recognitionFlags) {
242        // This method only makes sense if we can start a recognition.
243        if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
244            throw new UnsupportedOperationException(
245                    "Recognition for the given keyphrase is not supported");
246        }
247
248        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
249        // TODO: Do we need to do something about the confidence level here?
250        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
251                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
252        boolean captureTriggerAudio =
253                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
254        int code = STATUS_ERROR;
255        try {
256            code = mModelManagementService.startRecognition(mVoiceInteractionService,
257                    mKeyphraseMetadata.id, mInternalCallback,
258                    new RecognitionConfig(
259                            captureTriggerAudio, recognitionExtra, null /* additional data */));
260        } catch (RemoteException e) {
261            Slog.w(TAG, "RemoteException in startRecognition!");
262        }
263        if (code != STATUS_OK) {
264            Slog.w(TAG, "startRecognition() failed with error code " + code);
265        }
266        return code;
267    }
268
269    /**
270     * Stops recognition for the associated keyphrase.
271     *
272     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
273     * @throws UnsupportedOperationException if the recognition isn't supported.
274     *         Callers should only call this method after a supported state callback on
275     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
276     */
277    public int stopRecognition() {
278        synchronized (mLock) {
279            return stopRecognitionLocked();
280        }
281    }
282
283    private int stopRecognitionLocked() {
284        // This method only makes sense if we can start a recognition.
285        if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
286            throw new UnsupportedOperationException(
287                    "Recognition for the given keyphrase is not supported");
288        }
289
290        int code = STATUS_ERROR;
291        try {
292            code = mModelManagementService.stopRecognition(
293                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
294        } catch (RemoteException e) {
295            Slog.w(TAG, "RemoteException in stopRecognition!");
296        }
297
298        if (code != STATUS_OK) {
299            Slog.w(TAG, "stopRecognition() failed with error code " + code);
300        }
301        return code;
302    }
303
304    /**
305     * Gets an intent to manage the associated keyphrase.
306     *
307     * @param action The manage action that needs to be performed.
308     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
309     *        {@link #MANAGE_ACTION_UN_ENROLL}.
310     * @return An {@link Intent} to manage the given keyphrase.
311     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
312     *         Callers should only call this method after a supported state callback on
313     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
314     */
315    public Intent getManageIntent(int action) {
316        // This method only makes sense if we can actually support a recognition.
317        if (mAvailability != STATE_KEYPHRASE_ENROLLED
318                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
319            throw new UnsupportedOperationException(
320                    "Managing the given keyphrase is not supported");
321        }
322        if (action != MANAGE_ACTION_ENROLL
323                && action != MANAGE_ACTION_RE_ENROLL
324                && action != MANAGE_ACTION_UN_ENROLL) {
325            throw new IllegalArgumentException("Invalid action specified " + action);
326        }
327
328        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
329    }
330
331    /**
332     * Invalidates this hotword detector so that any future calls to this result
333     * in an IllegalStateException.
334     *
335     * @hide
336     */
337    void invalidate() {
338        synchronized (mLock) {
339            mAvailability = STATE_INVALID;
340            notifyStateChangedLocked();
341        }
342    }
343
344    /**
345     * Reloads the sound models from the service.
346     *
347     * @hide
348     */
349    void onSoundModelsChanged() {
350        synchronized (mLock) {
351            // TODO: This should stop the recognition if it was using an enrolled sound model
352            // that's no longer available.
353            if (mAvailability == STATE_INVALID
354                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
355                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
356                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
357                return;
358            }
359
360            // Execute a refresh availability task - which should then notify of a change.
361            new RefreshAvailabiltyTask().execute();
362        }
363    }
364
365    private void notifyStateChangedLocked() {
366        Message message = Message.obtain(mHandler, MSG_STATE_CHANGED);
367        message.arg1 = mAvailability;
368        message.sendToTarget();
369    }
370
371    /** @hide */
372    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
373        private final Handler mHandler;
374
375        public SoundTriggerListener(Handler handler) {
376            mHandler = handler;
377        }
378
379        @Override
380        public void onDetected(KeyphraseRecognitionEvent event) {
381            Slog.i(TAG, "onDetected");
382            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
383            message.obj = event.data;
384            message.sendToTarget();
385        }
386
387        @Override
388        public void onDetectionStopped() {
389            Slog.i(TAG, "onDetectionStopped");
390            mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED);
391        }
392    }
393
394    class MyHandler extends Handler {
395        @Override
396        public void handleMessage(Message msg) {
397            switch (msg.what) {
398                case MSG_STATE_CHANGED:
399                    mExternalCallback.onAvailabilityChanged(msg.arg1);
400                    break;
401                case MSG_HOTWORD_DETECTED:
402                    mExternalCallback.onDetected((byte[]) msg.obj);
403                    break;
404                case MSG_DETECTION_STOPPED:
405                    mExternalCallback.onDetectionStopped();
406                default:
407                    super.handleMessage(msg);
408            }
409        }
410    }
411
412    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {
413
414        @Override
415        public Void doInBackground(Void... params) {
416            int availability = internalGetInitialAvailability();
417            boolean enrolled = false;
418            // Fetch the sound model if the availability is one of the supported ones.
419            if (availability == STATE_NOT_READY
420                    || availability == STATE_KEYPHRASE_UNENROLLED
421                    || availability == STATE_KEYPHRASE_ENROLLED) {
422                enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id);
423                if (!enrolled) {
424                    availability = STATE_KEYPHRASE_UNENROLLED;
425                } else {
426                    availability = STATE_KEYPHRASE_ENROLLED;
427                }
428            }
429
430            synchronized (mLock) {
431                if (DBG) {
432                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
433                            + " -> " + availability);
434                }
435                mAvailability = availability;
436                notifyStateChangedLocked();
437            }
438            return null;
439        }
440
441        /**
442         * @return The initial availability without checking the enrollment status.
443         */
444        private int internalGetInitialAvailability() {
445            synchronized (mLock) {
446                // This detector has already been invalidated.
447                if (mAvailability == STATE_INVALID) {
448                    return STATE_INVALID;
449                }
450            }
451
452            ModuleProperties dspModuleProperties = null;
453            try {
454                dspModuleProperties =
455                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
456            } catch (RemoteException e) {
457                Slog.w(TAG, "RemoteException in getDspProperties!");
458            }
459            // No DSP available
460            if (dspModuleProperties == null) {
461                return STATE_HARDWARE_UNAVAILABLE;
462            }
463            // No enrollment application supports this keyphrase/locale
464            if (mKeyphraseMetadata == null) {
465                return STATE_KEYPHRASE_UNSUPPORTED;
466            }
467            return STATE_NOT_READY;
468        }
469
470        /**
471         * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
472         */
473        private boolean internalGetIsEnrolled(int keyphraseId) {
474            try {
475                return mModelManagementService.isEnrolledForKeyphrase(
476                        mVoiceInteractionService, keyphraseId);
477            } catch (RemoteException e) {
478                Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
479            }
480            return false;
481        }
482    }
483}
484