AlwaysOnHotwordDetector.java revision 1ed12ddb8c46193cc4d790b9c7d6a5d61afb3311
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.IRecognitionStatusCallback;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.SoundTrigger;
24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
25import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30import android.os.AsyncTask;
31import android.os.Handler;
32import android.os.Message;
33import android.os.RemoteException;
34import android.util.Slog;
35
36import com.android.internal.app.IVoiceInteractionManagerService;
37
38/**
39 * A class that lets a VoiceInteractionService implementation interact with
40 * always-on keyphrase detection APIs.
41 */
42public class AlwaysOnHotwordDetector {
43    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
44    /**
45     * Indicates that this hotword detector is no longer valid for any recognition
46     * and should not be used anymore.
47     */
48    public static final int STATE_INVALID = -3;
49    /**
50     * Indicates that recognition for the given keyphrase is not available on the system
51     * because of the hardware configuration.
52     */
53    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
54    /**
55     * Indicates that recognition for the given keyphrase is not supported.
56     */
57    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
58    /**
59     * Indicates that the given keyphrase is not enrolled.
60     */
61    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
62    /**
63     * Indicates that the given keyphrase is currently enrolled and it's possible to start
64     * recognition for it.
65     */
66    public static final int STATE_KEYPHRASE_ENROLLED = 2;
67
68    /**
69     * Indicates that the detector isn't ready currently.
70     */
71    private static final int STATE_NOT_READY = 0;
72
73    //---- Keyphrase management actions. Used in getManageIntent() ----//
74    /** Indicates that we need to enroll. */
75    public static final int MANAGE_ACTION_ENROLL = 0;
76    /** Indicates that we need to re-enroll. */
77    public static final int MANAGE_ACTION_RE_ENROLL = 1;
78    /** Indicates that we need to un-enroll. */
79    public static final int MANAGE_ACTION_UN_ENROLL = 2;
80
81    //---- Flags for startRecognition() ----//
82    /** Empty flag for {@link #startRecognition(int)}. */
83    public static final int RECOGNITION_FLAG_NONE = 0;
84    /**
85     * Recognition flag for {@link #startRecognition(int)} that indicates
86     * whether the trigger audio for hotword needs to be captured.
87     */
88    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
89
90    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
91    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
92
93    /**
94     * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()}
95     */
96    public static final int RECOGNITION_MODE_VOICE_TRIGGER
97            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
98    /**
99     * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()}
100     */
101    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
102            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
103
104    static final String TAG = "AlwaysOnHotwordDetector";
105    // TODO: Set to false.
106    static final boolean DBG = true;
107
108    private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
109    private static final int STATUS_OK = SoundTrigger.STATUS_OK;
110
111    private static final int MSG_STATE_CHANGED = 1;
112    private static final int MSG_HOTWORD_DETECTED = 2;
113    private static final int MSG_DETECTION_STARTED = 3;
114    private static final int MSG_DETECTION_STOPPED = 4;
115    private static final int MSG_DETECTION_ERROR = 5;
116
117    private static final int FLAG_REQUESTED = 0x1;
118    private static final int FLAG_STARTED = 0x2;
119    private static final int FLAG_CALL_ACTIVE = 0x4;
120    private static final int FLAG_MICROPHONE_OPEN = 0x8;
121
122    private final String mText;
123    private final String mLocale;
124    /**
125     * The metadata of the Keyphrase, derived from the enrollment application.
126     * This may be null if this keyphrase isn't supported by the enrollment application.
127     */
128    private final KeyphraseMetadata mKeyphraseMetadata;
129    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
130    private final IVoiceInteractionService mVoiceInteractionService;
131    private final IVoiceInteractionManagerService mModelManagementService;
132    private final SoundTriggerListener mInternalCallback;
133    private final Callback mExternalCallback;
134    private final Object mLock = new Object();
135    private final Handler mHandler;
136
137    private int mAvailability = STATE_NOT_READY;
138    private int mInternalState = 0;
139    private int mRecognitionFlags = RECOGNITION_FLAG_NONE;
140
141    /**
142     * Callbacks for always-on hotword detection.
143     */
144    public interface Callback {
145        /**
146         * Called when the hotword availability changes.
147         * This indicates a change in the availability of recognition for the given keyphrase.
148         * It's called at least once with the initial availability.<p/>
149         *
150         * Availability implies whether the hardware on this system is capable of listening for
151         * the given keyphrase or not. <p/>
152         * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or
153         * {@link #STATE_KEYPHRASE_UNSUPPORTED},
154         * detection is not possible and no further interaction should be
155         * performed with this detector. <br/>
156         * If it is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin
157         * an enrollment flow for the keyphrase. <br/>
158         * and for {@link #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <p/>
159         *
160         * If the return code is {@link #STATE_INVALID}, this detector is stale.
161         * A new detector should be obtained for use in the future.
162         */
163        void onAvailabilityChanged(int status);
164        /**
165         * Called when the keyphrase is spoken.
166         * This implicitly stops listening for the keyphrase once it's detected.
167         * Clients should start a recognition again once they are done handling this
168         * detection.
169         *
170         * @param data Optional trigger audio data, if it was requested during
171         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
172         */
173        void onDetected(byte[] data);
174        /**
175         * Called when the detection for the associated keyphrase starts.
176         * This is called as a result of a successful call to
177         * {@link AlwaysOnHotwordDetector#startRecognition(int)}.
178         */
179        void onDetectionStarted();
180        /**
181         * Called when the detection for the associated keyphrase stops.
182         * This is called as a result of a successful call to
183         * {@link AlwaysOnHotwordDetector#stopRecognition()}.
184         */
185        void onDetectionStopped();
186        /**
187         * Called when the detection fails due to an error.
188         */
189        void onError();
190    }
191
192    /**
193     * @param text The keyphrase text to get the detector for.
194     * @param locale The java locale for the detector.
195     * @param callback A non-null Callback for receiving the recognition events.
196     * @param voiceInteractionService The current voice interaction service.
197     * @param modelManagementService A service that allows management of sound models.
198     *
199     * @hide
200     */
201    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
202            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
203            IVoiceInteractionService voiceInteractionService,
204            IVoiceInteractionManagerService modelManagementService) {
205        mText = text;
206        mLocale = locale;
207        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
208        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
209        mExternalCallback = callback;
210        mHandler = new MyHandler();
211        mInternalCallback = new SoundTriggerListener(mHandler);
212        mVoiceInteractionService = voiceInteractionService;
213        mModelManagementService = modelManagementService;
214        new RefreshAvailabiltyTask().execute();
215    }
216
217    /**
218     * Gets the recognition modes supported by the associated keyphrase.
219     *
220     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
221     *         Callers should only call this method after a supported state callback on
222     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
223     */
224    public int getSupportedRecognitionModes() {
225        synchronized (mLock) {
226            return getSupportedRecognitionModesLocked();
227        }
228    }
229
230    private int getSupportedRecognitionModesLocked() {
231        // This method only makes sense if we can actually support a recognition.
232        if (mAvailability != STATE_KEYPHRASE_ENROLLED
233                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
234            throw new UnsupportedOperationException(
235                    "Getting supported recognition modes for the keyphrase is not supported");
236        }
237
238        return mKeyphraseMetadata.recognitionModeFlags;
239    }
240
241    /**
242     * Starts recognition for the associated keyphrase.
243     *
244     * @param recognitionFlags The flags to control the recognition properties.
245     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
246     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
247     * @throws UnsupportedOperationException if the recognition isn't supported.
248     *         Callers should only call this method after a supported state callback on
249     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
250     */
251    public void startRecognition(int recognitionFlags) {
252        synchronized (mLock) {
253            // Check if we can start/stop a recognition.
254            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
255                throw new UnsupportedOperationException(
256                        "Recognition for the given keyphrase is not supported");
257            }
258
259            mInternalState |= FLAG_REQUESTED;
260            mRecognitionFlags = recognitionFlags;
261            updateRecognitionLocked();
262        }
263    }
264
265    /**
266     * Stops recognition for the associated keyphrase.
267     *
268     * @throws UnsupportedOperationException if the recognition isn't supported.
269     *         Callers should only call this method after a supported state callback on
270     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
271     */
272    public void stopRecognition() {
273        synchronized (mLock) {
274            // Check if we can start/stop a recognition.
275            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
276                throw new UnsupportedOperationException(
277                        "Recognition for the given keyphrase is not supported");
278            }
279
280            mInternalState &= ~FLAG_REQUESTED;
281            mRecognitionFlags = RECOGNITION_FLAG_NONE;
282            updateRecognitionLocked();
283        }
284    }
285
286    /**
287     * Gets an intent to manage the associated keyphrase.
288     *
289     * @param action The manage action that needs to be performed.
290     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
291     *        {@link #MANAGE_ACTION_UN_ENROLL}.
292     * @return An {@link Intent} to manage the given keyphrase.
293     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
294     *         Callers should only call this method after a supported state callback on
295     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
296     */
297    public Intent getManageIntent(int action) {
298        // This method only makes sense if we can actually support a recognition.
299        if (mAvailability != STATE_KEYPHRASE_ENROLLED
300                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
301            throw new UnsupportedOperationException(
302                    "Managing the given keyphrase is not supported");
303        }
304        if (action != MANAGE_ACTION_ENROLL
305                && action != MANAGE_ACTION_RE_ENROLL
306                && action != MANAGE_ACTION_UN_ENROLL) {
307            throw new IllegalArgumentException("Invalid action specified " + action);
308        }
309
310        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
311    }
312
313    /**
314     * Invalidates this hotword detector so that any future calls to this result
315     * in an IllegalStateException.
316     *
317     * @hide
318     */
319    void invalidate() {
320        synchronized (mLock) {
321            mAvailability = STATE_INVALID;
322            notifyStateChangedLocked();
323        }
324    }
325
326    /**
327     * Reloads the sound models from the service.
328     *
329     * @hide
330     */
331    void onSoundModelsChanged() {
332        synchronized (mLock) {
333            // TODO: This should stop the recognition if it was using an enrolled sound model
334            // that's no longer available.
335            if (mAvailability == STATE_INVALID
336                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
337                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
338                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
339                return;
340            }
341
342            // Execute a refresh availability task - which should then notify of a change.
343            new RefreshAvailabiltyTask().execute();
344        }
345    }
346
347    @SuppressWarnings("unused")
348    private void onCallStateChanged(boolean active) {
349        synchronized (mLock) {
350            if (active) {
351                mInternalState |= FLAG_CALL_ACTIVE;
352            } else {
353                mInternalState &= ~FLAG_CALL_ACTIVE;
354            }
355
356            updateRecognitionLocked();
357        }
358    }
359
360    @SuppressWarnings("unused")
361    private void onMicrophoneStateChanged(boolean open) {
362        synchronized (mLock) {
363            if (open) {
364                mInternalState |= FLAG_MICROPHONE_OPEN;
365            } else {
366                mInternalState &= ~FLAG_MICROPHONE_OPEN;
367            }
368
369            updateRecognitionLocked();
370        }
371    }
372
373    private void updateRecognitionLocked() {
374        // Don't attempt to update the recognition state if keyphrase isn't enrolled.
375        if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
376            return;
377        }
378
379        // Start recognition if requested and not in a call/reading from the microphone
380        boolean start = (mInternalState&FLAG_REQUESTED) != 0
381                && (mInternalState&FLAG_CALL_ACTIVE) == 0
382                && (mInternalState&FLAG_MICROPHONE_OPEN) == 0;
383        boolean requested = (mInternalState&FLAG_REQUESTED) != 0;
384
385        if (start && (mInternalState&FLAG_STARTED) == 0) {
386            // Start recognition.
387            if (DBG) Slog.d(TAG, "starting recognition...");
388            int status = startRecognitionLocked();
389            if (status == STATUS_OK) {
390                mInternalState |= FLAG_STARTED;
391                mHandler.sendEmptyMessage(MSG_DETECTION_STARTED);
392            } else {
393                if (DBG) Slog.d(TAG, "failed to start recognition: " + status);
394                mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
395            }
396            // Post the callback
397            return;
398        }
399
400        if (!start && (mInternalState&FLAG_STARTED) != 0) {
401            // Stop recognition
402            // Only notify the callback if a recognition was *not* requested.
403            // For internal stoppages, don't notify the callback.
404            if (DBG) Slog.d(TAG, "stopping recognition...");
405            int status = stopRecognitionLocked();
406            if (status == STATUS_OK) {
407                mInternalState &= ~FLAG_STARTED;
408                if (!requested) mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED);
409            } else {
410                if (!requested) mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
411                if (DBG) Slog.d(TAG, "failed to stop recognition: " + status);
412            }
413            return;
414        }
415    }
416
417    private int startRecognitionLocked() {
418        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
419        // TODO: Do we need to do something about the confidence level here?
420        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
421                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
422        boolean captureTriggerAudio =
423                (mRecognitionFlags&RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
424        int code = STATUS_ERROR;
425        try {
426            code = mModelManagementService.startRecognition(mVoiceInteractionService,
427                    mKeyphraseMetadata.id, mInternalCallback,
428                    new RecognitionConfig(
429                            captureTriggerAudio, recognitionExtra, null /* additional data */));
430        } catch (RemoteException e) {
431            Slog.w(TAG, "RemoteException in startRecognition!");
432        }
433        if (code != STATUS_OK) {
434            Slog.w(TAG, "startRecognition() failed with error code " + code);
435        }
436        return code;
437    }
438
439    private int stopRecognitionLocked() {
440        int code = STATUS_ERROR;
441        try {
442            code = mModelManagementService.stopRecognition(
443                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
444        } catch (RemoteException e) {
445            Slog.w(TAG, "RemoteException in stopRecognition!");
446        }
447
448        if (code != STATUS_OK) {
449            Slog.w(TAG, "stopRecognition() failed with error code " + code);
450        }
451        return code;
452    }
453
454    private void notifyStateChangedLocked() {
455        Message message = Message.obtain(mHandler, MSG_STATE_CHANGED);
456        message.arg1 = mAvailability;
457        message.sendToTarget();
458    }
459
460    /** @hide */
461    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
462        private final Handler mHandler;
463
464        public SoundTriggerListener(Handler handler) {
465            mHandler = handler;
466        }
467
468        @Override
469        public void onDetected(KeyphraseRecognitionEvent event) {
470            Slog.i(TAG, "onDetected");
471            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
472            message.obj = event.data;
473            message.sendToTarget();
474        }
475
476        @Override
477        public void onError(int status) {
478            Slog.i(TAG, "onError: " + status);
479            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
480        }
481    }
482
483    class MyHandler extends Handler {
484        @Override
485        public void handleMessage(Message msg) {
486            switch (msg.what) {
487                case MSG_STATE_CHANGED:
488                    mExternalCallback.onAvailabilityChanged(msg.arg1);
489                    break;
490                case MSG_HOTWORD_DETECTED:
491                    mExternalCallback.onDetected((byte[]) msg.obj);
492                    break;
493                case MSG_DETECTION_STARTED:
494                    mExternalCallback.onDetectionStarted();
495                    break;
496                case MSG_DETECTION_STOPPED:
497                    mExternalCallback.onDetectionStopped();
498                    break;
499                case MSG_DETECTION_ERROR:
500                    mExternalCallback.onError();
501                    break;
502                default:
503                    super.handleMessage(msg);
504            }
505        }
506    }
507
508    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {
509
510        @Override
511        public Void doInBackground(Void... params) {
512            int availability = internalGetInitialAvailability();
513            boolean enrolled = false;
514            // Fetch the sound model if the availability is one of the supported ones.
515            if (availability == STATE_NOT_READY
516                    || availability == STATE_KEYPHRASE_UNENROLLED
517                    || availability == STATE_KEYPHRASE_ENROLLED) {
518                enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id);
519                if (!enrolled) {
520                    availability = STATE_KEYPHRASE_UNENROLLED;
521                } else {
522                    availability = STATE_KEYPHRASE_ENROLLED;
523                }
524            }
525
526            synchronized (mLock) {
527                if (DBG) {
528                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
529                            + " -> " + availability);
530                }
531                mAvailability = availability;
532                notifyStateChangedLocked();
533            }
534            return null;
535        }
536
537        /**
538         * @return The initial availability without checking the enrollment status.
539         */
540        private int internalGetInitialAvailability() {
541            synchronized (mLock) {
542                // This detector has already been invalidated.
543                if (mAvailability == STATE_INVALID) {
544                    return STATE_INVALID;
545                }
546            }
547
548            ModuleProperties dspModuleProperties = null;
549            try {
550                dspModuleProperties =
551                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
552            } catch (RemoteException e) {
553                Slog.w(TAG, "RemoteException in getDspProperties!");
554            }
555            // No DSP available
556            if (dspModuleProperties == null) {
557                return STATE_HARDWARE_UNAVAILABLE;
558            }
559            // No enrollment application supports this keyphrase/locale
560            if (mKeyphraseMetadata == null) {
561                return STATE_KEYPHRASE_UNSUPPORTED;
562            }
563            return STATE_NOT_READY;
564        }
565
566        /**
567         * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
568         */
569        private boolean internalGetIsEnrolled(int keyphraseId) {
570            try {
571                return mModelManagementService.isEnrolledForKeyphrase(
572                        mVoiceInteractionService, keyphraseId);
573            } catch (RemoteException e) {
574                Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
575            }
576            return false;
577        }
578    }
579}
580