AlwaysOnHotwordDetector.java revision 2178e2e085056186141ac44563103c6f455de89c
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.annotation.IntDef;
20import android.annotation.NonNull;
21import android.annotation.Nullable;
22import android.content.Intent;
23import android.hardware.soundtrigger.IRecognitionStatusCallback;
24import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
25import android.hardware.soundtrigger.KeyphraseMetadata;
26import android.hardware.soundtrigger.SoundTrigger;
27import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
28import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent;
29import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
30import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
31import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
32import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
33import android.media.AudioFormat;
34import android.os.AsyncTask;
35import android.os.Handler;
36import android.os.Message;
37import android.os.RemoteException;
38import android.util.Slog;
39
40import com.android.internal.app.IVoiceInteractionManagerService;
41
42import java.lang.annotation.Retention;
43import java.lang.annotation.RetentionPolicy;
44
45/**
46 * A class that lets a VoiceInteractionService implementation interact with
47 * always-on keyphrase detection APIs.
48 */
49public class AlwaysOnHotwordDetector {
    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
    /**
     * Indicates that this hotword detector is no longer valid for any recognition
     * and should not be used anymore.
     * This state is private to the detector; once it is set, the public methods of this
     * class throw an {@link IllegalStateException}.
     */
    private static final int STATE_INVALID = -3;

    /**
     * Indicates that recognition for the given keyphrase is not available on the system
     * because of the hardware configuration.
     * No further interaction should be performed with the detector that returns this availability.
     */
    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
    /**
     * Indicates that recognition for the given keyphrase is not supported.
     * No further interaction should be performed with the detector that returns this availability.
     */
    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
    /**
     * Indicates that the given keyphrase is not enrolled.
     * The caller may choose to begin an enrollment flow for the keyphrase.
     */
    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
    /**
     * Indicates that the given keyphrase is currently enrolled and it's possible to start
     * recognition for it.
     */
    public static final int STATE_KEYPHRASE_ENROLLED = 2;

    /**
     * Indicates that the detector isn't ready currently.
     * This is the value the availability starts with, before the first availability
     * refresh has completed.
     */
    private static final int STATE_NOT_READY = 0;
83
    //---- Keyphrase management actions. Used in getManageIntent() ----//
    /**
     * Marks an int as one of the {@code MANAGE_ACTION_*} values.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(value = {
                MANAGE_ACTION_ENROLL,
                MANAGE_ACTION_RE_ENROLL,
                MANAGE_ACTION_UN_ENROLL
            })
    public @interface ManageActions {}

    /** Indicates that we need to enroll. */
    public static final int MANAGE_ACTION_ENROLL = 0;
    /** Indicates that we need to re-enroll. */
    public static final int MANAGE_ACTION_RE_ENROLL = 1;
    /** Indicates that we need to un-enroll. */
    public static final int MANAGE_ACTION_UN_ENROLL = 2;
100
    //---- Flags for startRecognition() ----//
    /**
     * Marks an int as a bitwise combination of the {@code RECOGNITION_FLAG_*} values.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
            })
    public @interface RecognitionFlags {}

    /** Empty flag for {@link #startRecognition(int)}. */
    public static final int RECOGNITION_FLAG_NONE = 0;
    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio for hotword needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the keyphrase triggers.
     * If this flag is specified, it's possible to get multiple triggers after a
     * call to {@link #startRecognition(int)} if the user speaks the keyphrase multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * keyphrase is spoken, till the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;
128
    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.

    /**
     * Marks an int as a bitwise combination of the {@code RECOGNITION_MODE_*} values.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_MODE_VOICE_TRIGGER,
                RECOGNITION_MODE_USER_IDENTIFICATION,
            })
    public @interface RecognitionModes {}

    /**
     * Simple recognition of the key phrase.
     * Returned by {@link #getSupportedRecognitionModes()}
     */
    public static final int RECOGNITION_MODE_VOICE_TRIGGER
            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
    /**
     * User identification performed with the keyphrase recognition.
     * Returned by {@link #getSupportedRecognitionModes()}
     */
    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
153
    /** Log tag used for all Slog output of this class. */
    static final String TAG = "AlwaysOnHotwordDetector";
    // TODO: Set to false.
    // While true, the public methods and detection events are logged with Slog.d.
    static final boolean DBG = true;

    // Local aliases of the SoundTrigger status codes; results returned by the model
    // management service are compared against these.
    private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
    private static final int STATUS_OK = SoundTrigger.STATUS_OK;

    // Message codes posted to mHandler and dispatched to the external callback.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_HOTWORD_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;

    /** The keyphrase text this detector was created for. */
    private final String mText;
    /** The locale this detector was created for. */
    private final String mLocale;
    /**
     * The metadata of the Keyphrase, derived from the enrollment application.
     * This may be null if this keyphrase isn't supported by the enrollment application.
     */
    private final KeyphraseMetadata mKeyphraseMetadata;
    /** Source of the keyphrase metadata and of the manage-keyphrase intents. */
    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
    /** Passed to the model management service with every request made by this detector. */
    private final IVoiceInteractionService mVoiceInteractionService;
    /** Remote service used to query DSP properties and enrollment, and to start/stop recognition. */
    private final IVoiceInteractionManagerService mModelManagementService;
    /** Callback handed to the model management service; forwards events to mHandler. */
    private final SoundTriggerListener mInternalCallback;
    /** The client supplied callback, invoked from mHandler. */
    private final Callback mExternalCallback;
    /** Guards mAvailability. */
    private final Object mLock = new Object();
    private final Handler mHandler;

    /** Current availability, one of the STATE_* values. Accessed while holding mLock. */
    private int mAvailability = STATE_NOT_READY;
181
182    /**
183     * Additional payload for {@link Callback#onDetected}.
184     */
185    public static class EventPayload {
186        /**
187         * Indicates if {@code data} is the audio that triggered the keyphrase.
188         */
189        public final boolean isTriggerAudio;
190        /**
191         * Format of {@code data}. May be null if {@code isTriggerAudio} is false.
192         */
193        @Nullable
194        public final AudioFormat audioFormat;
195        /**
196         * Raw data associated with the event.
197         * This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
198         */
199        @Nullable
200        public final byte[] data;
201
202        private EventPayload(boolean _isTriggerAudio, AudioFormat _audioFormat, byte[] _data) {
203            isTriggerAudio = _isTriggerAudio;
204            audioFormat = _audioFormat;
205            data = _data;
206        }
207    }
208
    /**
     * Callbacks for always-on hotword detection.
     * The detector invokes these from its internal {@link Handler}.
     */
    public interface Callback {
        /**
         * Called when the hotword availability changes.
         * This indicates a change in the availability of recognition for the given keyphrase.
         * It's called at least once with the initial availability.
         *
         * <p>Availability implies whether the hardware on this system is capable of listening for
         * the given keyphrase or not.
         *
         * @param status The new availability, one of the states listed below.
         *
         * @see AlwaysOnHotwordDetector#STATE_HARDWARE_UNAVAILABLE
         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNSUPPORTED
         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNENROLLED
         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_ENROLLED
         */
        void onAvailabilityChanged(int status);
        /**
         * Called when the keyphrase is spoken.
         * This implicitly stops listening for the keyphrase once it's detected.
         * Clients should start a recognition again once they are done handling this
         * detection.
         * See {@link AlwaysOnHotwordDetector#RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS} for
         * requesting that recognition keeps going after a trigger.
         *
         * @param eventPayload Payload data for the detection event.
         *        This may contain the trigger audio, if requested when calling
         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
         */
        void onDetected(@NonNull EventPayload eventPayload);
        /**
         * Called when the detection fails due to an error.
         */
        void onError();
    }
243
244    /**
245     * @param text The keyphrase text to get the detector for.
246     * @param locale The java locale for the detector.
247     * @param callback A non-null Callback for receiving the recognition events.
248     * @param voiceInteractionService The current voice interaction service.
249     * @param modelManagementService A service that allows management of sound models.
250     *
251     * @hide
252     */
253    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
254            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
255            IVoiceInteractionService voiceInteractionService,
256            IVoiceInteractionManagerService modelManagementService) {
257        mText = text;
258        mLocale = locale;
259        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
260        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
261        mExternalCallback = callback;
262        mHandler = new MyHandler();
263        mInternalCallback = new SoundTriggerListener(mHandler);
264        mVoiceInteractionService = voiceInteractionService;
265        mModelManagementService = modelManagementService;
266        new RefreshAvailabiltyTask().execute();
267    }
268
269    /**
270     * Gets the recognition modes supported by the associated keyphrase.
271     *
272     * @see #RECOGNITION_MODE_USER_IDENTIFICATION
273     * @see #RECOGNITION_MODE_VOICE_TRIGGER
274     *
275     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
276     *         Callers should only call this method after a supported state callback on
277     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
278     * @throws IllegalStateException if the detector is in an invalid state.
279     *         This may happen if another detector has been instantiated or the
280     *         {@link VoiceInteractionService} hosting this detector has been shut down.
281     */
282    public @RecognitionModes int getSupportedRecognitionModes() {
283        if (DBG) Slog.d(TAG, "getSupportedRecognitionModes()");
284        synchronized (mLock) {
285            return getSupportedRecognitionModesLocked();
286        }
287    }
288
289    private int getSupportedRecognitionModesLocked() {
290        if (mAvailability == STATE_INVALID) {
291            throw new IllegalStateException(
292                    "getSupportedRecognitionModes called on an invalid detector");
293        }
294
295        // This method only makes sense if we can actually support a recognition.
296        if (mAvailability != STATE_KEYPHRASE_ENROLLED
297                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
298            throw new UnsupportedOperationException(
299                    "Getting supported recognition modes for the keyphrase is not supported");
300        }
301
302        return mKeyphraseMetadata.recognitionModeFlags;
303    }
304
305    /**
306     * Starts recognition for the associated keyphrase.
307     *
308     * @param recognitionFlags The flags to control the recognition properties.
309     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE},
310     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO} and
311     *        {@link #RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS}.
312     * @return Indicates whether the call succeeded or not.
313     * @throws UnsupportedOperationException if the recognition isn't supported.
314     *         Callers should only call this method after a supported state callback on
315     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
316     * @throws IllegalStateException if the detector is in an invalid state.
317     *         This may happen if another detector has been instantiated or the
318     *         {@link VoiceInteractionService} hosting this detector has been shut down.
319     */
320    public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
321        if (DBG) Slog.d(TAG, "startRecognition(" + recognitionFlags + ")");
322        synchronized (mLock) {
323            if (mAvailability == STATE_INVALID) {
324                throw new IllegalStateException("startRecognition called on an invalid detector");
325            }
326
327            // Check if we can start/stop a recognition.
328            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
329                throw new UnsupportedOperationException(
330                        "Recognition for the given keyphrase is not supported");
331            }
332
333            return startRecognitionLocked(recognitionFlags) == STATUS_OK;
334        }
335    }
336
337    /**
338     * Stops recognition for the associated keyphrase.
339     *
340     * @return Indicates whether the call succeeded or not.
341     * @throws UnsupportedOperationException if the recognition isn't supported.
342     *         Callers should only call this method after a supported state callback on
343     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
344     * @throws IllegalStateException if the detector is in an invalid state.
345     *         This may happen if another detector has been instantiated or the
346     *         {@link VoiceInteractionService} hosting this detector has been shut down.
347     */
348    public boolean stopRecognition() {
349        if (DBG) Slog.d(TAG, "stopRecognition()");
350        synchronized (mLock) {
351            if (mAvailability == STATE_INVALID) {
352                throw new IllegalStateException("stopRecognition called on an invalid detector");
353            }
354
355            // Check if we can start/stop a recognition.
356            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
357                throw new UnsupportedOperationException(
358                        "Recognition for the given keyphrase is not supported");
359            }
360
361            return stopRecognitionLocked() == STATUS_OK;
362        }
363    }
364
365    /**
366     * Gets an intent to manage the associated keyphrase.
367     *
368     * @param action The manage action that needs to be performed.
369     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
370     *        {@link #MANAGE_ACTION_UN_ENROLL}.
371     * @return An {@link Intent} to manage the given keyphrase.
372     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
373     *         Callers should only call this method after a supported state callback on
374     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
375     * @throws IllegalStateException if the detector is in an invalid state.
376     *         This may happen if another detector has been instantiated or the
377     *         {@link VoiceInteractionService} hosting this detector has been shut down.
378     */
379    public Intent getManageIntent(@ManageActions int action) {
380        if (DBG) Slog.d(TAG, "getManageIntent(" + action + ")");
381        synchronized (mLock) {
382            return getManageIntentLocked(action);
383        }
384    }
385
386    private Intent getManageIntentLocked(int action) {
387        if (mAvailability == STATE_INVALID) {
388            throw new IllegalStateException("getManageIntent called on an invalid detector");
389        }
390
391        // This method only makes sense if we can actually support a recognition.
392        if (mAvailability != STATE_KEYPHRASE_ENROLLED
393                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
394            throw new UnsupportedOperationException(
395                    "Managing the given keyphrase is not supported");
396        }
397
398        if (action != MANAGE_ACTION_ENROLL
399                && action != MANAGE_ACTION_RE_ENROLL
400                && action != MANAGE_ACTION_UN_ENROLL) {
401            throw new IllegalArgumentException("Invalid action specified " + action);
402        }
403
404        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
405    }
406
407    /**
408     * Invalidates this hotword detector so that any future calls to this result
409     * in an IllegalStateException.
410     *
411     * @hide
412     */
413    void invalidate() {
414        synchronized (mLock) {
415            mAvailability = STATE_INVALID;
416            notifyStateChangedLocked();
417        }
418    }
419
420    /**
421     * Reloads the sound models from the service.
422     *
423     * @hide
424     */
425    void onSoundModelsChanged() {
426        synchronized (mLock) {
427            // FIXME: This should stop the recognition if it was using an enrolled sound model
428            // that's no longer available.
429            if (mAvailability == STATE_INVALID
430                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
431                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
432                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
433                return;
434            }
435
436            // Execute a refresh availability task - which should then notify of a change.
437            new RefreshAvailabiltyTask().execute();
438        }
439    }
440
441    private int startRecognitionLocked(int recognitionFlags) {
442        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
443        // TODO: Do we need to do something about the confidence level here?
444        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
445                mKeyphraseMetadata.recognitionModeFlags, 0, new ConfidenceLevel[0]);
446        boolean captureTriggerAudio =
447                (recognitionFlags&RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
448        boolean allowMultipleTriggers =
449                (recognitionFlags&RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
450        int code = STATUS_ERROR;
451        try {
452            code = mModelManagementService.startRecognition(mVoiceInteractionService,
453                    mKeyphraseMetadata.id, mInternalCallback,
454                    new RecognitionConfig(captureTriggerAudio, allowMultipleTriggers,
455                            recognitionExtra, null /* additional data */));
456        } catch (RemoteException e) {
457            Slog.w(TAG, "RemoteException in startRecognition!");
458        }
459        if (code != STATUS_OK) {
460            Slog.w(TAG, "startRecognition() failed with error code " + code);
461        }
462        return code;
463    }
464
465    private int stopRecognitionLocked() {
466        int code = STATUS_ERROR;
467        try {
468            code = mModelManagementService.stopRecognition(
469                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
470        } catch (RemoteException e) {
471            Slog.w(TAG, "RemoteException in stopRecognition!");
472        }
473
474        if (code != STATUS_OK) {
475            Slog.w(TAG, "stopRecognition() failed with error code " + code);
476        }
477        return code;
478    }
479
480    private void notifyStateChangedLocked() {
481        Message message = Message.obtain(mHandler, MSG_AVAILABILITY_CHANGED);
482        message.arg1 = mAvailability;
483        message.sendToTarget();
484    }
485
486    /** @hide */
487    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
488        private final Handler mHandler;
489
490        public SoundTriggerListener(Handler handler) {
491            mHandler = handler;
492        }
493
494        @Override
495        public void onDetected(KeyphraseRecognitionEvent event) {
496            if (DBG) {
497                Slog.d(TAG, "onDetected(" + event + ")");
498            } else {
499                Slog.i(TAG, "onDetected");
500            }
501            Message.obtain(mHandler, MSG_HOTWORD_DETECTED,
502                    new EventPayload(event.triggerInData, event.captureFormat, event.data))
503                    .sendToTarget();
504        }
505
506        @Override
507        public void onError(int status) {
508            Slog.i(TAG, "onError: " + status);
509            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
510        }
511    }
512
513    class MyHandler extends Handler {
514        @Override
515        public void handleMessage(Message msg) {
516            synchronized (mLock) {
517                if (mAvailability == STATE_INVALID) {
518                    Slog.w(TAG, "Received message: " + msg.what + " for an invalid detector");
519                    return;
520                }
521            }
522
523            switch (msg.what) {
524                case MSG_AVAILABILITY_CHANGED:
525                    mExternalCallback.onAvailabilityChanged(msg.arg1);
526                    break;
527                case MSG_HOTWORD_DETECTED:
528                    mExternalCallback.onDetected((EventPayload) msg.obj);
529                    break;
530                case MSG_DETECTION_ERROR:
531                    mExternalCallback.onError();
532                    break;
533                default:
534                    super.handleMessage(msg);
535            }
536        }
537    }
538
539    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {
540
541        @Override
542        public Void doInBackground(Void... params) {
543            int availability = internalGetInitialAvailability();
544            boolean enrolled = false;
545            // Fetch the sound model if the availability is one of the supported ones.
546            if (availability == STATE_NOT_READY
547                    || availability == STATE_KEYPHRASE_UNENROLLED
548                    || availability == STATE_KEYPHRASE_ENROLLED) {
549                enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id);
550                if (!enrolled) {
551                    availability = STATE_KEYPHRASE_UNENROLLED;
552                } else {
553                    availability = STATE_KEYPHRASE_ENROLLED;
554                }
555            }
556
557            synchronized (mLock) {
558                if (DBG) {
559                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
560                            + " -> " + availability);
561                }
562                mAvailability = availability;
563                notifyStateChangedLocked();
564            }
565            return null;
566        }
567
568        /**
569         * @return The initial availability without checking the enrollment status.
570         */
571        private int internalGetInitialAvailability() {
572            synchronized (mLock) {
573                // This detector has already been invalidated.
574                if (mAvailability == STATE_INVALID) {
575                    return STATE_INVALID;
576                }
577            }
578
579            ModuleProperties dspModuleProperties = null;
580            try {
581                dspModuleProperties =
582                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
583            } catch (RemoteException e) {
584                Slog.w(TAG, "RemoteException in getDspProperties!");
585            }
586            // No DSP available
587            if (dspModuleProperties == null) {
588                return STATE_HARDWARE_UNAVAILABLE;
589            }
590            // No enrollment application supports this keyphrase/locale
591            if (mKeyphraseMetadata == null) {
592                return STATE_KEYPHRASE_UNSUPPORTED;
593            }
594            return STATE_NOT_READY;
595        }
596
597        /**
598         * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
599         */
600        private boolean internalGetIsEnrolled(int keyphraseId) {
601            try {
602                return mModelManagementService.isEnrolledForKeyphrase(
603                        mVoiceInteractionService, keyphraseId);
604            } catch (RemoteException e) {
605                Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
606            }
607            return false;
608        }
609    }
610}
611