AlwaysOnHotwordDetector.java revision 0db30899f0d44e4fbaddffb79cc3415db6efb657
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.annotation.IntDef;
20import android.annotation.NonNull;
21import android.annotation.Nullable;
22import android.content.Intent;
23import android.hardware.soundtrigger.IRecognitionStatusCallback;
24import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
25import android.hardware.soundtrigger.KeyphraseMetadata;
26import android.hardware.soundtrigger.SoundTrigger;
27import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
28import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent;
29import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
30import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
31import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
32import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
33import android.media.AudioFormat;
34import android.os.AsyncTask;
35import android.os.Handler;
36import android.os.Message;
37import android.os.RemoteException;
38import android.util.Slog;
39
40import com.android.internal.app.IVoiceInteractionManagerService;
41
42import java.lang.annotation.Retention;
43import java.lang.annotation.RetentionPolicy;
44
45/**
46 * A class that lets a VoiceInteractionService implementation interact with
47 * always-on keyphrase detection APIs.
48 */
49public class AlwaysOnHotwordDetector {
50    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
51    /**
52     * Indicates that this hotword detector is no longer valid for any recognition
53     * and should not be used anymore.
54     */
55    private static final int STATE_INVALID = -3;
56
57    /**
58     * Indicates that recognition for the given keyphrase is not available on the system
59     * because of the hardware configuration.
60     * No further interaction should be performed with the detector that returns this availability.
61     */
62    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
63    /**
64     * Indicates that recognition for the given keyphrase is not supported.
65     * No further interaction should be performed with the detector that returns this availability.
66     */
67    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
68    /**
69     * Indicates that the given keyphrase is not enrolled.
70     * The caller may choose to begin an enrollment flow for the keyphrase.
71     */
72    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
73    /**
74     * Indicates that the given keyphrase is currently enrolled and it's possible to start
75     * recognition for it.
76     */
77    public static final int STATE_KEYPHRASE_ENROLLED = 2;
78
79    /**
80     * Indicates that the detector isn't ready currently.
81     */
82    private static final int STATE_NOT_READY = 0;
83
84    // Keyphrase management actions. Used in getManageIntent() ----//
85    /** @hide */
86    @Retention(RetentionPolicy.SOURCE)
87    @IntDef(value = {
88                MANAGE_ACTION_ENROLL,
89                MANAGE_ACTION_RE_ENROLL,
90                MANAGE_ACTION_UN_ENROLL
91            })
92    public @interface ManageActions {}
93
94    /** Indicates that we need to enroll. */
95    public static final int MANAGE_ACTION_ENROLL = 0;
96    /** Indicates that we need to re-enroll. */
97    public static final int MANAGE_ACTION_RE_ENROLL = 1;
98    /** Indicates that we need to un-enroll. */
99    public static final int MANAGE_ACTION_UN_ENROLL = 2;
100
101    //-- Flags for startRecognition    ----//
102    /** @hide */
103    @Retention(RetentionPolicy.SOURCE)
104    @IntDef(flag = true,
105            value = {
106                RECOGNITION_FLAG_NONE,
107                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
108                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
109            })
110    public @interface RecognitionFlags {}
111
112    /** Empty flag for {@link #startRecognition(int)}. */
113    public static final int RECOGNITION_FLAG_NONE = 0;
114    /**
115     * Recognition flag for {@link #startRecognition(int)} that indicates
116     * whether the trigger audio for hotword needs to be captured.
117     */
118    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
119    /**
120     * Recognition flag for {@link #startRecognition(int)} that indicates
121     * whether the recognition should keep going on even after the keyphrase triggers.
122     * If this flag is specified, it's possible to get multiple triggers after a
123     * call to {@link #startRecognition(int)} if the user speaks the keyphrase multiple times.
124     * When this isn't specified, the default behavior is to stop recognition once the
125     * keyphrase is spoken, till the caller starts recognition again.
126     */
127    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;
128
129    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
130    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
131
132    /** @hide */
133    @Retention(RetentionPolicy.SOURCE)
134    @IntDef(flag = true,
135            value = {
136                RECOGNITION_MODE_VOICE_TRIGGER,
137                RECOGNITION_MODE_USER_IDENTIFICATION,
138            })
139    public @interface RecognitionModes {}
140
141    /**
142     * Simple recognition of the key phrase.
143     * Returned by {@link #getSupportedRecognitionModes()}
144     */
145    public static final int RECOGNITION_MODE_VOICE_TRIGGER
146            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
147    /**
148     * User identification performed with the keyphrase recognition.
149     * Returned by {@link #getSupportedRecognitionModes()}
150     */
151    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
152            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
153
154    static final String TAG = "AlwaysOnHotwordDetector";
155    // TODO: Set to false.
156    static final boolean DBG = true;
157
158    private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
159    private static final int STATUS_OK = SoundTrigger.STATUS_OK;
160
161    private static final int MSG_AVAILABILITY_CHANGED = 1;
162    private static final int MSG_HOTWORD_DETECTED = 2;
163    private static final int MSG_DETECTION_ERROR = 3;
164
165    private final String mText;
166    private final String mLocale;
167    /**
168     * The metadata of the Keyphrase, derived from the enrollment application.
169     * This may be null if this keyphrase isn't supported by the enrollment application.
170     */
171    private final KeyphraseMetadata mKeyphraseMetadata;
172    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
173    private final IVoiceInteractionService mVoiceInteractionService;
174    private final IVoiceInteractionManagerService mModelManagementService;
175    private final SoundTriggerListener mInternalCallback;
176    private final Callback mExternalCallback;
177    private final Object mLock = new Object();
178    private final Handler mHandler;
179
180    private int mAvailability = STATE_NOT_READY;
181
182    /**
183     * Details of the audio that triggered the keyphrase.
184     */
185    public static class TriggerAudio {
186        /**
187         * Format of {@code data}.
188         */
189        @NonNull
190        public final AudioFormat audioFormat;
191        /**
192         * Raw audio data that triggered they keyphrase.
193         */
194        @NonNull
195        public final byte[] data;
196
197        private TriggerAudio(AudioFormat _audioFormat, byte[] _data) {
198            audioFormat = _audioFormat;
199            data = _data;
200        }
201    }
202
203    /**
204     * Callbacks for always-on hotword detection.
205     */
206    public interface Callback {
207        /**
208         * Called when the hotword availability changes.
209         * This indicates a change in the availability of recognition for the given keyphrase.
210         * It's called at least once with the initial availability.<p/>
211         *
212         * Availability implies whether the hardware on this system is capable of listening for
213         * the given keyphrase or not. <p/>
214         *
215         * @see AlwaysOnHotwordDetector#STATE_HARDWARE_UNAVAILABLE
216         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNSUPPORTED
217         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNENROLLED
218         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_ENROLLED
219         */
220        void onAvailabilityChanged(int status);
221        /**
222         * Called when the keyphrase is spoken.
223         * This implicitly stops listening for the keyphrase once it's detected.
224         * Clients should start a recognition again once they are done handling this
225         * detection.
226         *
227         * @param triggerAudio Optional trigger audio data, if it was requested during
228         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
229         */
230        void onDetected(@Nullable TriggerAudio triggerAudio);
231        /**
232         * Called when the detection fails due to an error.
233         */
234        void onError();
235    }
236
237    /**
238     * @param text The keyphrase text to get the detector for.
239     * @param locale The java locale for the detector.
240     * @param callback A non-null Callback for receiving the recognition events.
241     * @param voiceInteractionService The current voice interaction service.
242     * @param modelManagementService A service that allows management of sound models.
243     *
244     * @hide
245     */
246    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
247            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
248            IVoiceInteractionService voiceInteractionService,
249            IVoiceInteractionManagerService modelManagementService) {
250        mText = text;
251        mLocale = locale;
252        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
253        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
254        mExternalCallback = callback;
255        mHandler = new MyHandler();
256        mInternalCallback = new SoundTriggerListener(mHandler);
257        mVoiceInteractionService = voiceInteractionService;
258        mModelManagementService = modelManagementService;
259        new RefreshAvailabiltyTask().execute();
260    }
261
262    /**
263     * Gets the recognition modes supported by the associated keyphrase.
264     *
265     * @see #RECOGNITION_MODE_USER_IDENTIFICATION
266     * @see #RECOGNITION_MODE_VOICE_TRIGGER
267     *
268     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
269     *         Callers should only call this method after a supported state callback on
270     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
271     * @throws IllegalStateException if the detector is in an invalid state.
272     *         This may happen if another detector has been instantiated or the
273     *         {@link VoiceInteractionService} hosting this detector has been shut down.
274     */
275    public @RecognitionModes int getSupportedRecognitionModes() {
276        if (DBG) Slog.d(TAG, "getSupportedRecognitionModes()");
277        synchronized (mLock) {
278            return getSupportedRecognitionModesLocked();
279        }
280    }
281
282    private int getSupportedRecognitionModesLocked() {
283        if (mAvailability == STATE_INVALID) {
284            throw new IllegalStateException(
285                    "getSupportedRecognitionModes called on an invalid detector");
286        }
287
288        // This method only makes sense if we can actually support a recognition.
289        if (mAvailability != STATE_KEYPHRASE_ENROLLED
290                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
291            throw new UnsupportedOperationException(
292                    "Getting supported recognition modes for the keyphrase is not supported");
293        }
294
295        return mKeyphraseMetadata.recognitionModeFlags;
296    }
297
298    /**
299     * Starts recognition for the associated keyphrase.
300     *
301     * @param recognitionFlags The flags to control the recognition properties.
302     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE},
303     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO} and
304     *        {@link #RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS}.
305     * @return Indicates whether the call succeeded or not.
306     * @throws UnsupportedOperationException if the recognition isn't supported.
307     *         Callers should only call this method after a supported state callback on
308     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
309     * @throws IllegalStateException if the detector is in an invalid state.
310     *         This may happen if another detector has been instantiated or the
311     *         {@link VoiceInteractionService} hosting this detector has been shut down.
312     */
313    public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
314        if (DBG) Slog.d(TAG, "startRecognition(" + recognitionFlags + ")");
315        synchronized (mLock) {
316            if (mAvailability == STATE_INVALID) {
317                throw new IllegalStateException("startRecognition called on an invalid detector");
318            }
319
320            // Check if we can start/stop a recognition.
321            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
322                throw new UnsupportedOperationException(
323                        "Recognition for the given keyphrase is not supported");
324            }
325
326            return startRecognitionLocked(recognitionFlags) == STATUS_OK;
327        }
328    }
329
330    /**
331     * Stops recognition for the associated keyphrase.
332     *
333     * @return Indicates whether the call succeeded or not.
334     * @throws UnsupportedOperationException if the recognition isn't supported.
335     *         Callers should only call this method after a supported state callback on
336     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
337     * @throws IllegalStateException if the detector is in an invalid state.
338     *         This may happen if another detector has been instantiated or the
339     *         {@link VoiceInteractionService} hosting this detector has been shut down.
340     */
341    public boolean stopRecognition() {
342        if (DBG) Slog.d(TAG, "stopRecognition()");
343        synchronized (mLock) {
344            if (mAvailability == STATE_INVALID) {
345                throw new IllegalStateException("stopRecognition called on an invalid detector");
346            }
347
348            // Check if we can start/stop a recognition.
349            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
350                throw new UnsupportedOperationException(
351                        "Recognition for the given keyphrase is not supported");
352            }
353
354            return stopRecognitionLocked() == STATUS_OK;
355        }
356    }
357
358    /**
359     * Gets an intent to manage the associated keyphrase.
360     *
361     * @param action The manage action that needs to be performed.
362     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
363     *        {@link #MANAGE_ACTION_UN_ENROLL}.
364     * @return An {@link Intent} to manage the given keyphrase.
365     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
366     *         Callers should only call this method after a supported state callback on
367     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
368     * @throws IllegalStateException if the detector is in an invalid state.
369     *         This may happen if another detector has been instantiated or the
370     *         {@link VoiceInteractionService} hosting this detector has been shut down.
371     */
372    public Intent getManageIntent(@ManageActions int action) {
373        if (DBG) Slog.d(TAG, "getManageIntent(" + action + ")");
374        synchronized (mLock) {
375            return getManageIntentLocked(action);
376        }
377    }
378
379    private Intent getManageIntentLocked(int action) {
380        if (mAvailability == STATE_INVALID) {
381            throw new IllegalStateException("getManageIntent called on an invalid detector");
382        }
383
384        // This method only makes sense if we can actually support a recognition.
385        if (mAvailability != STATE_KEYPHRASE_ENROLLED
386                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
387            throw new UnsupportedOperationException(
388                    "Managing the given keyphrase is not supported");
389        }
390
391        if (action != MANAGE_ACTION_ENROLL
392                && action != MANAGE_ACTION_RE_ENROLL
393                && action != MANAGE_ACTION_UN_ENROLL) {
394            throw new IllegalArgumentException("Invalid action specified " + action);
395        }
396
397        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
398    }
399
400    /**
401     * Invalidates this hotword detector so that any future calls to this result
402     * in an IllegalStateException.
403     *
404     * @hide
405     */
406    void invalidate() {
407        synchronized (mLock) {
408            mAvailability = STATE_INVALID;
409            notifyStateChangedLocked();
410        }
411    }
412
413    /**
414     * Reloads the sound models from the service.
415     *
416     * @hide
417     */
418    void onSoundModelsChanged() {
419        synchronized (mLock) {
420            // FIXME: This should stop the recognition if it was using an enrolled sound model
421            // that's no longer available.
422            if (mAvailability == STATE_INVALID
423                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
424                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
425                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
426                return;
427            }
428
429            // Execute a refresh availability task - which should then notify of a change.
430            new RefreshAvailabiltyTask().execute();
431        }
432    }
433
434    private int startRecognitionLocked(int recognitionFlags) {
435        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
436        // TODO: Do we need to do something about the confidence level here?
437        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
438                mKeyphraseMetadata.recognitionModeFlags, 0, new ConfidenceLevel[0]);
439        boolean captureTriggerAudio =
440                (recognitionFlags&RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
441        boolean allowMultipleTriggers =
442                (recognitionFlags&RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
443        int code = STATUS_ERROR;
444        try {
445            code = mModelManagementService.startRecognition(mVoiceInteractionService,
446                    mKeyphraseMetadata.id, mInternalCallback,
447                    new RecognitionConfig(captureTriggerAudio, allowMultipleTriggers,
448                            recognitionExtra, null /* additional data */));
449        } catch (RemoteException e) {
450            Slog.w(TAG, "RemoteException in startRecognition!");
451        }
452        if (code != STATUS_OK) {
453            Slog.w(TAG, "startRecognition() failed with error code " + code);
454        }
455        return code;
456    }
457
458    private int stopRecognitionLocked() {
459        int code = STATUS_ERROR;
460        try {
461            code = mModelManagementService.stopRecognition(
462                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
463        } catch (RemoteException e) {
464            Slog.w(TAG, "RemoteException in stopRecognition!");
465        }
466
467        if (code != STATUS_OK) {
468            Slog.w(TAG, "stopRecognition() failed with error code " + code);
469        }
470        return code;
471    }
472
473    private void notifyStateChangedLocked() {
474        Message message = Message.obtain(mHandler, MSG_AVAILABILITY_CHANGED);
475        message.arg1 = mAvailability;
476        message.sendToTarget();
477    }
478
479    /** @hide */
480    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
481        private final Handler mHandler;
482
483        public SoundTriggerListener(Handler handler) {
484            mHandler = handler;
485        }
486
487        @Override
488        public void onDetected(KeyphraseRecognitionEvent event) {
489            if (DBG) {
490                Slog.d(TAG, "OnDetected(" + event + ")");
491            } else {
492                Slog.i(TAG, "onDetected");
493            }
494            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
495            // FIXME: Check whether the event contains trigger data or not.
496            // FIXME: Read the audio format from the event.
497            if (event.data != null) {
498                AudioFormat audioFormat = new AudioFormat.Builder()
499                        .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
500                        .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
501                        .setSampleRate(16000)
502                        .build();
503                message.obj = new TriggerAudio(audioFormat, event.data);
504            }
505            message.sendToTarget();
506        }
507
508        @Override
509        public void onError(int status) {
510            Slog.i(TAG, "onError: " + status);
511            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
512        }
513    }
514
515    class MyHandler extends Handler {
516        @Override
517        public void handleMessage(Message msg) {
518            synchronized (mLock) {
519                if (mAvailability == STATE_INVALID) {
520                    Slog.w(TAG, "Received message: " + msg.what + " for an invalid detector");
521                    return;
522                }
523            }
524
525            switch (msg.what) {
526                case MSG_AVAILABILITY_CHANGED:
527                    mExternalCallback.onAvailabilityChanged(msg.arg1);
528                    break;
529                case MSG_HOTWORD_DETECTED:
530                    mExternalCallback.onDetected((TriggerAudio) msg.obj);
531                    break;
532                case MSG_DETECTION_ERROR:
533                    mExternalCallback.onError();
534                    break;
535                default:
536                    super.handleMessage(msg);
537            }
538        }
539    }
540
541    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {
542
543        @Override
544        public Void doInBackground(Void... params) {
545            int availability = internalGetInitialAvailability();
546            boolean enrolled = false;
547            // Fetch the sound model if the availability is one of the supported ones.
548            if (availability == STATE_NOT_READY
549                    || availability == STATE_KEYPHRASE_UNENROLLED
550                    || availability == STATE_KEYPHRASE_ENROLLED) {
551                enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id);
552                if (!enrolled) {
553                    availability = STATE_KEYPHRASE_UNENROLLED;
554                } else {
555                    availability = STATE_KEYPHRASE_ENROLLED;
556                }
557            }
558
559            synchronized (mLock) {
560                if (DBG) {
561                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
562                            + " -> " + availability);
563                }
564                mAvailability = availability;
565                notifyStateChangedLocked();
566            }
567            return null;
568        }
569
570        /**
571         * @return The initial availability without checking the enrollment status.
572         */
573        private int internalGetInitialAvailability() {
574            synchronized (mLock) {
575                // This detector has already been invalidated.
576                if (mAvailability == STATE_INVALID) {
577                    return STATE_INVALID;
578                }
579            }
580
581            ModuleProperties dspModuleProperties = null;
582            try {
583                dspModuleProperties =
584                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
585            } catch (RemoteException e) {
586                Slog.w(TAG, "RemoteException in getDspProperties!");
587            }
588            // No DSP available
589            if (dspModuleProperties == null) {
590                return STATE_HARDWARE_UNAVAILABLE;
591            }
592            // No enrollment application supports this keyphrase/locale
593            if (mKeyphraseMetadata == null) {
594                return STATE_KEYPHRASE_UNSUPPORTED;
595            }
596            return STATE_NOT_READY;
597        }
598
599        /**
600         * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
601         */
602        private boolean internalGetIsEnrolled(int keyphraseId) {
603            try {
604                return mModelManagementService.isEnrolledForKeyphrase(
605                        mVoiceInteractionService, keyphraseId);
606            } catch (RemoteException e) {
607                Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
608            }
609            return false;
610        }
611    }
612}
613