AlwaysOnHotwordDetector.java revision 2c0273e50a3162595e9a54030166f2369b039a5a
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.annotation.IntDef;
20import android.annotation.NonNull;
21import android.annotation.Nullable;
22import android.content.Intent;
23import android.hardware.soundtrigger.IRecognitionStatusCallback;
24import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
25import android.hardware.soundtrigger.KeyphraseMetadata;
26import android.hardware.soundtrigger.SoundTrigger;
27import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
28import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionEvent;
29import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
30import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
31import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
32import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
33import android.media.AudioFormat;
34import android.os.AsyncTask;
35import android.os.Handler;
36import android.os.Message;
37import android.os.RemoteException;
38import android.util.Slog;
39
40import com.android.internal.app.IVoiceInteractionManagerService;
41
42import java.lang.annotation.Retention;
43import java.lang.annotation.RetentionPolicy;
44
45/**
46 * A class that lets a VoiceInteractionService implementation interact with
47 * always-on keyphrase detection APIs.
48 */
49public class AlwaysOnHotwordDetector {
50    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
51    /**
52     * Indicates that this hotword detector is no longer valid for any recognition
53     * and should not be used anymore.
54     */
55    private static final int STATE_INVALID = -3;
56
57    /**
58     * Indicates that recognition for the given keyphrase is not available on the system
59     * because of the hardware configuration.
60     * No further interaction should be performed with the detector that returns this availability.
61     */
62    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
63    /**
64     * Indicates that recognition for the given keyphrase is not supported.
65     * No further interaction should be performed with the detector that returns this availability.
66     */
67    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
68    /**
69     * Indicates that the given keyphrase is not enrolled.
70     * The caller may choose to begin an enrollment flow for the keyphrase.
71     */
72    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
73    /**
74     * Indicates that the given keyphrase is currently enrolled and it's possible to start
75     * recognition for it.
76     */
77    public static final int STATE_KEYPHRASE_ENROLLED = 2;
78
79    /**
80     * Indicates that the detector isn't ready currently.
81     */
82    private static final int STATE_NOT_READY = 0;
83
84    // Keyphrase management actions. Used in getManageIntent() ----//
85    /** Indicates that we need to enroll. */
86    public static final int MANAGE_ACTION_ENROLL = 0;
87    /** Indicates that we need to re-enroll. */
88    public static final int MANAGE_ACTION_RE_ENROLL = 1;
89    /** Indicates that we need to un-enroll. */
90    public static final int MANAGE_ACTION_UN_ENROLL = 2;
91
92    //-- Flags for startRecognition    ----//
93    /** @hide */
94    @Retention(RetentionPolicy.SOURCE)
95    @IntDef(flag = true,
96            value = {
97                RECOGNITION_FLAG_NONE,
98                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
99                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
100            })
101    public @interface RecognitionFlags {}
102
103    /** Empty flag for {@link #startRecognition(int)}. */
104    public static final int RECOGNITION_FLAG_NONE = 0;
105    /**
106     * Recognition flag for {@link #startRecognition(int)} that indicates
107     * whether the trigger audio for hotword needs to be captured.
108     */
109    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
110    /**
111     * Recognition flag for {@link #startRecognition(int)} that indicates
112     * whether the recognition should keep going on even after the keyphrase triggers.
113     * If this flag is specified, it's possible to get multiple triggers after a
114     * call to {@link #startRecognition(int)} if the user speaks the keyphrase multiple times.
115     * When this isn't specified, the default behavior is to stop recognition once the
116     * keyphrase is spoken, till the caller starts recognition again.
117     */
118    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;
119
120    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
121    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
122
123    /** @hide */
124    @Retention(RetentionPolicy.SOURCE)
125    @IntDef(flag = true,
126            value = {
127                RECOGNITION_MODE_VOICE_TRIGGER,
128                RECOGNITION_MODE_USER_IDENTIFICATION,
129            })
130    public @interface RecognitionModes {}
131
132    /**
133     * Simple recognition of the key phrase.
134     * Returned by {@link #getSupportedRecognitionModes()}
135     */
136    public static final int RECOGNITION_MODE_VOICE_TRIGGER
137            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
138    /**
139     * User identification performed with the keyphrase recognition.
140     * Returned by {@link #getSupportedRecognitionModes()}
141     */
142    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
143            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
144
145    static final String TAG = "AlwaysOnHotwordDetector";
146    // TODO: Set to false.
147    static final boolean DBG = true;
148
149    private static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
150    private static final int STATUS_OK = SoundTrigger.STATUS_OK;
151
152    private static final int MSG_AVAILABILITY_CHANGED = 1;
153    private static final int MSG_HOTWORD_DETECTED = 2;
154    private static final int MSG_DETECTION_ERROR = 3;
155
156    private final String mText;
157    private final String mLocale;
158    /**
159     * The metadata of the Keyphrase, derived from the enrollment application.
160     * This may be null if this keyphrase isn't supported by the enrollment application.
161     */
162    private final KeyphraseMetadata mKeyphraseMetadata;
163    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
164    private final IVoiceInteractionService mVoiceInteractionService;
165    private final IVoiceInteractionManagerService mModelManagementService;
166    private final SoundTriggerListener mInternalCallback;
167    private final Callback mExternalCallback;
168    private final Object mLock = new Object();
169    private final Handler mHandler;
170
171    private int mAvailability = STATE_NOT_READY;
172
173    /**
174     * Details of the audio that triggered the keyphrase.
175     */
176    public static class TriggerAudio {
177        /**
178         * Format of {@code data}.
179         */
180        @NonNull
181        public final AudioFormat audioFormat;
182        /**
183         * Raw audio data that triggered they keyphrase.
184         */
185        @NonNull
186        public final byte[] data;
187
188        private TriggerAudio(AudioFormat _audioFormat, byte[] _data) {
189            audioFormat = _audioFormat;
190            data = _data;
191        }
192    }
193
194    /**
195     * Callbacks for always-on hotword detection.
196     */
197    public interface Callback {
198        /**
199         * Called when the hotword availability changes.
200         * This indicates a change in the availability of recognition for the given keyphrase.
201         * It's called at least once with the initial availability.<p/>
202         *
203         * Availability implies whether the hardware on this system is capable of listening for
204         * the given keyphrase or not. <p/>
205         *
206         * @see AlwaysOnHotwordDetector#STATE_HARDWARE_UNAVAILABLE
207         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNSUPPORTED
208         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_UNENROLLED
209         * @see AlwaysOnHotwordDetector#STATE_KEYPHRASE_ENROLLED
210         */
211        void onAvailabilityChanged(int status);
212        /**
213         * Called when the keyphrase is spoken.
214         * This implicitly stops listening for the keyphrase once it's detected.
215         * Clients should start a recognition again once they are done handling this
216         * detection.
217         *
218         * @param triggerAudio Optional trigger audio data, if it was requested during
219         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
220         */
221        void onDetected(@Nullable TriggerAudio triggerAudio);
222        /**
223         * Called when the detection fails due to an error.
224         */
225        void onError();
226    }
227
228    /**
229     * @param text The keyphrase text to get the detector for.
230     * @param locale The java locale for the detector.
231     * @param callback A non-null Callback for receiving the recognition events.
232     * @param voiceInteractionService The current voice interaction service.
233     * @param modelManagementService A service that allows management of sound models.
234     *
235     * @hide
236     */
237    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
238            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
239            IVoiceInteractionService voiceInteractionService,
240            IVoiceInteractionManagerService modelManagementService) {
241        mText = text;
242        mLocale = locale;
243        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
244        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
245        mExternalCallback = callback;
246        mHandler = new MyHandler();
247        mInternalCallback = new SoundTriggerListener(mHandler);
248        mVoiceInteractionService = voiceInteractionService;
249        mModelManagementService = modelManagementService;
250        new RefreshAvailabiltyTask().execute();
251    }
252
253    /**
254     * Gets the recognition modes supported by the associated keyphrase.
255     *
256     * @see #RECOGNITION_MODE_USER_IDENTIFICATION
257     * @see #RECOGNITION_MODE_VOICE_TRIGGER
258     *
259     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
260     *         Callers should only call this method after a supported state callback on
261     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
262     * @throws IllegalStateException if the detector is in an invalid state.
263     *         This may happen if another detector has been instantiated or the
264     *         {@link VoiceInteractionService} hosting this detector has been shut down.
265     */
266    public @RecognitionModes int getSupportedRecognitionModes() {
267        if (DBG) Slog.d(TAG, "getSupportedRecognitionModes()");
268        synchronized (mLock) {
269            return getSupportedRecognitionModesLocked();
270        }
271    }
272
273    private int getSupportedRecognitionModesLocked() {
274        if (mAvailability == STATE_INVALID) {
275            throw new IllegalStateException(
276                    "getSupportedRecognitionModes called on an invalid detector");
277        }
278
279        // This method only makes sense if we can actually support a recognition.
280        if (mAvailability != STATE_KEYPHRASE_ENROLLED
281                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
282            throw new UnsupportedOperationException(
283                    "Getting supported recognition modes for the keyphrase is not supported");
284        }
285
286        return mKeyphraseMetadata.recognitionModeFlags;
287    }
288
289    /**
290     * Starts recognition for the associated keyphrase.
291     *
292     * @param recognitionFlags The flags to control the recognition properties.
293     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE},
294     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO} and
295     *        {@link #RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS}.
296     * @return Indicates whether the call succeeded or not.
297     * @throws UnsupportedOperationException if the recognition isn't supported.
298     *         Callers should only call this method after a supported state callback on
299     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
300     * @throws IllegalStateException if the detector is in an invalid state.
301     *         This may happen if another detector has been instantiated or the
302     *         {@link VoiceInteractionService} hosting this detector has been shut down.
303     */
304    public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
305        if (DBG) Slog.d(TAG, "startRecognition(" + recognitionFlags + ")");
306        synchronized (mLock) {
307            if (mAvailability == STATE_INVALID) {
308                throw new IllegalStateException("startRecognition called on an invalid detector");
309            }
310
311            // Check if we can start/stop a recognition.
312            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
313                throw new UnsupportedOperationException(
314                        "Recognition for the given keyphrase is not supported");
315            }
316
317            return startRecognitionLocked(recognitionFlags) == STATUS_OK;
318        }
319    }
320
321    /**
322     * Stops recognition for the associated keyphrase.
323     *
324     * @return Indicates whether the call succeeded or not.
325     * @throws UnsupportedOperationException if the recognition isn't supported.
326     *         Callers should only call this method after a supported state callback on
327     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
328     * @throws IllegalStateException if the detector is in an invalid state.
329     *         This may happen if another detector has been instantiated or the
330     *         {@link VoiceInteractionService} hosting this detector has been shut down.
331     */
332    public boolean stopRecognition() {
333        if (DBG) Slog.d(TAG, "stopRecognition()");
334        synchronized (mLock) {
335            if (mAvailability == STATE_INVALID) {
336                throw new IllegalStateException("stopRecognition called on an invalid detector");
337            }
338
339            // Check if we can start/stop a recognition.
340            if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
341                throw new UnsupportedOperationException(
342                        "Recognition for the given keyphrase is not supported");
343            }
344
345            return stopRecognitionLocked() == STATUS_OK;
346        }
347    }
348
349    /**
350     * Gets an intent to manage the associated keyphrase.
351     *
352     * @param action The manage action that needs to be performed.
353     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
354     *        {@link #MANAGE_ACTION_UN_ENROLL}.
355     * @return An {@link Intent} to manage the given keyphrase.
356     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
357     *         Callers should only call this method after a supported state callback on
358     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
359     * @throws IllegalStateException if the detector is in an invalid state.
360     *         This may happen if another detector has been instantiated or the
361     *         {@link VoiceInteractionService} hosting this detector has been shut down.
362     */
363    public Intent getManageIntent(int action) {
364        if (DBG) Slog.d(TAG, "getManageIntent(" + action + ")");
365        synchronized (mLock) {
366            return getManageIntentLocked(action);
367        }
368    }
369
370    private Intent getManageIntentLocked(int action) {
371        if (mAvailability == STATE_INVALID) {
372            throw new IllegalStateException("getManageIntent called on an invalid detector");
373        }
374
375        // This method only makes sense if we can actually support a recognition.
376        if (mAvailability != STATE_KEYPHRASE_ENROLLED
377                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
378            throw new UnsupportedOperationException(
379                    "Managing the given keyphrase is not supported");
380        }
381
382        if (action != MANAGE_ACTION_ENROLL
383                && action != MANAGE_ACTION_RE_ENROLL
384                && action != MANAGE_ACTION_UN_ENROLL) {
385            throw new IllegalArgumentException("Invalid action specified " + action);
386        }
387
388        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
389    }
390
391    /**
392     * Invalidates this hotword detector so that any future calls to this result
393     * in an IllegalStateException.
394     *
395     * @hide
396     */
397    void invalidate() {
398        synchronized (mLock) {
399            mAvailability = STATE_INVALID;
400            notifyStateChangedLocked();
401        }
402    }
403
404    /**
405     * Reloads the sound models from the service.
406     *
407     * @hide
408     */
409    void onSoundModelsChanged() {
410        synchronized (mLock) {
411            // FIXME: This should stop the recognition if it was using an enrolled sound model
412            // that's no longer available.
413            if (mAvailability == STATE_INVALID
414                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
415                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
416                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
417                return;
418            }
419
420            // Execute a refresh availability task - which should then notify of a change.
421            new RefreshAvailabiltyTask().execute();
422        }
423    }
424
425    private int startRecognitionLocked(int recognitionFlags) {
426        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
427        // TODO: Do we need to do something about the confidence level here?
428        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
429                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
430        boolean captureTriggerAudio =
431                (recognitionFlags&RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
432        boolean allowMultipleTriggers =
433                (recognitionFlags&RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
434        int code = STATUS_ERROR;
435        try {
436            code = mModelManagementService.startRecognition(mVoiceInteractionService,
437                    mKeyphraseMetadata.id, mInternalCallback,
438                    new RecognitionConfig(captureTriggerAudio, allowMultipleTriggers,
439                            recognitionExtra, null /* additional data */));
440        } catch (RemoteException e) {
441            Slog.w(TAG, "RemoteException in startRecognition!");
442        }
443        if (code != STATUS_OK) {
444            Slog.w(TAG, "startRecognition() failed with error code " + code);
445        }
446        return code;
447    }
448
449    private int stopRecognitionLocked() {
450        int code = STATUS_ERROR;
451        try {
452            code = mModelManagementService.stopRecognition(
453                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
454        } catch (RemoteException e) {
455            Slog.w(TAG, "RemoteException in stopRecognition!");
456        }
457
458        if (code != STATUS_OK) {
459            Slog.w(TAG, "stopRecognition() failed with error code " + code);
460        }
461        return code;
462    }
463
464    private void notifyStateChangedLocked() {
465        Message message = Message.obtain(mHandler, MSG_AVAILABILITY_CHANGED);
466        message.arg1 = mAvailability;
467        message.sendToTarget();
468    }
469
470    /** @hide */
471    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
472        private final Handler mHandler;
473
474        public SoundTriggerListener(Handler handler) {
475            mHandler = handler;
476        }
477
478        @Override
479        public void onDetected(KeyphraseRecognitionEvent event) {
480            if (DBG) {
481                Slog.d(TAG, "OnDetected(" + event + ")");
482            } else {
483                Slog.i(TAG, "onDetected");
484            }
485            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
486            // FIXME: Check whether the event contains trigger data or not.
487            // FIXME: Read the audio format from the event.
488            if (event.data != null) {
489                AudioFormat audioFormat = new AudioFormat.Builder()
490                        .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
491                        .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
492                        .setSampleRate(16000)
493                        .build();
494                message.obj = new TriggerAudio(audioFormat, event.data);
495            }
496            message.sendToTarget();
497        }
498
499        @Override
500        public void onError(int status) {
501            Slog.i(TAG, "onError: " + status);
502            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
503        }
504    }
505
506    class MyHandler extends Handler {
507        @Override
508        public void handleMessage(Message msg) {
509            synchronized (mLock) {
510                if (mAvailability == STATE_INVALID) {
511                    Slog.w(TAG, "Received message: " + msg.what + " for an invalid detector");
512                    return;
513                }
514            }
515
516            switch (msg.what) {
517                case MSG_AVAILABILITY_CHANGED:
518                    mExternalCallback.onAvailabilityChanged(msg.arg1);
519                    break;
520                case MSG_HOTWORD_DETECTED:
521                    mExternalCallback.onDetected((TriggerAudio) msg.obj);
522                    break;
523                case MSG_DETECTION_ERROR:
524                    mExternalCallback.onError();
525                    break;
526                default:
527                    super.handleMessage(msg);
528            }
529        }
530    }
531
532    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {
533
534        @Override
535        public Void doInBackground(Void... params) {
536            int availability = internalGetInitialAvailability();
537            boolean enrolled = false;
538            // Fetch the sound model if the availability is one of the supported ones.
539            if (availability == STATE_NOT_READY
540                    || availability == STATE_KEYPHRASE_UNENROLLED
541                    || availability == STATE_KEYPHRASE_ENROLLED) {
542                enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id);
543                if (!enrolled) {
544                    availability = STATE_KEYPHRASE_UNENROLLED;
545                } else {
546                    availability = STATE_KEYPHRASE_ENROLLED;
547                }
548            }
549
550            synchronized (mLock) {
551                if (DBG) {
552                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
553                            + " -> " + availability);
554                }
555                mAvailability = availability;
556                notifyStateChangedLocked();
557            }
558            return null;
559        }
560
561        /**
562         * @return The initial availability without checking the enrollment status.
563         */
564        private int internalGetInitialAvailability() {
565            synchronized (mLock) {
566                // This detector has already been invalidated.
567                if (mAvailability == STATE_INVALID) {
568                    return STATE_INVALID;
569                }
570            }
571
572            ModuleProperties dspModuleProperties = null;
573            try {
574                dspModuleProperties =
575                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
576            } catch (RemoteException e) {
577                Slog.w(TAG, "RemoteException in getDspProperties!");
578            }
579            // No DSP available
580            if (dspModuleProperties == null) {
581                return STATE_HARDWARE_UNAVAILABLE;
582            }
583            // No enrollment application supports this keyphrase/locale
584            if (mKeyphraseMetadata == null) {
585                return STATE_KEYPHRASE_UNSUPPORTED;
586            }
587            return STATE_NOT_READY;
588        }
589
590        /**
591         * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
592         */
593        private boolean internalGetIsEnrolled(int keyphraseId) {
594            try {
595                return mModelManagementService.isEnrolledForKeyphrase(
596                        mVoiceInteractionService, keyphraseId);
597            } catch (RemoteException e) {
598                Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
599            }
600            return false;
601        }
602    }
603}
604