AlwaysOnHotwordDetector.java revision f63bc523eadbe01ce0a5ad52868a5dccb3d5f6dd
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.IRecognitionStatusCallback;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.SoundTrigger;
24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
25import android.hardware.soundtrigger.SoundTrigger.Keyphrase;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30import android.os.AsyncTask;
31import android.os.Handler;
32import android.os.Message;
33import android.os.RemoteException;
34import android.util.Slog;
35
36import com.android.internal.app.IVoiceInteractionManagerService;
37
38import java.util.List;
39
40/**
41 * A class that lets a VoiceInteractionService implementation interact with
42 * always-on keyphrase detection APIs.
43 */
44public class AlwaysOnHotwordDetector {
45    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
46    /**
47     * Indicates that this hotword detector is no longer valid for any recognition
48     * and should not be used anymore.
49     */
50    public static final int STATE_INVALID = -3;
51    /**
52     * Indicates that recognition for the given keyphrase is not available on the system
53     * because of the hardware configuration.
54     */
55    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
56    /**
57     * Indicates that recognition for the given keyphrase is not supported.
58     */
59    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
60    /**
61     * Indicates that the given keyphrase is not enrolled.
62     */
63    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
64    /**
65     * Indicates that the given keyphrase is currently enrolled and it's possible to start
66     * recognition for it.
67     */
68    public static final int STATE_KEYPHRASE_ENROLLED = 2;
69
70    /**
71     * Indicates that the detector isn't ready currently.
72     */
73    private static final int STATE_NOT_READY = 0;
74
75    // Keyphrase management actions. Used in getManageIntent() ----//
76    /** Indicates that we need to enroll. */
77    public static final int MANAGE_ACTION_ENROLL = 0;
78    /** Indicates that we need to re-enroll. */
79    public static final int MANAGE_ACTION_RE_ENROLL = 1;
80    /** Indicates that we need to un-enroll. */
81    public static final int MANAGE_ACTION_UN_ENROLL = 2;
82
83    /**
84     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
85     */
86    public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
87    public static final int STATUS_OK = SoundTrigger.STATUS_OK;
88
89    //-- Flags for startRecogntion    ----//
90    /** Empty flag for {@link #startRecognition(int)}. */
91    public static final int RECOGNITION_FLAG_NONE = 0;
92    /**
93     * Recognition flag for {@link #startRecognition(int)} that indicates
94     * whether the trigger audio for hotword needs to be captured.
95     */
96    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
97
98    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
99    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
100
101    /**
102     * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()}
103     */
104    public static final int RECOGNITION_MODE_VOICE_TRIGGER
105            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
106    /**
107     * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()}
108     */
109    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
110            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
111
112    static final String TAG = "AlwaysOnHotwordDetector";
113    // TODO: Set to false.
114    static final boolean DBG = true;
115
116    private static final int MSG_STATE_CHANGED = 1;
117    private static final int MSG_HOTWORD_DETECTED = 2;
118    private static final int MSG_DETECTION_STOPPED = 3;
119
120    private final String mText;
121    private final String mLocale;
122    /**
123     * The metadata of the Keyphrase, derived from the enrollment application.
124     * This may be null if this keyphrase isn't supported by the enrollment application.
125     */
126    private final KeyphraseMetadata mKeyphraseMetadata;
127    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
128    private final IVoiceInteractionService mVoiceInteractionService;
129    private final IVoiceInteractionManagerService mModelManagementService;
130    private final SoundTriggerListener mInternalCallback;
131    private final Callback mExternalCallback;
132    private final Object mLock = new Object();
133    private final Handler mHandler;
134
135    /**
136     * The sound model for the keyphrase, derived from the model management service
137     * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet.
138     */
139    private KeyphraseSoundModel mEnrolledSoundModel;
140    private int mAvailability = STATE_NOT_READY;
141
142    /**
143     * Callbacks for always-on hotword detection.
144     */
145    public interface Callback {
146        /**
147         * Called when the hotword availability changes.
148         * This indicates a change in the availability of recognition for the given keyphrase.
149         * It's called at least once with the initial availability.<p/>
150         *
151         * Availability implies whether the hardware on this system is capable of listening for
152         * the given keyphrase or not. <p/>
153         * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or
154         * {@link #STATE_KEYPHRASE_UNSUPPORTED},
155         * detection is not possible and no further interaction should be
156         * performed with this detector. <br/>
157         * If it is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin
158         * an enrollment flow for the keyphrase. <br/>
159         * and for {@link #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <p/>
160         *
161         * If the return code is {@link #STATE_INVALID}, this detector is stale.
162         * A new detector should be obtained for use in the future.
163         */
164        void onAvailabilityChanged(int status);
165        /**
166         * Called when the keyphrase is spoken.
167         *
168         * @param data Optional trigger audio data, if it was requested during
169         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
170         */
171        void onDetected(byte[] data);
172        /**
173         * Called when the detection for the associated keyphrase stops.
174         */
175        void onDetectionStopped();
176    }
177
178    /**
179     * @param text The keyphrase text to get the detector for.
180     * @param locale The java locale for the detector.
181     * @param callback A non-null Callback for receiving the recognition events.
182     * @param voiceInteractionService The current voice interaction service.
183     * @param modelManagementService A service that allows management of sound models.
184     *
185     * @hide
186     */
187    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
188            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
189            IVoiceInteractionService voiceInteractionService,
190            IVoiceInteractionManagerService modelManagementService) {
191        mText = text;
192        mLocale = locale;
193        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
194        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
195        mExternalCallback = callback;
196        mHandler = new MyHandler();
197        mInternalCallback = new SoundTriggerListener(mHandler);
198        mVoiceInteractionService = voiceInteractionService;
199        mModelManagementService = modelManagementService;
200        new RefreshAvailabiltyTask().execute();
201    }
202
203    /**
204     * Gets the recognition modes supported by the associated keyphrase.
205     *
206     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
207     *         Callers should only call this method after a supported state callback on
208     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
209     */
210    public int getSupportedRecognitionModes() {
211        synchronized (mLock) {
212            return getSupportedRecognitionModesLocked();
213        }
214    }
215
216    private int getSupportedRecognitionModesLocked() {
217        // This method only makes sense if we can actually support a recognition.
218        if (mAvailability != STATE_KEYPHRASE_ENROLLED
219                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
220            throw new UnsupportedOperationException(
221                    "Getting supported recognition modes for the keyphrase is not supported");
222        }
223
224        return mKeyphraseMetadata.recognitionModeFlags;
225    }
226
227    /**
228     * Starts recognition for the associated keyphrase.
229     *
230     * @param recognitionFlags The flags to control the recognition properties.
231     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
232     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
233     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
234     * @throws UnsupportedOperationException if the recognition isn't supported.
235     *         Callers should only call this method after a supported state callback on
236     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
237     */
238    public int startRecognition(int recognitionFlags) {
239        synchronized (mLock) {
240            return startRecognitionLocked(recognitionFlags);
241        }
242    }
243
244    private int startRecognitionLocked(int recognitionFlags) {
245        // This method only makes sense if we can start a recognition.
246        if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
247            throw new UnsupportedOperationException(
248                    "Recognition for the given keyphrase is not supported");
249        }
250
251        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
252        // TODO: Do we need to do something about the confidence level here?
253        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
254                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
255        boolean captureTriggerAudio =
256                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
257        int code = STATUS_ERROR;
258        try {
259            code = mModelManagementService.startRecognition(mVoiceInteractionService,
260                    mKeyphraseMetadata.id, mEnrolledSoundModel, mInternalCallback,
261                    new RecognitionConfig(
262                            captureTriggerAudio, recognitionExtra, null /* additional data */));
263        } catch (RemoteException e) {
264            Slog.w(TAG, "RemoteException in startRecognition!");
265        }
266        if (code != STATUS_OK) {
267            Slog.w(TAG, "startRecognition() failed with error code " + code);
268        }
269        return code;
270    }
271
272    /**
273     * Stops recognition for the associated keyphrase.
274     *
275     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
276     * @throws UnsupportedOperationException if the recognition isn't supported.
277     *         Callers should only call this method after a supported state callback on
278     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
279     */
280    public int stopRecognition() {
281        synchronized (mLock) {
282            return stopRecognitionLocked();
283        }
284    }
285
286    private int stopRecognitionLocked() {
287        // This method only makes sense if we can start a recognition.
288        if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
289            throw new UnsupportedOperationException(
290                    "Recognition for the given keyphrase is not supported");
291        }
292
293        int code = STATUS_ERROR;
294        try {
295            code = mModelManagementService.stopRecognition(
296                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
297        } catch (RemoteException e) {
298            Slog.w(TAG, "RemoteException in stopRecognition!");
299        }
300
301        if (code != STATUS_OK) {
302            Slog.w(TAG, "stopRecognition() failed with error code " + code);
303        }
304        return code;
305    }
306
307    /**
308     * Gets an intent to manage the associated keyphrase.
309     *
310     * @param action The manage action that needs to be performed.
311     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
312     *        {@link #MANAGE_ACTION_UN_ENROLL}.
313     * @return An {@link Intent} to manage the given keyphrase.
314     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
315     *         Callers should only call this method after a supported state callback on
316     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
317     */
318    public Intent getManageIntent(int action) {
319        // This method only makes sense if we can actually support a recognition.
320        if (mAvailability != STATE_KEYPHRASE_ENROLLED
321                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
322            throw new UnsupportedOperationException(
323                    "Managing the given keyphrase is not supported");
324        }
325        if (action != MANAGE_ACTION_ENROLL
326                && action != MANAGE_ACTION_RE_ENROLL
327                && action != MANAGE_ACTION_UN_ENROLL) {
328            throw new IllegalArgumentException("Invalid action specified " + action);
329        }
330
331        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
332    }
333
334    /**
335     * Invalidates this hotword detector so that any future calls to this result
336     * in an IllegalStateException.
337     *
338     * @hide
339     */
340    void invalidate() {
341        synchronized (mLock) {
342            mAvailability = STATE_INVALID;
343            notifyStateChangedLocked();
344        }
345    }
346
347    /**
348     * Reloads the sound models from the service.
349     *
350     * @hide
351     */
352    void onSoundModelsChanged() {
353        synchronized (mLock) {
354            // TODO: This should stop the recognition if it was using an enrolled sound model
355            // that's no longer available.
356            if (mAvailability == STATE_INVALID
357                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
358                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
359                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
360                return;
361            }
362
363            // Execute a refresh availability task - which should then notify of a change.
364            new RefreshAvailabiltyTask().execute();
365        }
366    }
367
368    private void notifyStateChangedLocked() {
369        Message message = Message.obtain(mHandler, MSG_STATE_CHANGED);
370        message.arg1 = mAvailability;
371        message.sendToTarget();
372    }
373
374    /** @hide */
375    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
376        private final Handler mHandler;
377
378        public SoundTriggerListener(Handler handler) {
379            mHandler = handler;
380        }
381
382        @Override
383        public void onDetected(byte[] data) {
384            Slog.i(TAG, "onDetected");
385            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
386            message.obj = data;
387            message.sendToTarget();
388        }
389
390        @Override
391        public void onDetectionStopped() {
392            Slog.i(TAG, "onDetectionStopped");
393            mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED);
394        }
395    }
396
397    class MyHandler extends Handler {
398        @Override
399        public void handleMessage(Message msg) {
400            switch (msg.what) {
401                case MSG_STATE_CHANGED:
402                    mExternalCallback.onAvailabilityChanged(msg.arg1);
403                    break;
404                case MSG_HOTWORD_DETECTED:
405                    mExternalCallback.onDetected((byte[]) msg.obj);
406                    break;
407                case MSG_DETECTION_STOPPED:
408                    mExternalCallback.onDetectionStopped();
409                default:
410                    super.handleMessage(msg);
411            }
412        }
413    }
414
415    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {
416
417        @Override
418        public Void doInBackground(Void... params) {
419            int availability = internalGetInitialAvailability();
420            KeyphraseSoundModel soundModel = null;
421            // Fetch the sound model if the availability is one of the supported ones.
422            if (availability == STATE_NOT_READY
423                    || availability == STATE_KEYPHRASE_UNENROLLED
424                    || availability == STATE_KEYPHRASE_ENROLLED) {
425                soundModel =
426                        internalGetKeyphraseSoundModel(mKeyphraseMetadata.id);
427                if (soundModel == null) {
428                    availability = STATE_KEYPHRASE_UNENROLLED;
429                } else {
430                    availability = STATE_KEYPHRASE_ENROLLED;
431                }
432            }
433
434            synchronized (mLock) {
435                if (DBG) {
436                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
437                            + " -> " + availability);
438                }
439                mAvailability = availability;
440                mEnrolledSoundModel = soundModel;
441                notifyStateChangedLocked();
442            }
443            return null;
444        }
445
446        /**
447         * @return The initial availability without checking the enrollment status.
448         */
449        private int internalGetInitialAvailability() {
450            synchronized (mLock) {
451                // This detector has already been invalidated.
452                if (mAvailability == STATE_INVALID) {
453                    return STATE_INVALID;
454                }
455            }
456
457            ModuleProperties dspModuleProperties = null;
458            try {
459                dspModuleProperties =
460                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
461            } catch (RemoteException e) {
462                Slog.w(TAG, "RemoteException in getDspProperties!");
463            }
464            // No DSP available
465            if (dspModuleProperties == null) {
466                return STATE_HARDWARE_UNAVAILABLE;
467            }
468            // No enrollment application supports this keyphrase/locale
469            if (mKeyphraseMetadata == null) {
470                return STATE_KEYPHRASE_UNSUPPORTED;
471            }
472            return STATE_NOT_READY;
473        }
474
475        /**
476         * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
477         */
478        private KeyphraseSoundModel internalGetKeyphraseSoundModel(int keyphraseId) {
479            List<KeyphraseSoundModel> soundModels;
480            try {
481                soundModels = mModelManagementService
482                        .listRegisteredKeyphraseSoundModels(mVoiceInteractionService);
483                if (soundModels == null || soundModels.isEmpty()) {
484                    Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId);
485                    return null;
486                }
487                for (int i = 0; i < soundModels.size(); i++) {
488                    KeyphraseSoundModel soundModel = soundModels.get(i);
489                    if (soundModel.keyphrases == null || soundModel.keyphrases.length == 0) {
490                        continue;
491                    }
492                    for (int j = 0; i < soundModel.keyphrases.length; j++) {
493                        Keyphrase keyphrase = soundModel.keyphrases[j];
494                        if (keyphrase.id == keyphraseId) {
495                            return soundModel;
496                        }
497                    }
498                }
499            } catch (RemoteException e) {
500                Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
501            }
502            return null;
503        }
504    }
505}
506