AlwaysOnHotwordDetector.java revision 39c12fab49075b715c253c68c84b5c10c3150197
/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.IRecognitionStatusCallback;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.SoundTrigger;
24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
25import android.hardware.soundtrigger.SoundTrigger.Keyphrase;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30import android.hardware.soundtrigger.SoundTrigger.RecognitionEvent;
31import android.os.AsyncTask;
32import android.os.Handler;
33import android.os.Message;
34import android.os.RemoteException;
35import android.util.Slog;
36
37import com.android.internal.app.IVoiceInteractionManagerService;
38
39import java.util.List;
40
41/**
42 * A class that lets a VoiceInteractionService implementation interact with
43 * always-on keyphrase detection APIs.
44 */
45public class AlwaysOnHotwordDetector {
46    //---- States of Keyphrase availability. Return codes for onAvailabilityChanged() ----//
47    /**
48     * Indicates that this hotword detector is no longer valid for any recognition
49     * and should not be used anymore.
50     */
51    public static final int STATE_INVALID = -3;
52    /**
53     * Indicates that recognition for the given keyphrase is not available on the system
54     * because of the hardware configuration.
55     */
56    public static final int STATE_HARDWARE_UNAVAILABLE = -2;
57    /**
58     * Indicates that recognition for the given keyphrase is not supported.
59     */
60    public static final int STATE_KEYPHRASE_UNSUPPORTED = -1;
61    /**
62     * Indicates that the given keyphrase is not enrolled.
63     */
64    public static final int STATE_KEYPHRASE_UNENROLLED = 1;
65    /**
66     * Indicates that the given keyphrase is currently enrolled and it's possible to start
67     * recognition for it.
68     */
69    public static final int STATE_KEYPHRASE_ENROLLED = 2;
70
71    /**
72     * Indicates that the detector isn't ready currently.
73     */
74    private static final int STATE_NOT_READY = 0;
75
76    // Keyphrase management actions. Used in getManageIntent() ----//
77    /** Indicates that we need to enroll. */
78    public static final int MANAGE_ACTION_ENROLL = 0;
79    /** Indicates that we need to re-enroll. */
80    public static final int MANAGE_ACTION_RE_ENROLL = 1;
81    /** Indicates that we need to un-enroll. */
82    public static final int MANAGE_ACTION_UN_ENROLL = 2;
83
84    /**
85     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
86     */
87    public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
88    public static final int STATUS_OK = SoundTrigger.STATUS_OK;
89
90    //-- Flags for startRecogntion    ----//
91    /** Empty flag for {@link #startRecognition(int)}. */
92    public static final int RECOGNITION_FLAG_NONE = 0;
93    /**
94     * Recognition flag for {@link #startRecognition(int)} that indicates
95     * whether the trigger audio for hotword needs to be captured.
96     */
97    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
98
99    //---- Recognition mode flags. Return codes for getSupportedRecognitionModes() ----//
100    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
101
102    /**
103     * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()}
104     */
105    public static final int RECOGNITION_MODE_VOICE_TRIGGER
106            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
107    /**
108     * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()}
109     */
110    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
111            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
112
113    static final String TAG = "AlwaysOnHotwordDetector";
114    // TODO: Set to false.
115    static final boolean DBG = true;
116
117    private static final int MSG_STATE_CHANGED = 1;
118    private static final int MSG_HOTWORD_DETECTED = 2;
119    private static final int MSG_DETECTION_STOPPED = 3;
120
121    private final String mText;
122    private final String mLocale;
123    /**
124     * The metadata of the Keyphrase, derived from the enrollment application.
125     * This may be null if this keyphrase isn't supported by the enrollment application.
126     */
127    private final KeyphraseMetadata mKeyphraseMetadata;
128    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
129    private final IVoiceInteractionService mVoiceInteractionService;
130    private final IVoiceInteractionManagerService mModelManagementService;
131    private final SoundTriggerListener mInternalCallback;
132    private final Callback mExternalCallback;
133    private final Object mLock = new Object();
134    private final Handler mHandler;
135
136    /**
137     * Indicates if there is a sound model enrolled for the keyphrase,
138     * derived from the model management service (IVoiceInteractionManagerService).
139     */
140    private boolean mIsEnrolledForDetection;
141    private int mAvailability = STATE_NOT_READY;
142
143    /**
144     * Callbacks for always-on hotword detection.
145     */
146    public interface Callback {
147        /**
148         * Called when the hotword availability changes.
149         * This indicates a change in the availability of recognition for the given keyphrase.
150         * It's called at least once with the initial availability.<p/>
151         *
152         * Availability implies whether the hardware on this system is capable of listening for
153         * the given keyphrase or not. <p/>
154         * If the return code is one of {@link #STATE_HARDWARE_UNAVAILABLE} or
155         * {@link #STATE_KEYPHRASE_UNSUPPORTED},
156         * detection is not possible and no further interaction should be
157         * performed with this detector. <br/>
158         * If it is {@link #STATE_KEYPHRASE_UNENROLLED} the caller may choose to begin
159         * an enrollment flow for the keyphrase. <br/>
160         * and for {@link #STATE_KEYPHRASE_ENROLLED} a recognition can be started as desired. <p/>
161         *
162         * If the return code is {@link #STATE_INVALID}, this detector is stale.
163         * A new detector should be obtained for use in the future.
164         */
165        void onAvailabilityChanged(int status);
166        /**
167         * Called when the keyphrase is spoken.
168         *
169         * @param data Optional trigger audio data, if it was requested during
170         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
171         */
172        void onDetected(byte[] data);
173        /**
174         * Called when the detection for the associated keyphrase stops.
175         */
176        void onDetectionStopped();
177    }
178
179    /**
180     * @param text The keyphrase text to get the detector for.
181     * @param locale The java locale for the detector.
182     * @param callback A non-null Callback for receiving the recognition events.
183     * @param voiceInteractionService The current voice interaction service.
184     * @param modelManagementService A service that allows management of sound models.
185     *
186     * @hide
187     */
188    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
189            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
190            IVoiceInteractionService voiceInteractionService,
191            IVoiceInteractionManagerService modelManagementService) {
192        mText = text;
193        mLocale = locale;
194        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
195        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
196        mExternalCallback = callback;
197        mHandler = new MyHandler();
198        mInternalCallback = new SoundTriggerListener(mHandler);
199        mVoiceInteractionService = voiceInteractionService;
200        mModelManagementService = modelManagementService;
201        new RefreshAvailabiltyTask().execute();
202    }
203
204    /**
205     * Gets the recognition modes supported by the associated keyphrase.
206     *
207     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
208     *         Callers should only call this method after a supported state callback on
209     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
210     */
211    public int getSupportedRecognitionModes() {
212        synchronized (mLock) {
213            return getSupportedRecognitionModesLocked();
214        }
215    }
216
217    private int getSupportedRecognitionModesLocked() {
218        // This method only makes sense if we can actually support a recognition.
219        if (mAvailability != STATE_KEYPHRASE_ENROLLED
220                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
221            throw new UnsupportedOperationException(
222                    "Getting supported recognition modes for the keyphrase is not supported");
223        }
224
225        return mKeyphraseMetadata.recognitionModeFlags;
226    }
227
228    /**
229     * Starts recognition for the associated keyphrase.
230     *
231     * @param recognitionFlags The flags to control the recognition properties.
232     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
233     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
234     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
235     * @throws UnsupportedOperationException if the recognition isn't supported.
236     *         Callers should only call this method after a supported state callback on
237     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
238     */
239    public int startRecognition(int recognitionFlags) {
240        synchronized (mLock) {
241            return startRecognitionLocked(recognitionFlags);
242        }
243    }
244
245    private int startRecognitionLocked(int recognitionFlags) {
246        // This method only makes sense if we can start a recognition.
247        if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
248            throw new UnsupportedOperationException(
249                    "Recognition for the given keyphrase is not supported");
250        }
251
252        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
253        // TODO: Do we need to do something about the confidence level here?
254        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
255                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
256        boolean captureTriggerAudio =
257                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
258        int code = STATUS_ERROR;
259        try {
260            code = mModelManagementService.startRecognition(mVoiceInteractionService,
261                    mKeyphraseMetadata.id, mInternalCallback,
262                    new RecognitionConfig(
263                            captureTriggerAudio, recognitionExtra, null /* additional data */));
264        } catch (RemoteException e) {
265            Slog.w(TAG, "RemoteException in startRecognition!");
266        }
267        if (code != STATUS_OK) {
268            Slog.w(TAG, "startRecognition() failed with error code " + code);
269        }
270        return code;
271    }
272
273    /**
274     * Stops recognition for the associated keyphrase.
275     *
276     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
277     * @throws UnsupportedOperationException if the recognition isn't supported.
278     *         Callers should only call this method after a supported state callback on
279     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
280     */
281    public int stopRecognition() {
282        synchronized (mLock) {
283            return stopRecognitionLocked();
284        }
285    }
286
287    private int stopRecognitionLocked() {
288        // This method only makes sense if we can start a recognition.
289        if (mAvailability != STATE_KEYPHRASE_ENROLLED) {
290            throw new UnsupportedOperationException(
291                    "Recognition for the given keyphrase is not supported");
292        }
293
294        int code = STATUS_ERROR;
295        try {
296            code = mModelManagementService.stopRecognition(
297                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
298        } catch (RemoteException e) {
299            Slog.w(TAG, "RemoteException in stopRecognition!");
300        }
301
302        if (code != STATUS_OK) {
303            Slog.w(TAG, "stopRecognition() failed with error code " + code);
304        }
305        return code;
306    }
307
308    /**
309     * Gets an intent to manage the associated keyphrase.
310     *
311     * @param action The manage action that needs to be performed.
312     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
313     *        {@link #MANAGE_ACTION_UN_ENROLL}.
314     * @return An {@link Intent} to manage the given keyphrase.
315     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
316     *         Callers should only call this method after a supported state callback on
317     *         {@link Callback#onAvailabilityChanged(int)} to avoid this exception.
318     */
319    public Intent getManageIntent(int action) {
320        // This method only makes sense if we can actually support a recognition.
321        if (mAvailability != STATE_KEYPHRASE_ENROLLED
322                && mAvailability != STATE_KEYPHRASE_UNENROLLED) {
323            throw new UnsupportedOperationException(
324                    "Managing the given keyphrase is not supported");
325        }
326        if (action != MANAGE_ACTION_ENROLL
327                && action != MANAGE_ACTION_RE_ENROLL
328                && action != MANAGE_ACTION_UN_ENROLL) {
329            throw new IllegalArgumentException("Invalid action specified " + action);
330        }
331
332        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
333    }
334
335    /**
336     * Invalidates this hotword detector so that any future calls to this result
337     * in an IllegalStateException.
338     *
339     * @hide
340     */
341    void invalidate() {
342        synchronized (mLock) {
343            mAvailability = STATE_INVALID;
344            notifyStateChangedLocked();
345        }
346    }
347
348    /**
349     * Reloads the sound models from the service.
350     *
351     * @hide
352     */
353    void onSoundModelsChanged() {
354        synchronized (mLock) {
355            // TODO: This should stop the recognition if it was using an enrolled sound model
356            // that's no longer available.
357            if (mAvailability == STATE_INVALID
358                    || mAvailability == STATE_HARDWARE_UNAVAILABLE
359                    || mAvailability == STATE_KEYPHRASE_UNSUPPORTED) {
360                Slog.w(TAG, "Received onSoundModelsChanged for an unsupported keyphrase/config");
361                return;
362            }
363
364            // Execute a refresh availability task - which should then notify of a change.
365            new RefreshAvailabiltyTask().execute();
366        }
367    }
368
369    private void notifyStateChangedLocked() {
370        Message message = Message.obtain(mHandler, MSG_STATE_CHANGED);
371        message.arg1 = mAvailability;
372        message.sendToTarget();
373    }
374
375    /** @hide */
376    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
377        private final Handler mHandler;
378
379        public SoundTriggerListener(Handler handler) {
380            mHandler = handler;
381        }
382
383        @Override
384        public void onDetected(RecognitionEvent recognitionEvent) {
385            Slog.i(TAG, "onDetected");
386            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
387            message.obj = recognitionEvent.data;
388            message.sendToTarget();
389        }
390
391        @Override
392        public void onDetectionStopped() {
393            Slog.i(TAG, "onDetectionStopped");
394            mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED);
395        }
396    }
397
398    class MyHandler extends Handler {
399        @Override
400        public void handleMessage(Message msg) {
401            switch (msg.what) {
402                case MSG_STATE_CHANGED:
403                    mExternalCallback.onAvailabilityChanged(msg.arg1);
404                    break;
405                case MSG_HOTWORD_DETECTED:
406                    mExternalCallback.onDetected((byte[]) msg.obj);
407                    break;
408                case MSG_DETECTION_STOPPED:
409                    mExternalCallback.onDetectionStopped();
410                default:
411                    super.handleMessage(msg);
412            }
413        }
414    }
415
416    class RefreshAvailabiltyTask extends AsyncTask<Void, Void, Void> {
417
418        @Override
419        public Void doInBackground(Void... params) {
420            int availability = internalGetInitialAvailability();
421            boolean enrolled = false;
422            // Fetch the sound model if the availability is one of the supported ones.
423            if (availability == STATE_NOT_READY
424                    || availability == STATE_KEYPHRASE_UNENROLLED
425                    || availability == STATE_KEYPHRASE_ENROLLED) {
426                enrolled = internalGetIsEnrolled(mKeyphraseMetadata.id);
427                if (!enrolled) {
428                    availability = STATE_KEYPHRASE_UNENROLLED;
429                } else {
430                    availability = STATE_KEYPHRASE_ENROLLED;
431                }
432            }
433
434            synchronized (mLock) {
435                if (DBG) {
436                    Slog.d(TAG, "Hotword availability changed from " + mAvailability
437                            + " -> " + availability);
438                }
439                mIsEnrolledForDetection = enrolled;
440                mAvailability = availability;
441                notifyStateChangedLocked();
442            }
443            return null;
444        }
445
446        /**
447         * @return The initial availability without checking the enrollment status.
448         */
449        private int internalGetInitialAvailability() {
450            synchronized (mLock) {
451                // This detector has already been invalidated.
452                if (mAvailability == STATE_INVALID) {
453                    return STATE_INVALID;
454                }
455            }
456
457            ModuleProperties dspModuleProperties = null;
458            try {
459                dspModuleProperties =
460                        mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
461            } catch (RemoteException e) {
462                Slog.w(TAG, "RemoteException in getDspProperties!");
463            }
464            // No DSP available
465            if (dspModuleProperties == null) {
466                return STATE_HARDWARE_UNAVAILABLE;
467            }
468            // No enrollment application supports this keyphrase/locale
469            if (mKeyphraseMetadata == null) {
470                return STATE_KEYPHRASE_UNSUPPORTED;
471            }
472            return STATE_NOT_READY;
473        }
474
475        /**
476         * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
477         */
478        private boolean internalGetIsEnrolled(int keyphraseId) {
479            try {
480                return mModelManagementService.isEnrolledForKeyphrase(
481                        mVoiceInteractionService, keyphraseId);
482            } catch (RemoteException e) {
483                Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
484            }
485            return false;
486        }
487    }
488}
489