AlwaysOnHotwordDetector.java revision 055897208d659e9734a82def88be4a806ff55448
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.IRecognitionStatusCallback;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.SoundTrigger;
24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
25import android.hardware.soundtrigger.SoundTrigger.Keyphrase;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30import android.os.RemoteException;
31import android.util.Slog;
32
33import com.android.internal.app.IVoiceInteractionManagerService;
34
35import java.util.List;
36
37/**
38 * A class that lets a VoiceInteractionService implementation interact with
39 * always-on keyphrase detection APIs.
40 */
41public class AlwaysOnHotwordDetector {
42    //---- States of Keyphrase availability ----//
43    /**
44     * Indicates that the given keyphrase is not available on the system because of the
45     * hardware configuration.
46     */
47    public static final int KEYPHRASE_HARDWARE_UNAVAILABLE = -2;
48    /**
49     * Indicates that the given keyphrase is not supported.
50     */
51    public static final int KEYPHRASE_UNSUPPORTED = -1;
52    /**
53     * Indicates that the given keyphrase is not enrolled.
54     */
55    public static final int KEYPHRASE_UNENROLLED = 1;
56    /**
57     * Indicates that the given keyphrase is currently enrolled but not being actively listened for.
58     */
59    public static final int KEYPHRASE_ENROLLED = 2;
60
61    // Keyphrase management actions ----//
62    /** Indicates that we need to enroll. */
63    public static final int MANAGE_ACTION_ENROLL = 0;
64    /** Indicates that we need to re-enroll. */
65    public static final int MANAGE_ACTION_RE_ENROLL = 1;
66    /** Indicates that we need to un-enroll. */
67    public static final int MANAGE_ACTION_UN_ENROLL = 2;
68
69    /**
70     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
71     */
72    public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
73    public static final int STATUS_OK = SoundTrigger.STATUS_OK;
74
75    //---- Keyphrase recognition status ----//
76    /** Indicates that recognition is not available. */
77    public static final int RECOGNITION_STATUS_NOT_AVAILABLE = 0x01;
78    /** Indicates that recognition has not been requested. */
79    public static final int RECOGNITION_STATUS_NOT_REQUESTED = 0x02;
80    /** Indicates that recognition has been requested. */
81    public static final int RECOGNITION_STATUS_REQUESTED = 0x04;
82    /** Indicates that recognition has been temporarily disabled. */
83    public static final int RECOGNITION_STATUS_DISABLED_TEMPORARILY = 0x08;
84    /** Indicates that recognition is currently active . */
85    public static final int RECOGNITION_STATUS_ACTIVE = 0x10;
86
87    //-- Flags for startRecogntion    ----//
88    /** Empty flag for {@link #startRecognition(int)}. */
89    public static final int RECOGNITION_FLAG_NONE = 0;
90    /**
91     * Recognition flag for {@link #startRecognition(int)} that indicates
92     * whether the trigger audio for hotword needs to be captured.
93     */
94    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
95
96    //---- Recognition mode flags ----//
97    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
98
99    /** Simple recognition of the key phrase. Returned by {@link #getRecognitionStatus()} */
100    public static final int RECOGNITION_MODE_VOICE_TRIGGER
101            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
102    /** Trigger only if one user is identified. Returned by {@link #getRecognitionStatus()} */
103    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
104            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
105
106    static final String TAG = "AlwaysOnHotwordDetector";
107
108    private final String mText;
109    private final String mLocale;
110    /**
111     * The metadata of the Keyphrase, derived from the enrollment application.
112     * This may be null if this keyphrase isn't supported by the enrollment application.
113     */
114    private final KeyphraseMetadata mKeyphraseMetadata;
115    /**
116     * The sound model for the keyphrase, derived from the model management service
117     * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet.
118     */
119    private final KeyphraseSoundModel mEnrolledSoundModel;
120    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
121    private final int mAvailability;
122    private final IVoiceInteractionService mVoiceInteractionService;
123    private final IVoiceInteractionManagerService mModelManagementService;
124    private final SoundTriggerListener mInternalCallback;
125
126    private int mRecognitionState;
127
128    /**
129     * Callbacks for always-on hotword detection.
130     */
131    public interface Callback {
132        /**
133         * Called when the keyphrase is spoken.
134         *
135         * @param data Optional trigger audio data, if it was requested during
136         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
137         */
138        void onDetected(byte[] data);
139        /**
140         * Called when the detection for the associated keyphrase starts.
141         */
142        void onDetectionStarted();
143        /**
144         * Called when the detection for the associated keyphrase stops.
145         */
146        void onDetectionStopped();
147    }
148
149    /**
150     * @param text The keyphrase text to get the detector for.
151     * @param locale The java locale for the detector.
152     * @param callback A non-null Callback for receiving the recognition events.
153     * @param voiceInteractionService The current voice interaction service.
154     * @param modelManagementService A service that allows management of sound models.
155     *
156     * @hide
157     */
158    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
159            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
160            IVoiceInteractionService voiceInteractionService,
161            IVoiceInteractionManagerService modelManagementService) {
162        mText = text;
163        mLocale = locale;
164        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
165        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
166        mInternalCallback = new SoundTriggerListener(callback);
167        mVoiceInteractionService = voiceInteractionService;
168        mModelManagementService = modelManagementService;
169        if (mKeyphraseMetadata != null) {
170            mEnrolledSoundModel = internalGetKeyphraseSoundModel(mKeyphraseMetadata.id);
171        } else {
172            mEnrolledSoundModel = null;
173        }
174        mAvailability = internalGetAvailability();
175    }
176
177    /**
178     * Gets the state of always-on hotword detection for the given keyphrase and locale
179     * on this system.
180     * Availability implies that the hardware on this system is capable of listening for
181     * the given keyphrase or not.
182     *
183     * @return Indicates if always-on hotword detection is available for the given keyphrase.
184     *         The return code is one of {@link #KEYPHRASE_HARDWARE_UNAVAILABLE},
185     *         {@link #KEYPHRASE_UNSUPPORTED}, {@link #KEYPHRASE_UNENROLLED} or
186     *         {@link #KEYPHRASE_ENROLLED}.
187     */
188    public int getAvailability() {
189        return mAvailability;
190    }
191
192    /**
193     * Gets the recognition modes supported by the associated keyphrase.
194     *
195     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
196     *         Callers should check the availability by calling {@link #getAvailability()}
197     *         before calling this method to avoid this exception.
198     */
199    public int getSupportedRecognitionModes() {
200        if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE
201                || mAvailability == KEYPHRASE_UNSUPPORTED) {
202            throw new UnsupportedOperationException(
203                    "Getting supported recognition modes for the keyphrase is not supported");
204        }
205
206        return mKeyphraseMetadata.recognitionModeFlags;
207    }
208
209    /**
210     * Gets the status of the recognition.
211     * @return A flag comprised of {@link #RECOGNITION_STATUS_NOT_AVAILABLE},
212     *         {@link #RECOGNITION_STATUS_NOT_REQUESTED}, {@link #RECOGNITION_STATUS_REQUESTED},
213     *         {@link #RECOGNITION_STATUS_DISABLED_TEMPORARILY} and
214     *         {@link #RECOGNITION_STATUS_ACTIVE}.
215     */
216    public int getRecognitionStatus() {
217        return mRecognitionState;
218    }
219
220    /**
221     * Starts recognition for the associated keyphrase.
222     *
223     * @param recognitionFlags The flags to control the recognition properties.
224     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
225     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
226     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
227     * @throws UnsupportedOperationException if the recognition isn't supported.
228     *         Callers should check the availability by calling {@link #getAvailability()}
229     *         before calling this method to avoid this exception.
230     */
231    public int startRecognition(int recognitionFlags) {
232        if (mAvailability != KEYPHRASE_ENROLLED
233                || (mRecognitionState&RECOGNITION_STATUS_NOT_AVAILABLE) != 0) {
234            throw new UnsupportedOperationException(
235                    "Recognition for the given keyphrase is not supported");
236        }
237
238        mRecognitionState &= RECOGNITION_STATUS_REQUESTED;
239        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
240        // TODO: Do we need to do something about the confidence level here?
241        // TODO: Take in captureTriggerAudio as a method param here.
242        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
243                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
244        boolean captureTriggerAudio =
245                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
246        int code = STATUS_ERROR;
247        try {
248            code = mModelManagementService.startRecognition(mVoiceInteractionService,
249                    mKeyphraseMetadata.id, mEnrolledSoundModel, mInternalCallback,
250                    new RecognitionConfig(
251                            captureTriggerAudio, recognitionExtra, null /* additional data */));
252        } catch (RemoteException e) {
253            Slog.w(TAG, "RemoteException in startRecognition!");
254        }
255        if (code != STATUS_OK) {
256            Slog.w(TAG, "startRecognition() failed with error code " + code);
257        }
258        return code;
259    }
260
261    /**
262     * Stops recognition for the associated keyphrase.
263     *
264     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
265     * @throws UnsupportedOperationException if the recognition isn't supported.
266     *         Callers should check the availability by calling {@link #getAvailability()}
267     *         before calling this method to avoid this exception.
268     */
269    public int stopRecognition() {
270        if (mAvailability != KEYPHRASE_ENROLLED) {
271            throw new UnsupportedOperationException(
272                    "Recognition for the given keyphrase is not supported");
273        }
274
275        mRecognitionState &= ~RECOGNITION_STATUS_NOT_REQUESTED;
276        int code = STATUS_ERROR;
277        try {
278            code = mModelManagementService.stopRecognition(
279                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
280        } catch (RemoteException e) {
281            Slog.w(TAG, "RemoteException in stopRecognition!");
282        }
283
284        if (code != STATUS_OK) {
285            Slog.w(TAG, "stopRecognition() failed with error code " + code);
286        }
287        return code;
288    }
289
290    /**
291     * Gets an intent to manage the associated keyphrase.
292     *
293     * @param action The manage action that needs to be performed.
294     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
295     *        {@link #MANAGE_ACTION_UN_ENROLL}.
296     * @return An {@link Intent} to manage the given keyphrase.
297     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
298     *         Callers should check the availability by calling {@link #getAvailability()}
299     *         before calling this method to avoid this exception.
300     */
301    public Intent getManageIntent(int action) {
302        if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE
303                || mAvailability == KEYPHRASE_UNSUPPORTED) {
304            throw new UnsupportedOperationException(
305                    "Managing the given keyphrase is not supported");
306        }
307        if (action != MANAGE_ACTION_ENROLL
308                && action != MANAGE_ACTION_RE_ENROLL
309                && action != MANAGE_ACTION_UN_ENROLL) {
310            throw new IllegalArgumentException("Invalid action specified " + action);
311        }
312
313        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
314    }
315
316    private int internalGetAvailability() {
317        ModuleProperties dspModuleProperties = null;
318        try {
319            dspModuleProperties =
320                    mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
321        } catch (RemoteException e) {
322            Slog.w(TAG, "RemoteException in getDspProperties!");
323        }
324        // No DSP available
325        if (dspModuleProperties == null) {
326            mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE;
327            return KEYPHRASE_HARDWARE_UNAVAILABLE;
328        }
329        // No enrollment application supports this keyphrase/locale
330        if (mKeyphraseMetadata == null) {
331            mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE;
332            return KEYPHRASE_UNSUPPORTED;
333        }
334        // This keyphrase hasn't been enrolled.
335        if (mEnrolledSoundModel == null) {
336            mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE;
337            return KEYPHRASE_UNENROLLED;
338        }
339        // Mark recognition as available
340        mRecognitionState &= ~RECOGNITION_STATUS_NOT_AVAILABLE;
341        return KEYPHRASE_ENROLLED;
342    }
343
344    /**
345     * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
346     */
347    private KeyphraseSoundModel internalGetKeyphraseSoundModel(int keyphraseId) {
348        List<KeyphraseSoundModel> soundModels;
349        try {
350            soundModels = mModelManagementService
351                    .listRegisteredKeyphraseSoundModels(mVoiceInteractionService);
352            if (soundModels == null || soundModels.isEmpty()) {
353                Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId);
354                return null;
355            }
356            for (KeyphraseSoundModel soundModel : soundModels) {
357                if (soundModel.keyphrases == null) {
358                    continue;
359                }
360                for (Keyphrase keyphrase : soundModel.keyphrases) {
361                    // TODO: Check the user handle here to only load a model for the current user.
362                    if (keyphrase.id == keyphraseId) {
363                        return soundModel;
364                    }
365                }
366            }
367        } catch (RemoteException e) {
368            Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
369        }
370        return null;
371    }
372
373    /** @hide */
374    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
375        private final Callback mCallback;
376
377        public SoundTriggerListener(Callback callback) {
378            this.mCallback = callback;
379        }
380
381        @Override
382        public void onDetected(byte[] data) {
383            Slog.i(TAG, "onKeyphraseSpoken");
384            mCallback.onDetected(data);
385        }
386
387        @Override
388        public void onDetectionStarted() {
389            // TODO: Set the RECOGNITION_STATUS_ACTIVE flag here.
390            mCallback.onDetectionStarted();
391        }
392
393        @Override
394        public void onDetectionStopped() {
395            // TODO: Unset the RECOGNITION_STATUS_ACTIVE flag here.
396            mCallback.onDetectionStopped();
397        }
398    }
399}
400