AlwaysOnHotwordDetector.java revision e6cd2476aa9d07df0de0a0081ab66d8401a7e228
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.Keyphrase;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.KeyphraseSoundModel;
24import android.hardware.soundtrigger.SoundTrigger;
25import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
27import android.hardware.soundtrigger.SoundTriggerHelper;
28import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
29import android.os.RemoteException;
30import android.util.Slog;
31
32import com.android.internal.app.IVoiceInteractionManagerService;
33
34import java.util.List;
35
36/**
37 * A class that lets a VoiceInteractionService implementation interact with
38 * always-on keyphrase detection APIs.
39 */
40public class AlwaysOnHotwordDetector {
41    //---- States of Keyphrase availability ----//
42    /**
43     * Indicates that the given keyphrase is not available on the system because of the
44     * hardware configuration.
45     */
46    public static final int KEYPHRASE_HARDWARE_UNAVAILABLE = -2;
47    /**
48     * Indicates that the given keyphrase is not supported.
49     */
50    public static final int KEYPHRASE_UNSUPPORTED = -1;
51    /**
52     * Indicates that the given keyphrase is not enrolled.
53     */
54    public static final int KEYPHRASE_UNENROLLED = 1;
55    /**
56     * Indicates that the given keyphrase is currently enrolled but not being actively listened for.
57     */
58    public static final int KEYPHRASE_ENROLLED = 2;
59
60    // Keyphrase management actions ----//
61    /** Indicates that we need to enroll. */
62    public static final int MANAGE_ACTION_ENROLL = 0;
63    /** Indicates that we need to re-enroll. */
64    public static final int MANAGE_ACTION_RE_ENROLL = 1;
65    /** Indicates that we need to un-enroll. */
66    public static final int MANAGE_ACTION_UN_ENROLL = 2;
67
68    /**
69     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
70     */
71    public static final int STATUS_ERROR = Integer.MIN_VALUE;
72    public static final int STATUS_OK = 1;
73
74    //---- Keyphrase recognition status ----//
75    /** Indicates that recognition is not available. */
76    public static final int RECOGNITION_STATUS_NOT_AVAILABLE = 0x01;
77    /** Indicates that recognition has not been requested. */
78    public static final int RECOGNITION_STATUS_NOT_REQUESTED = 0x02;
79    /** Indicates that recognition has been requested. */
80    public static final int RECOGNITION_STATUS_REQUESTED = 0x04;
81    /** Indicates that recognition has been temporarily disabled. */
82    public static final int RECOGNITION_STATUS_DISABLED_TEMPORARILY = 0x08;
83    /** Indicates that recognition is currently active . */
84    public static final int RECOGNITION_STATUS_ACTIVE = 0x10;
85
86    //-- Flags for startRecogntion    ----//
87    /** Empty flag for {@link #startRecognition(int)}. */
88    public static final int RECOGNITION_FLAG_NONE = 0;
89    /**
90     * Recognition flag for {@link #startRecognition(int)} that indicates
91     * whether the trigger audio for hotword needs to be captured.
92     */
93    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
94
95    //---- Recognition mode flags ----//
96    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
97
98    /** Simple recognition of the key phrase. Returned by {@link #getRecognitionStatus()} */
99    public static final int RECOGNITION_MODE_VOICE_TRIGGER
100            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
101    /** Trigger only if one user is identified. Returned by {@link #getRecognitionStatus()} */
102    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
103            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
104
105    static final String TAG = "AlwaysOnHotwordDetector";
106
107    private final String mText;
108    private final String mLocale;
109    /**
110     * The metadata of the Keyphrase, derived from the enrollment application.
111     * This may be null if this keyphrase isn't supported by the enrollment application.
112     */
113    private final KeyphraseMetadata mKeyphraseMetadata;
114    /**
115     * The sound model for the keyphrase, derived from the model management service
116     * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet.
117     */
118    private final KeyphraseSoundModel mEnrolledSoundModel;
119    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
120    private final SoundTriggerHelper mSoundTriggerHelper;
121    private final SoundTriggerHelper.Listener mListener;
122    private final int mAvailability;
123    private final IVoiceInteractionService mVoiceInteractionService;
124    private final IVoiceInteractionManagerService mModelManagementService;
125
126    private int mRecognitionState;
127
128    /**
129     * Callbacks for always-on hotword detection.
130     */
131    public interface Callback {
132        /**
133         * Called when the keyphrase is spoken.
134         *
135         * @param data Optional trigger audio data, if it was requested during
136         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
137         */
138        void onDetected(byte[] data);
139        /**
140         * Called when the detection for the associated keyphrase starts.
141         */
142        void onDetectionStarted();
143        /**
144         * Called when the detection for the associated keyphrase stops.
145         */
146        void onDetectionStopped();
147    }
148
149    /**
150     * @param text The keyphrase text to get the detector for.
151     * @param locale The java locale for the detector.
152     * @param callback A non-null Callback for receiving the recognition events.
153     * @param voiceInteractionService The current voice interaction service.
154     * @param modelManagementService A service that allows management of sound models.
155     *
156     * @hide
157     */
158    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
159            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
160            SoundTriggerHelper soundTriggerHelper,
161            IVoiceInteractionService voiceInteractionService,
162            IVoiceInteractionManagerService modelManagementService) {
163        mText = text;
164        mLocale = locale;
165        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
166        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
167        mListener = new SoundTriggerListener(callback);
168        mSoundTriggerHelper = soundTriggerHelper;
169        mVoiceInteractionService = voiceInteractionService;
170        mModelManagementService = modelManagementService;
171        if (mKeyphraseMetadata != null) {
172            mEnrolledSoundModel = internalGetKeyphraseSoundModel(mKeyphraseMetadata.id);
173        } else {
174            mEnrolledSoundModel = null;
175        }
176        mAvailability = internalGetAvailability();
177    }
178
179    /**
180     * Gets the state of always-on hotword detection for the given keyphrase and locale
181     * on this system.
182     * Availability implies that the hardware on this system is capable of listening for
183     * the given keyphrase or not.
184     *
185     * @return Indicates if always-on hotword detection is available for the given keyphrase.
186     *         The return code is one of {@link #KEYPHRASE_HARDWARE_UNAVAILABLE},
187     *         {@link #KEYPHRASE_UNSUPPORTED}, {@link #KEYPHRASE_UNENROLLED} or
188     *         {@link #KEYPHRASE_ENROLLED}.
189     */
190    public int getAvailability() {
191        return mAvailability;
192    }
193
194    /**
195     * Gets the recognition modes supported by the associated keyphrase.
196     *
197     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
198     *         Callers should check the availability by calling {@link #getAvailability()}
199     *         before calling this method to avoid this exception.
200     */
201    public int getSupportedRecognitionModes() {
202        if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE
203                || mAvailability == KEYPHRASE_UNSUPPORTED) {
204            throw new UnsupportedOperationException(
205                    "Getting supported recognition modes for the keyphrase is not supported");
206        }
207
208        return mKeyphraseMetadata.recognitionModeFlags;
209    }
210
211    /**
212     * Gets the status of the recognition.
213     * @return A flag comprised of {@link #RECOGNITION_STATUS_NOT_AVAILABLE},
214     *         {@link #RECOGNITION_STATUS_NOT_REQUESTED}, {@link #RECOGNITION_STATUS_REQUESTED},
215     *         {@link #RECOGNITION_STATUS_DISABLED_TEMPORARILY} and
216     *         {@link #RECOGNITION_STATUS_ACTIVE}.
217     */
218    public int getRecognitionStatus() {
219        return mRecognitionState;
220    }
221
222    /**
223     * Starts recognition for the associated keyphrase.
224     *
225     * @param recognitionFlags The flags to control the recognition properties.
226     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
227     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
228     * @return One of {@link #STATUS_ERROR} or {@link #STATUS_OK}.
229     * @throws UnsupportedOperationException if the recognition isn't supported.
230     *         Callers should check the availability by calling {@link #getAvailability()}
231     *         before calling this method to avoid this exception.
232     */
233    public int startRecognition(int recognitionFlags) {
234        if (mAvailability != KEYPHRASE_ENROLLED
235                || (mRecognitionState&RECOGNITION_STATUS_NOT_AVAILABLE) != 0) {
236            throw new UnsupportedOperationException(
237                    "Recognition for the given keyphrase is not supported");
238        }
239
240        mRecognitionState &= RECOGNITION_STATUS_REQUESTED;
241        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
242        // TODO: Do we need to do something about the confidence level here?
243        // TODO: Take in captureTriggerAudio as a method param here.
244        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
245                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
246        boolean captureTriggerAudio =
247                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
248        int code = mSoundTriggerHelper.startRecognition(mKeyphraseMetadata.id,
249                mEnrolledSoundModel.convertToSoundTriggerKeyphraseSoundModel(), mListener,
250                new RecognitionConfig(
251                        captureTriggerAudio, recognitionExtra,null /* additional data */));
252        if (code != SoundTriggerHelper.STATUS_OK) {
253            Slog.w(TAG, "startRecognition() failed with error code " + code);
254            return STATUS_ERROR;
255        } else {
256            return STATUS_OK;
257        }
258    }
259
260    /**
261     * Stops recognition for the associated keyphrase.
262     *
263     * @return One of {@link #STATUS_ERROR} or {@link #STATUS_OK}.
264     * @throws UnsupportedOperationException if the recognition isn't supported.
265     *         Callers should check the availability by calling {@link #getAvailability()}
266     *         before calling this method to avoid this exception.
267     */
268    public int stopRecognition() {
269        if (mAvailability != KEYPHRASE_ENROLLED) {
270            throw new UnsupportedOperationException(
271                    "Recognition for the given keyphrase is not supported");
272        }
273
274        mRecognitionState &= ~RECOGNITION_STATUS_NOT_REQUESTED;
275        int code = mSoundTriggerHelper.stopRecognition(mKeyphraseMetadata.id, mListener);
276
277        if (code != SoundTriggerHelper.STATUS_OK) {
278            Slog.w(TAG, "stopRecognition() failed with error code " + code);
279            return STATUS_ERROR;
280        } else {
281            return STATUS_OK;
282        }
283    }
284
285    /**
286     * Gets an intent to manage the associated keyphrase.
287     *
288     * @param action The manage action that needs to be performed.
289     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
290     *        {@link #MANAGE_ACTION_UN_ENROLL}.
291     * @return An {@link Intent} to manage the given keyphrase.
292     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
293     *         Callers should check the availability by calling {@link #getAvailability()}
294     *         before calling this method to avoid this exception.
295     */
296    public Intent getManageIntent(int action) {
297        if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE
298                || mAvailability == KEYPHRASE_UNSUPPORTED) {
299            throw new UnsupportedOperationException(
300                    "Managing the given keyphrase is not supported");
301        }
302        if (action != MANAGE_ACTION_ENROLL
303                && action != MANAGE_ACTION_RE_ENROLL
304                && action != MANAGE_ACTION_UN_ENROLL) {
305            throw new IllegalArgumentException("Invalid action specified " + action);
306        }
307
308        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
309    }
310
311    private int internalGetAvailability() {
312        // No DSP available
313        if (mSoundTriggerHelper.dspInfo == null) {
314            mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE;
315            return KEYPHRASE_HARDWARE_UNAVAILABLE;
316        }
317        // No enrollment application supports this keyphrase/locale
318        if (mKeyphraseMetadata == null) {
319            mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE;
320            return KEYPHRASE_UNSUPPORTED;
321        }
322        // This keyphrase hasn't been enrolled.
323        if (mEnrolledSoundModel == null) {
324            mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE;
325            return KEYPHRASE_UNENROLLED;
326        }
327        // Mark recognition as available
328        mRecognitionState &= ~RECOGNITION_STATUS_NOT_AVAILABLE;
329        return KEYPHRASE_ENROLLED;
330    }
331
332    /**
333     * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
334     */
335    private KeyphraseSoundModel internalGetKeyphraseSoundModel(int keyphraseId) {
336        List<KeyphraseSoundModel> soundModels;
337        try {
338            soundModels = mModelManagementService
339                    .listRegisteredKeyphraseSoundModels(mVoiceInteractionService);
340            if (soundModels == null || soundModels.isEmpty()) {
341                Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId);
342                return null;
343            }
344            for (KeyphraseSoundModel soundModel : soundModels) {
345                if (soundModel.keyphrases == null) {
346                    continue;
347                }
348                for (Keyphrase keyphrase : soundModel.keyphrases) {
349                    // TODO: Check the user handle here to only load a model for the current user.
350                    if (keyphrase.id == keyphraseId) {
351                        return soundModel;
352                    }
353                }
354            }
355        } catch (RemoteException e) {
356            Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
357        }
358        return null;
359    }
360
361    /** @hide */
362    static final class SoundTriggerListener implements SoundTriggerHelper.Listener {
363        private final Callback mCallback;
364
365        public SoundTriggerListener(Callback callback) {
366            this.mCallback = callback;
367        }
368
369        @Override
370        public void onKeyphraseSpoken(byte[] data) {
371            Slog.i(TAG, "onKeyphraseSpoken");
372            mCallback.onDetected(data);
373        }
374
375        @Override
376        public void onListeningStateChanged(int state) {
377            Slog.i(TAG, "onListeningStateChanged: state=" + state);
378            // TODO: Set/unset the RECOGNITION_STATUS_ACTIVE flag here.
379            if (state == SoundTriggerHelper.STATE_STARTED) {
380                mCallback.onDetectionStarted();
381            } else if (state == SoundTriggerHelper.STATE_STOPPED) {
382                mCallback.onDetectionStopped();
383            }
384        }
385    }
386}
387