AlwaysOnHotwordDetector.java revision 110f569b47bc21fb38ec25b6110ee302ce137e06
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.service.voice;
18
19import android.content.Intent;
20import android.hardware.soundtrigger.IRecognitionStatusCallback;
21import android.hardware.soundtrigger.KeyphraseEnrollmentInfo;
22import android.hardware.soundtrigger.KeyphraseMetadata;
23import android.hardware.soundtrigger.SoundTrigger;
24import android.hardware.soundtrigger.SoundTrigger.ConfidenceLevel;
25import android.hardware.soundtrigger.SoundTrigger.Keyphrase;
26import android.hardware.soundtrigger.SoundTrigger.KeyphraseRecognitionExtra;
27import android.hardware.soundtrigger.SoundTrigger.KeyphraseSoundModel;
28import android.hardware.soundtrigger.SoundTrigger.ModuleProperties;
29import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
30import android.os.Handler;
31import android.os.Message;
32import android.os.RemoteException;
33import android.util.Slog;
34
35import com.android.internal.app.IVoiceInteractionManagerService;
36
37import java.util.List;
38
39/**
40 * A class that lets a VoiceInteractionService implementation interact with
41 * always-on keyphrase detection APIs.
42 */
43public class AlwaysOnHotwordDetector {
44    //---- States of Keyphrase availability ----//
45    /**
46     * Indicates that the given keyphrase is not available on the system because of the
47     * hardware configuration.
48     */
49    public static final int KEYPHRASE_HARDWARE_UNAVAILABLE = -2;
50    /**
51     * Indicates that the given keyphrase is not supported.
52     */
53    public static final int KEYPHRASE_UNSUPPORTED = -1;
54    /**
55     * Indicates that the given keyphrase is not enrolled.
56     */
57    public static final int KEYPHRASE_UNENROLLED = 1;
58    /**
59     * Indicates that the given keyphrase is currently enrolled but not being actively listened for.
60     */
61    public static final int KEYPHRASE_ENROLLED = 2;
62
63    // Keyphrase management actions ----//
64    /** Indicates that we need to enroll. */
65    public static final int MANAGE_ACTION_ENROLL = 0;
66    /** Indicates that we need to re-enroll. */
67    public static final int MANAGE_ACTION_RE_ENROLL = 1;
68    /** Indicates that we need to un-enroll. */
69    public static final int MANAGE_ACTION_UN_ENROLL = 2;
70
71    /**
72     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
73     */
74    public static final int STATUS_ERROR = SoundTrigger.STATUS_ERROR;
75    public static final int STATUS_OK = SoundTrigger.STATUS_OK;
76
77    //-- Flags for startRecogntion    ----//
78    /** Empty flag for {@link #startRecognition(int)}. */
79    public static final int RECOGNITION_FLAG_NONE = 0;
80    /**
81     * Recognition flag for {@link #startRecognition(int)} that indicates
82     * whether the trigger audio for hotword needs to be captured.
83     */
84    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
85
86    //---- Recognition mode flags ----//
87    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.
88
89    /**
90     * Simple recognition of the key phrase. Returned by {@link #getSupportedRecognitionModes()}
91     */
92    public static final int RECOGNITION_MODE_VOICE_TRIGGER
93            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
94    /**
95     * Trigger only if one user is identified. Returned by {@link #getSupportedRecognitionModes()}
96     */
97    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
98            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;
99
100    static final String TAG = "AlwaysOnHotwordDetector";
101
102    private static final int MSG_HOTWORD_DETECTED = 1;
103    private static final int MSG_DETECTION_STOPPED = 2;
104
105    private final String mText;
106    private final String mLocale;
107    /**
108     * The metadata of the Keyphrase, derived from the enrollment application.
109     * This may be null if this keyphrase isn't supported by the enrollment application.
110     */
111    private final KeyphraseMetadata mKeyphraseMetadata;
112    /**
113     * The sound model for the keyphrase, derived from the model management service
114     * (IVoiceInteractionManagerService). May be null if the keyphrase isn't enrolled yet.
115     */
116    private final KeyphraseSoundModel mEnrolledSoundModel;
117    private final KeyphraseEnrollmentInfo mKeyphraseEnrollmentInfo;
118    private final IVoiceInteractionService mVoiceInteractionService;
119    private final IVoiceInteractionManagerService mModelManagementService;
120    private final SoundTriggerListener mInternalCallback;
121    private final Callback mExternalCallback;
122    private final boolean mDisabled;
123
124    /**
125     * Callbacks for always-on hotword detection.
126     */
127    public interface Callback {
128        /**
129         * Called when the keyphrase is spoken.
130         *
131         * @param data Optional trigger audio data, if it was requested during
132         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
133         */
134        void onDetected(byte[] data);
135        /**
136         * Called when the detection for the associated keyphrase stops.
137         */
138        void onDetectionStopped();
139    }
140
141    /**
142     * @param text The keyphrase text to get the detector for.
143     * @param locale The java locale for the detector.
144     * @param callback A non-null Callback for receiving the recognition events.
145     * @param voiceInteractionService The current voice interaction service.
146     * @param modelManagementService A service that allows management of sound models.
147     *
148     * @hide
149     */
150    public AlwaysOnHotwordDetector(String text, String locale, Callback callback,
151            KeyphraseEnrollmentInfo keyphraseEnrollmentInfo,
152            IVoiceInteractionService voiceInteractionService,
153            IVoiceInteractionManagerService modelManagementService) {
154        mText = text;
155        mLocale = locale;
156        mKeyphraseEnrollmentInfo = keyphraseEnrollmentInfo;
157        mKeyphraseMetadata = mKeyphraseEnrollmentInfo.getKeyphraseMetadata(text, locale);
158        mExternalCallback = callback;
159        mInternalCallback = new SoundTriggerListener(new MyHandler());
160        mVoiceInteractionService = voiceInteractionService;
161        mModelManagementService = modelManagementService;
162        if (mKeyphraseMetadata != null) {
163            mEnrolledSoundModel = internalGetKeyphraseSoundModel(mKeyphraseMetadata.id);
164        } else {
165            mEnrolledSoundModel = null;
166        }
167        int initialAvailability = internalGetAvailabilityLocked();
168        mDisabled = (initialAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE)
169                || (initialAvailability == KEYPHRASE_UNSUPPORTED);
170    }
171
172    /**
173     * Gets the state of always-on hotword detection for the given keyphrase and locale
174     * on this system.
175     * Availability implies that the hardware on this system is capable of listening for
176     * the given keyphrase or not.
177     *
178     * @return Indicates if always-on hotword detection is available for the given keyphrase.
179     *         The return code is one of {@link #KEYPHRASE_HARDWARE_UNAVAILABLE},
180     *         {@link #KEYPHRASE_UNSUPPORTED}, {@link #KEYPHRASE_UNENROLLED} or
181     *         {@link #KEYPHRASE_ENROLLED}.
182     */
183    public int getAvailability() {
184        synchronized (this) {
185            return internalGetAvailabilityLocked();
186        }
187    }
188
189    /**
190     * Gets the recognition modes supported by the associated keyphrase.
191     *
192     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
193     *         Callers should check the availability by calling {@link #getAvailability()}
194     *         before calling this method to avoid this exception.
195     */
196    public int getSupportedRecognitionModes() {
197        synchronized (this) {
198            return getSupportedRecognitionModesLocked();
199        }
200    }
201
202    private int getSupportedRecognitionModesLocked() {
203        if (mDisabled) {
204            throw new UnsupportedOperationException(
205                    "Getting supported recognition modes for the keyphrase is not supported");
206        }
207
208        return mKeyphraseMetadata.recognitionModeFlags;
209    }
210
211    /**
212     * Starts recognition for the associated keyphrase.
213     *
214     * @param recognitionFlags The flags to control the recognition properties.
215     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
216     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
217     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
218     * @throws UnsupportedOperationException if the recognition isn't supported.
219     *         Callers should check the availability by calling {@link #getAvailability()}
220     *         before calling this method to avoid this exception.
221     */
222    public int startRecognition(int recognitionFlags) {
223        synchronized (this) {
224            return startRecognitionLocked(recognitionFlags);
225        }
226    }
227
228    private int startRecognitionLocked(int recognitionFlags) {
229        if (internalGetAvailabilityLocked() != KEYPHRASE_ENROLLED) {
230            throw new UnsupportedOperationException(
231                    "Recognition for the given keyphrase is not supported");
232        }
233
234        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
235        // TODO: Do we need to do something about the confidence level here?
236        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
237                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
238        boolean captureTriggerAudio =
239                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
240        int code = STATUS_ERROR;
241        try {
242            code = mModelManagementService.startRecognition(mVoiceInteractionService,
243                    mKeyphraseMetadata.id, mEnrolledSoundModel, mInternalCallback,
244                    new RecognitionConfig(
245                            captureTriggerAudio, recognitionExtra, null /* additional data */));
246        } catch (RemoteException e) {
247            Slog.w(TAG, "RemoteException in startRecognition!");
248        }
249        if (code != STATUS_OK) {
250            Slog.w(TAG, "startRecognition() failed with error code " + code);
251        }
252        return code;
253    }
254
255    /**
256     * Stops recognition for the associated keyphrase.
257     *
258     * @return {@link #STATUS_OK} if the call succeeds, an error code otherwise.
259     * @throws UnsupportedOperationException if the recognition isn't supported.
260     *         Callers should check the availability by calling {@link #getAvailability()}
261     *         before calling this method to avoid this exception.
262     */
263    public int stopRecognition() {
264        synchronized (this) {
265            return stopRecognitionLocked();
266        }
267    }
268
269    private synchronized int stopRecognitionLocked() {
270        if (internalGetAvailabilityLocked() != KEYPHRASE_ENROLLED) {
271            throw new UnsupportedOperationException(
272                    "Recognition for the given keyphrase is not supported");
273        }
274
275        int code = STATUS_ERROR;
276        try {
277            code = mModelManagementService.stopRecognition(
278                    mVoiceInteractionService, mKeyphraseMetadata.id, mInternalCallback);
279        } catch (RemoteException e) {
280            Slog.w(TAG, "RemoteException in stopRecognition!");
281        }
282
283        if (code != STATUS_OK) {
284            Slog.w(TAG, "stopRecognition() failed with error code " + code);
285        }
286        return code;
287    }
288
289    /**
290     * Gets an intent to manage the associated keyphrase.
291     *
292     * @param action The manage action that needs to be performed.
293     *        One of {@link #MANAGE_ACTION_ENROLL}, {@link #MANAGE_ACTION_RE_ENROLL} or
294     *        {@link #MANAGE_ACTION_UN_ENROLL}.
295     * @return An {@link Intent} to manage the given keyphrase.
296     * @throws UnsupportedOperationException if managing they keyphrase isn't supported.
297     *         Callers should check the availability by calling {@link #getAvailability()}
298     *         before calling this method to avoid this exception.
299     */
300    public Intent getManageIntent(int action) {
301        if (mDisabled) {
302            throw new UnsupportedOperationException(
303                    "Managing the given keyphrase is not supported");
304        }
305        if (action != MANAGE_ACTION_ENROLL
306                && action != MANAGE_ACTION_RE_ENROLL
307                && action != MANAGE_ACTION_UN_ENROLL) {
308            throw new IllegalArgumentException("Invalid action specified " + action);
309        }
310
311        return mKeyphraseEnrollmentInfo.getManageKeyphraseIntent(action, mText, mLocale);
312    }
313
314    private int internalGetAvailabilityLocked() {
315        ModuleProperties dspModuleProperties = null;
316        try {
317            dspModuleProperties =
318                    mModelManagementService.getDspModuleProperties(mVoiceInteractionService);
319        } catch (RemoteException e) {
320            Slog.w(TAG, "RemoteException in getDspProperties!");
321        }
322        // No DSP available
323        if (dspModuleProperties == null) {
324            return KEYPHRASE_HARDWARE_UNAVAILABLE;
325        }
326        // No enrollment application supports this keyphrase/locale
327        if (mKeyphraseMetadata == null) {
328            return KEYPHRASE_UNSUPPORTED;
329        }
330        // This keyphrase hasn't been enrolled.
331        if (mEnrolledSoundModel == null) {
332            return KEYPHRASE_UNENROLLED;
333        }
334        return KEYPHRASE_ENROLLED;
335    }
336
337    /**
338     * @return The corresponding {@link KeyphraseSoundModel} or null if none is found.
339     */
340    private KeyphraseSoundModel internalGetKeyphraseSoundModel(int keyphraseId) {
341        List<KeyphraseSoundModel> soundModels;
342        try {
343            soundModels = mModelManagementService
344                    .listRegisteredKeyphraseSoundModels(mVoiceInteractionService);
345            if (soundModels == null || soundModels.isEmpty()) {
346                Slog.i(TAG, "No available sound models for keyphrase ID: " + keyphraseId);
347                return null;
348            }
349            for (KeyphraseSoundModel soundModel : soundModels) {
350                if (soundModel.keyphrases == null) {
351                    continue;
352                }
353                for (Keyphrase keyphrase : soundModel.keyphrases) {
354                    if (keyphrase.id == keyphraseId) {
355                        return soundModel;
356                    }
357                }
358            }
359        } catch (RemoteException e) {
360            Slog.w(TAG, "RemoteException in listRegisteredKeyphraseSoundModels!");
361        }
362        return null;
363    }
364
365    /** @hide */
366    static final class SoundTriggerListener extends IRecognitionStatusCallback.Stub {
367        private final Handler mHandler;
368
369        public SoundTriggerListener(Handler handler) {
370            mHandler = handler;
371        }
372
373        @Override
374        public void onDetected(byte[] data) {
375            Slog.i(TAG, "onDetected");
376            Message message = Message.obtain(mHandler, MSG_HOTWORD_DETECTED);
377            message.obj = data;
378            message.sendToTarget();
379        }
380
381        @Override
382        public void onDetectionStopped() {
383            Slog.i(TAG, "onDetectionStopped");
384            mHandler.sendEmptyMessage(MSG_DETECTION_STOPPED);
385        }
386    }
387
388    class MyHandler extends Handler {
389        @Override
390        public void handleMessage(Message msg) {
391            switch (msg.what) {
392                case MSG_HOTWORD_DETECTED:
393                    mExternalCallback.onDetected((byte[]) msg.obj);
394                    break;
395                case MSG_DETECTION_STOPPED:
396                    mExternalCallback.onDetectionStopped();
397                default:
398                    super.handleMessage(msg);
399            }
400        }
401    }
402}
403