1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.media.soundtrigger;
18import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;
19
20import android.annotation.IntDef;
21import android.annotation.NonNull;
22import android.annotation.Nullable;
23import android.annotation.RequiresPermission;
24import android.annotation.SystemApi;
25import android.hardware.soundtrigger.IRecognitionStatusCallback;
26import android.hardware.soundtrigger.SoundTrigger;
27import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
28import android.media.AudioFormat;
29import android.os.Handler;
30import android.os.Looper;
31import android.os.Message;
32import android.os.ParcelUuid;
33import android.os.RemoteException;
34import android.util.Slog;
35
36import com.android.internal.app.ISoundTriggerService;
37
38import java.io.PrintWriter;
39import java.lang.annotation.Retention;
40import java.lang.annotation.RetentionPolicy;
41import java.util.UUID;
42
43/**
44 * A class that allows interaction with the actual sound trigger detection on the system.
45 * Sound trigger detection refers to a detectors that match generic sound patterns that are
46 * not voice-based. The voice-based recognition models should utilize the {@link
47 * VoiceInteractionService} instead. Access to this class is protected by a permission
48 * granted only to system or privileged apps.
49 *
50 * @hide
51 */
52@SystemApi
53public final class SoundTriggerDetector {
54    private static final boolean DBG = false;
55    private static final String TAG = "SoundTriggerDetector";
56
57    private static final int MSG_AVAILABILITY_CHANGED = 1;
58    private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
59    private static final int MSG_DETECTION_ERROR = 3;
60    private static final int MSG_DETECTION_PAUSE = 4;
61    private static final int MSG_DETECTION_RESUME = 5;
62
63    private final Object mLock = new Object();
64
65    private final ISoundTriggerService mSoundTriggerService;
66    private final UUID mSoundModelId;
67    private final Callback mCallback;
68    private final Handler mHandler;
69    private final RecognitionCallback mRecognitionCallback;
70
71    /** @hide */
72    @Retention(RetentionPolicy.SOURCE)
73    @IntDef(flag = true,
74            value = {
75                RECOGNITION_FLAG_NONE,
76                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
77                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
78            })
79    public @interface RecognitionFlags {}
80
81    /**
82     * Empty flag for {@link #startRecognition(int)}.
83     *
84     *  @hide
85     */
86    public static final int RECOGNITION_FLAG_NONE = 0;
87
88    /**
89     * Recognition flag for {@link #startRecognition(int)} that indicates
90     * whether the trigger audio for hotword needs to be captured.
91     */
92    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;
93
94    /**
95     * Recognition flag for {@link #startRecognition(int)} that indicates
96     * whether the recognition should keep going on even after the
97     * model triggers.
98     * If this flag is specified, it's possible to get multiple
99     * triggers after a call to {@link #startRecognition(int)}, if the model
100     * triggers multiple times.
101     * When this isn't specified, the default behavior is to stop recognition once the
102     * trigger happenss, till the caller starts recognition again.
103     */
104    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;
105
106    /**
107     * Additional payload for {@link Callback#onDetected}.
108     */
109    public static class EventPayload {
110        private final boolean mTriggerAvailable;
111
112        // Indicates if {@code captureSession} can be used to continue capturing more audio
113        // from the DSP hardware.
114        private final boolean mCaptureAvailable;
115        // The session to use when attempting to capture more audio from the DSP hardware.
116        private final int mCaptureSession;
117        private final AudioFormat mAudioFormat;
118        // Raw data associated with the event.
119        // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
120        private final byte[] mData;
121
122        private EventPayload(boolean triggerAvailable, boolean captureAvailable,
123                AudioFormat audioFormat, int captureSession, byte[] data) {
124            mTriggerAvailable = triggerAvailable;
125            mCaptureAvailable = captureAvailable;
126            mCaptureSession = captureSession;
127            mAudioFormat = audioFormat;
128            mData = data;
129        }
130
131        /**
132         * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
133         * May be null if there's no audio present.
134         */
135        @Nullable
136        public AudioFormat getCaptureAudioFormat() {
137            return mAudioFormat;
138        }
139
140        /**
141         * Gets the raw audio that triggered the detector.
142         * This may be null if the trigger audio isn't available.
143         * If non-null, the format of the audio can be obtained by calling
144         * {@link #getCaptureAudioFormat()}.
145         *
146         * @see AlwaysOnHotwordDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
147         */
148        @Nullable
149        public byte[] getTriggerAudio() {
150            if (mTriggerAvailable) {
151                return mData;
152            } else {
153                return null;
154            }
155        }
156
157        /**
158         * Gets the opaque data passed from the detection engine for the event.
159         * This may be null if it was not populated by the engine, or if the data is known to
160         * contain the trigger audio.
161         *
162         * @see #getTriggerAudio
163         *
164         * @hide
165         */
166        @Nullable
167        public byte[] getData() {
168            if (!mTriggerAvailable) {
169                return mData;
170            } else {
171                return null;
172            }
173        }
174
175        /**
176         * Gets the session ID to start a capture from the DSP.
177         * This may be null if streaming capture isn't possible.
178         * If non-null, the format of the audio that can be captured can be
179         * obtained using {@link #getCaptureAudioFormat()}.
180         *
181         * TODO: Candidate for Public API when the API to start capture with a session ID
182         * is made public.
183         *
184         * TODO: Add this to {@link #getCaptureAudioFormat()}:
185         * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
186         * or {@link #getCaptureSession()}. May be null if no audio can be obtained
187         * for either the trigger or a streaming session."
188         *
189         * TODO: Should this return a known invalid value instead?
190         *
191         * @hide
192         */
193        @Nullable
194        public Integer getCaptureSession() {
195            if (mCaptureAvailable) {
196                return mCaptureSession;
197            } else {
198                return null;
199            }
200        }
201    }
202
203    public static abstract class Callback {
204        /**
205         * Called when the availability of the sound model changes.
206         */
207        public abstract void onAvailabilityChanged(int status);
208
209        /**
210         * Called when the sound model has triggered (such as when it matched a
211         * given sound pattern).
212         */
213        public abstract void onDetected(@NonNull EventPayload eventPayload);
214
215        /**
216         *  Called when the detection fails due to an error.
217         */
218        public abstract void onError();
219
220        /**
221         * Called when the recognition is paused temporarily for some reason.
222         * This is an informational callback, and the clients shouldn't be doing anything here
223         * except showing an indication on their UI if they have to.
224         */
225        public abstract void onRecognitionPaused();
226
227        /**
228         * Called when the recognition is resumed after it was temporarily paused.
229         * This is an informational callback, and the clients shouldn't be doing anything here
230         * except showing an indication on their UI if they have to.
231         */
232        public abstract void onRecognitionResumed();
233    }
234
235    /**
236     * This class should be constructed by the {@link SoundTriggerManager}.
237     * @hide
238     */
239    SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId,
240            @NonNull Callback callback, @Nullable Handler handler) {
241        mSoundTriggerService = soundTriggerService;
242        mSoundModelId = soundModelId;
243        mCallback = callback;
244        if (handler == null) {
245            mHandler = new MyHandler();
246        } else {
247            mHandler = new MyHandler(handler.getLooper());
248        }
249        mRecognitionCallback = new RecognitionCallback();
250    }
251
252    /**
253     * Starts recognition on the associated sound model. Result is indicated via the
254     * {@link Callback}.
255     * @return Indicates whether the call succeeded or not.
256     */
257    @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
258    public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
259        if (DBG) {
260            Slog.d(TAG, "startRecognition()");
261        }
262        boolean captureTriggerAudio =
263                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
264
265        boolean allowMultipleTriggers =
266                (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
267        int status = STATUS_OK;
268        try {
269            status = mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId),
270                    mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
271                        allowMultipleTriggers, null, null));
272        } catch (RemoteException e) {
273            return false;
274        }
275        return status == STATUS_OK;
276    }
277
278    /**
279     * Stops recognition for the associated model.
280     */
281    @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
282    public boolean stopRecognition() {
283        int status = STATUS_OK;
284        try {
285            status = mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId),
286                    mRecognitionCallback);
287        } catch (RemoteException e) {
288            return false;
289        }
290        return status == STATUS_OK;
291    }
292
293    /**
294     * @hide
295     */
296    public void dump(String prefix, PrintWriter pw) {
297        synchronized (mLock) {
298            // TODO: Dump useful debug information.
299        }
300    }
301
302    /**
303     * Callback that handles events from the lower sound trigger layer.
304     *
305     * Note that these callbacks will be called synchronously from the SoundTriggerService
306     * layer and thus should do minimal work (such as sending a message on a handler to do
307     * the real work).
308     * @hide
309     */
310    private class RecognitionCallback extends IRecognitionStatusCallback.Stub {
311
312        /**
313         * @hide
314         */
315        @Override
316        public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
317            Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
318            Message.obtain(mHandler,
319                    MSG_SOUND_TRIGGER_DETECTED,
320                    new EventPayload(event.triggerInData, event.captureAvailable,
321                            event.captureFormat, event.captureSession, event.data))
322                    .sendToTarget();
323        }
324
325        @Override
326        public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
327            Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
328        }
329
330        /**
331         * @hide
332         */
333        @Override
334        public void onError(int status) {
335            Slog.d(TAG, "onError()" + status);
336            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
337        }
338
339        /**
340         * @hide
341         */
342        @Override
343        public void onRecognitionPaused() {
344            Slog.d(TAG, "onRecognitionPaused()");
345            mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
346        }
347
348        /**
349         * @hide
350         */
351        @Override
352        public void onRecognitionResumed() {
353            Slog.d(TAG, "onRecognitionResumed()");
354            mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
355        }
356    }
357
358    private class MyHandler extends Handler {
359
360        MyHandler() {
361            super();
362        }
363
364        MyHandler(Looper looper) {
365            super(looper);
366        }
367
368        @Override
369        public void handleMessage(Message msg) {
370            if (mCallback == null) {
371                  Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
372                  return;
373            }
374            switch (msg.what) {
375                case MSG_SOUND_TRIGGER_DETECTED:
376                    mCallback.onDetected((EventPayload) msg.obj);
377                    break;
378                case MSG_DETECTION_ERROR:
379                    mCallback.onError();
380                    break;
381                case MSG_DETECTION_PAUSE:
382                    mCallback.onRecognitionPaused();
383                    break;
384                case MSG_DETECTION_RESUME:
385                    mCallback.onRecognitionResumed();
386                    break;
387                default:
388                    super.handleMessage(msg);
389
390            }
391        }
392    }
393}
394