/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.media.soundtrigger;
import static android.hardware.soundtrigger.SoundTrigger.STATUS_OK;

import android.annotation.IntDef;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.annotation.RequiresPermission;
import android.annotation.SystemApi;
import android.hardware.soundtrigger.IRecognitionStatusCallback;
import android.hardware.soundtrigger.SoundTrigger;
import android.hardware.soundtrigger.SoundTrigger.RecognitionConfig;
import android.media.AudioFormat;
import android.os.Handler;
import android.os.Looper;
import android.os.Message;
import android.os.ParcelUuid;
import android.os.RemoteException;
import android.util.Slog;

import com.android.internal.app.ISoundTriggerService;

import java.io.PrintWriter;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.util.UUID;

/**
 * A class that allows interaction with the actual sound trigger detection on the system.
 * Sound trigger detection refers to detectors that match generic sound patterns that are
 * not voice-based. The voice-based recognition models should utilize the {@link
 * VoiceInteractionService} instead. Access to this class is protected by a permission
 * granted only to system or privileged apps.
 *
 * <p>Events coming from the sound trigger service are received by an internal binder callback
 * and re-posted as messages to an internal {@link Handler}, which then invokes the
 * client-supplied {@link Callback}.
 *
 * @hide
 */
@SystemApi
public final class SoundTriggerDetector {
    private static final boolean DBG = false;
    private static final String TAG = "SoundTriggerDetector";

    // Message codes used between RecognitionCallback and MyHandler.
    // NOTE(review): MSG_AVAILABILITY_CHANGED is declared but is neither sent nor handled
    // anywhere in this file, so Callback#onAvailabilityChanged is never invoked from here.
    private static final int MSG_AVAILABILITY_CHANGED = 1;
    private static final int MSG_SOUND_TRIGGER_DETECTED = 2;
    private static final int MSG_DETECTION_ERROR = 3;
    private static final int MSG_DETECTION_PAUSE = 4;
    private static final int MSG_DETECTION_RESUME = 5;

    private final Object mLock = new Object();

    private final ISoundTriggerService mSoundTriggerService;
    private final UUID mSoundModelId;
    private final Callback mCallback;
    private final Handler mHandler;
    private final RecognitionCallback mRecognitionCallback;

    /** @hide */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef(flag = true,
            value = {
                RECOGNITION_FLAG_NONE,
                RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO,
                RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS
            })
    public @interface RecognitionFlags {}

    /**
     * Empty flag for {@link #startRecognition(int)}.
     *
     *  @hide
     */
    public static final int RECOGNITION_FLAG_NONE = 0;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio for hotword needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;

    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the recognition should keep going on even after the
     * model triggers.
     * If this flag is specified, it's possible to get multiple
     * triggers after a call to {@link #startRecognition(int)}, if the model
     * triggers multiple times.
     * When this isn't specified, the default behavior is to stop recognition once the
     * trigger happens, till the caller starts recognition again.
     */
    public static final int RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS = 0x2;

    /**
     * Additional payload for {@link Callback#onDetected}.
     */
    public static class EventPayload {
        // Selects how mData is exposed: as trigger audio when true, as opaque data when false.
        private final boolean mTriggerAvailable;

        // Indicates if {@code captureSession} can be used to continue capturing more audio
        // from the DSP hardware.
        private final boolean mCaptureAvailable;
        // The session to use when attempting to capture more audio from the DSP hardware.
        private final int mCaptureSession;
        private final AudioFormat mAudioFormat;
        // Raw data associated with the event.
        // This is the audio that triggered the keyphrase if {@code isTriggerAudio} is true.
        private final byte[] mData;

        private EventPayload(boolean triggerAvailable, boolean captureAvailable,
                AudioFormat audioFormat, int captureSession, byte[] data) {
            mTriggerAvailable = triggerAvailable;
            mCaptureAvailable = captureAvailable;
            mCaptureSession = captureSession;
            mAudioFormat = audioFormat;
            mData = data;
        }

        /**
         * Gets the format of the audio obtained using {@link #getTriggerAudio()}.
         * May be null if there's no audio present.
         */
        @Nullable
        public AudioFormat getCaptureAudioFormat() {
            return mAudioFormat;
        }

        /**
         * Gets the raw audio that triggered the detector.
         * This may be null if the trigger audio isn't available.
         * If non-null, the format of the audio can be obtained by calling
         * {@link #getCaptureAudioFormat()}.
         *
         * @see SoundTriggerDetector#RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO
         */
        @Nullable
        public byte[] getTriggerAudio() {
            if (mTriggerAvailable) {
                return mData;
            } else {
                return null;
            }
        }

        /**
         * Gets the opaque data passed from the detection engine for the event.
         * This may be null if it was not populated by the engine, or if the data is known to
         * contain the trigger audio.
         *
         * @see #getTriggerAudio
         *
         * @hide
         */
        @Nullable
        public byte[] getData() {
            if (!mTriggerAvailable) {
                return mData;
            } else {
                return null;
            }
        }

        /**
         * Gets the session ID to start a capture from the DSP.
         * This may be null if streaming capture isn't possible.
         * If non-null, the format of the audio that can be captured can be
         * obtained using {@link #getCaptureAudioFormat()}.
         *
         * TODO: Candidate for Public API when the API to start capture with a session ID
         * is made public.
         *
         * TODO: Add this to {@link #getCaptureAudioFormat()}:
         * "Gets the format of the audio obtained using {@link #getTriggerAudio()}
         * or {@link #getCaptureSession()}. May be null if no audio can be obtained
         * for either the trigger or a streaming session."
         *
         * TODO: Should this return a known invalid value instead?
         *
         * @hide
         */
        @Nullable
        public Integer getCaptureSession() {
            if (mCaptureAvailable) {
                return mCaptureSession;
            } else {
                return null;
            }
        }
    }

    /**
     * Client-implemented receiver for detector events. The methods are invoked from the
     * detector's internal {@link Handler}.
     */
    public static abstract class Callback {
        /**
         * Called when the availability of the sound model changes.
         */
        public abstract void onAvailabilityChanged(int status);

        /**
         * Called when the sound model has triggered (such as when it matched a
         * given sound pattern).
         */
        public abstract void onDetected(@NonNull EventPayload eventPayload);

        /**
         * Called when the detection fails due to an error.
         */
        public abstract void onError();

        /**
         * Called when the recognition is paused temporarily for some reason.
         * This is an informational callback, and the clients shouldn't be doing anything here
         * except showing an indication on their UI if they have to.
         */
        public abstract void onRecognitionPaused();

        /**
         * Called when the recognition is resumed after it was temporarily paused.
         * This is an informational callback, and the clients shouldn't be doing anything here
         * except showing an indication on their UI if they have to.
         */
        public abstract void onRecognitionResumed();
    }

    /**
     * This class should be constructed by the {@link SoundTriggerManager}.
     *
     * @param soundTriggerService binder interface used to start and stop recognition
     * @param soundModelId identifier of the sound model this detector operates on
     * @param callback receiver of detector events
     * @param handler if non-null, its {@link Looper} is used to deliver callbacks; otherwise
     *         the internal handler is created with the default {@link Handler} constructor
     * @hide
     */
    SoundTriggerDetector(ISoundTriggerService soundTriggerService, UUID soundModelId,
            @NonNull Callback callback, @Nullable Handler handler) {
        mSoundTriggerService = soundTriggerService;
        mSoundModelId = soundModelId;
        mCallback = callback;
        if (handler == null) {
            mHandler = new MyHandler();
        } else {
            mHandler = new MyHandler(handler.getLooper());
        }
        mRecognitionCallback = new RecognitionCallback();
    }

    /**
     * Starts recognition on the associated sound model. Result is indicated via the
     * {@link Callback}.
     *
     * @param recognitionFlags bitwise combination of the {@code RECOGNITION_FLAG_*} constants
     * @return Indicates whether the call succeeded or not. {@code false} is returned both when
     *         the service reports a status other than {@code STATUS_OK} and when the call to
     *         the service throws a {@link RemoteException}.
     */
    @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
    public boolean startRecognition(@RecognitionFlags int recognitionFlags) {
        if (DBG) {
            Slog.d(TAG, "startRecognition()");
        }
        boolean captureTriggerAudio =
                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;

        boolean allowMultipleTriggers =
                (recognitionFlags & RECOGNITION_FLAG_ALLOW_MULTIPLE_TRIGGERS) != 0;
        int status = STATUS_OK;
        try {
            // NOTE(review): the two trailing nulls are presumably the keyphrase extras and the
            // opaque config data -- confirm against RecognitionConfig's constructor.
            status = mSoundTriggerService.startRecognition(new ParcelUuid(mSoundModelId),
                    mRecognitionCallback, new RecognitionConfig(captureTriggerAudio,
                        allowMultipleTriggers, null, null));
        } catch (RemoteException e) {
            // Keep the boolean contract, but leave a trace so that a binder failure can be
            // told apart from an ordinary non-OK status.
            Slog.e(TAG, "startRecognition() failed with a RemoteException", e);
            return false;
        }
        return status == STATUS_OK;
    }

    /**
     * Stops recognition for the associated model.
     *
     * @return {@code true} if the service reported {@code STATUS_OK}; {@code false} if it
     *         reported any other status or if the call threw a {@link RemoteException}.
     */
    @RequiresPermission(android.Manifest.permission.MANAGE_SOUND_TRIGGER)
    public boolean stopRecognition() {
        int status = STATUS_OK;
        try {
            status = mSoundTriggerService.stopRecognition(new ParcelUuid(mSoundModelId),
                    mRecognitionCallback);
        } catch (RemoteException e) {
            // Same policy as startRecognition(): report failure to the caller and log why.
            Slog.e(TAG, "stopRecognition() failed with a RemoteException", e);
            return false;
        }
        return status == STATUS_OK;
    }

    /**
     * Currently writes nothing; both arguments are unused.
     *
     * @hide
     */
    public void dump(String prefix, PrintWriter pw) {
        synchronized (mLock) {
            // TODO: Dump useful debug information.
        }
    }

    /**
     * Callback that handles events from the lower sound trigger layer.
     *
     * Note that these callbacks will be called synchronously from the SoundTriggerService
     * layer and thus should do minimal work (such as sending a message on a handler to do
     * the real work).
     * @hide
     */
    private class RecognitionCallback extends IRecognitionStatusCallback.Stub {

        /**
         * Wraps the event's fields in an {@link EventPayload} and posts it to the handler.
         *
         * @hide
         */
        @Override
        public void onGenericSoundTriggerDetected(SoundTrigger.GenericRecognitionEvent event) {
            Slog.d(TAG, "onGenericSoundTriggerDetected()" + event);
            Message.obtain(mHandler,
                    MSG_SOUND_TRIGGER_DETECTED,
                    new EventPayload(event.triggerInData, event.captureAvailable,
                            event.captureFormat, event.captureSession, event.data))
                    .sendToTarget();
        }

        /**
         * Keyphrase events are not forwarded to the client; they are only logged.
         */
        @Override
        public void onKeyphraseDetected(SoundTrigger.KeyphraseRecognitionEvent event) {
            Slog.e(TAG, "Ignoring onKeyphraseDetected() called for " + event);
        }

        /**
         * Posts an error message to the handler. The status code is logged but is not
         * passed on to {@link Callback#onError()}.
         *
         * @hide
         */
        @Override
        public void onError(int status) {
            Slog.d(TAG, "onError()" + status);
            mHandler.sendEmptyMessage(MSG_DETECTION_ERROR);
        }

        /**
         * @hide
         */
        @Override
        public void onRecognitionPaused() {
            Slog.d(TAG, "onRecognitionPaused()");
            mHandler.sendEmptyMessage(MSG_DETECTION_PAUSE);
        }

        /**
         * @hide
         */
        @Override
        public void onRecognitionResumed() {
            Slog.d(TAG, "onRecognitionResumed()");
            mHandler.sendEmptyMessage(MSG_DETECTION_RESUME);
        }
    }

    /**
     * Handler that translates the internal {@code MSG_*} codes into calls on the
     * client-supplied {@link Callback}.
     */
    private class MyHandler extends Handler {

        MyHandler() {
            super();
        }

        MyHandler(Looper looper) {
            super(looper);
        }

        @Override
        public void handleMessage(Message msg) {
            if (mCallback == null) {
                  Slog.w(TAG, "Received message: " + msg.what + " for NULL callback.");
                  return;
            }
            switch (msg.what) {
                case MSG_SOUND_TRIGGER_DETECTED:
                    mCallback.onDetected((EventPayload) msg.obj);
                    break;
                case MSG_DETECTION_ERROR:
                    mCallback.onError();
                    break;
                case MSG_DETECTION_PAUSE:
                    mCallback.onRecognitionPaused();
                    break;
                case MSG_DETECTION_RESUME:
                    mCallback.onRecognitionResumed();
                    break;
                default:
                    super.handleMessage(msg);
                    break;
            }
        }
    }
}