// SoundTrigger.java revision d7018200312e4e4dc3f67cf33dc90bf7ce585844
/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.hardware.soundtrigger;

import android.os.Handler;

import java.util.ArrayList;
import java.util.UUID;

/**
 * The SoundTrigger class provides access via JNI to the native service managing
 * the sound trigger HAL.
 *
 * @hide
 */
public class SoundTrigger {

    /*
     * Status codes returned by the sound trigger API
     * (see for instance {@link #listModules(ArrayList)}).
     */
    public static final int STATUS_OK = 0;
    public static final int STATUS_ERROR = Integer.MIN_VALUE;
    public static final int STATUS_PERMISSION_DENIED = -1;
    public static final int STATUS_NO_INIT = -19;
    public static final int STATUS_BAD_VALUE = -22;
    public static final int STATUS_DEAD_OBJECT = -32;
    public static final int STATUS_INVALID_OPERATION = -38;

    /**
     * A ModuleProperties describes a given sound trigger hardware module
     * managed by the native sound trigger service. Each module has a unique
     * ID used to target any API call to this particular module. Module
     * properties are returned by the {@link #listModules(ArrayList)} method.
     */
    public static class ModuleProperties {
        /** Unique module ID provided by the native service */
        public final int id;

        /** Human readable voice detection engine implementor */
        public final String implementor;

        /** Human readable voice detection engine description */
        public final String description;

        /** Unique voice engine ID (changes with each version) */
        public final UUID uuid;

        /** Voice detection engine version */
        public final int version;

        /** Maximum number of active sound models */
        public final int maxSoundModels;

        /** Maximum number of key phrases */
        public final int maxKeyphrases;

        /** Maximum number of users per key phrase */
        public final int maxUsers;

        /** Supported recognition modes (bit field, RECOGNITION_MODE_VOICE_TRIGGER ...) */
        public final int recognitionModes;

        /** Supports seamless transition to capture mode after recognition */
        public final boolean supportsCaptureTransition;

        /** Maximum buffering capacity in ms if {@link #supportsCaptureTransition} is true */
        public final int maxBufferMs;

        /** Supports capture by other use cases while detection is active */
        public final boolean supportsConcurrentCapture;

        /** Rated power consumption when detection is active with TBD silence/sound/speech ratio */
        public final int powerConsumptionMw;

        /**
         * Package-private constructor; every argument is stored in the field of the same name.
         * NOTE(review): presumably invoked from the native (JNI) layer, so the signature
         * should stay in sync with it -- confirm before changing.
         *
         * @param uuid string form of the engine UUID, converted with
         *             {@link UUID#fromString(String)}
         * @throws IllegalArgumentException if {@code uuid} is not a valid UUID string
         *             (raised by {@link UUID#fromString(String)})
         */
        ModuleProperties(int id, String implementor, String description,
                String uuid, int version, int maxSoundModels, int maxKeyphrases,
                int maxUsers, int recognitionModes, boolean supportsCaptureTransition,
                int maxBufferMs, boolean supportsConcurrentCapture,
                int powerConsumptionMw) {
            this.id = id;
            this.implementor = implementor;
            this.description = description;
            this.uuid = UUID.fromString(uuid);
            this.version = version;
            this.maxSoundModels = maxSoundModels;
            this.maxKeyphrases = maxKeyphrases;
            this.maxUsers = maxUsers;
            this.recognitionModes = recognitionModes;
            this.supportsCaptureTransition = supportsCaptureTransition;
            this.maxBufferMs = maxBufferMs;
            this.supportsConcurrentCapture = supportsConcurrentCapture;
            this.powerConsumptionMw = powerConsumptionMw;
        }
    }

    /**
     * A SoundModel describes the attributes and contains the binary data used by the hardware
     * implementation to detect a particular sound pattern.
     * A specialized version {@link KeyphraseSoundModel} is defined for key phrase
     * sound models.
     */
    public static class SoundModel {
        /** Undefined sound model type */
        public static final int TYPE_UNKNOWN = -1;

        /** Keyphrase sound model */
        public static final int TYPE_KEYPHRASE = 0;

        /** Unique sound model identifier */
        public final UUID uuid;

        /** Sound model type (e.g. {@link #TYPE_KEYPHRASE}) */
        public final int type;

        /** Opaque data. For use by vendor implementation and enrollment application */
        public final byte[] data;

        /**
         * @param uuid unique sound model identifier
         * @param type sound model type (e.g. {@link #TYPE_KEYPHRASE})
         * @param data opaque model data; the array reference is stored as-is, not copied
         */
        public SoundModel(UUID uuid, int type, byte[] data) {
            this.uuid = uuid;
            this.type = type;
            this.data = data;
        }
    }

    /**
     * A Keyphrase describes a key phrase that can be detected by a
     * {@link KeyphraseSoundModel}
     */
    public static class Keyphrase {
        /** Unique identifier for this keyphrase */
        public final int id;

        /** Recognition modes supported for this key phrase in the model */
        public final int recognitionModes;

        /** Locale of the keyphrase. JAVA Locale string e.g en_US */
        public final String locale;

        /** Key phrase text */
        public final String text;

        /** Users this key phrase has been trained for. Contains sound trigger specific user IDs
         * derived from system user IDs {@link android.os.UserHandle#getIdentifier()}. */
        public final int[] users;

        /**
         * @param id unique identifier for this keyphrase
         * @param recognitionModes recognition modes supported for this key phrase
         * @param locale locale of the keyphrase
         * @param text key phrase text
         * @param users users this key phrase has been trained for; the array reference is
         *              stored as-is, not copied
         */
        public Keyphrase(int id, int recognitionModes, String locale, String text, int[] users) {
            this.id = id;
            this.recognitionModes = recognitionModes;
            this.locale = locale;
            this.text = text;
            this.users = users;
        }
    }

    /**
     * A KeyphraseSoundModel is a specialized {@link SoundModel} for key phrases.
     * It contains data needed by the hardware to detect a certain number of key phrases
     * and the list of corresponding {@link Keyphrase} descriptors.
     */
    public static class KeyphraseSoundModel extends SoundModel {
        /** Key phrases in this sound model */
        public final Keyphrase[] keyphrases;

        /**
         * Creates a sound model whose type is always {@link SoundModel#TYPE_KEYPHRASE}.
         *
         * @param id unique sound model identifier
         * @param data opaque model data, handed to {@link SoundModel}
         * @param keyphrases key phrases in this sound model; the array reference is stored
         *                   as-is, not copied
         */
        public KeyphraseSoundModel(UUID id, byte[] data, Keyphrase[] keyphrases) {
            super(id, TYPE_KEYPHRASE, data);
            this.keyphrases = keyphrases;
        }
    }

    /*
     * Modes for key phrase recognition
     */
    /** Simple recognition of the key phrase */
    public static final int RECOGNITION_MODE_VOICE_TRIGGER = 0x1;
    /** Trigger only if one user is identified */
    public static final int RECOGNITION_MODE_USER_IDENTIFICATION = 0x2;
    /** Trigger only if one user is authenticated */
    public static final int RECOGNITION_MODE_USER_AUTHENTICATION = 0x4;

    /*
     * Status codes for {@link RecognitionEvent}
     */
    /** Recognition success */
    public static final int RECOGNITION_STATUS_SUCCESS = 0;
    /** Recognition aborted (e.g. capture preempted by another use case) */
    public static final int RECOGNITION_STATUS_ABORT = 1;
    /** Recognition failure */
    public static final int RECOGNITION_STATUS_FAILURE = 2;

    /**
     * A RecognitionEvent is provided by the
     * {@link StatusListener#onRecognition(RecognitionEvent)}
     * callback upon recognition success or failure.
     */
    public static class RecognitionEvent {
        /** Recognition status e.g {@link SoundTrigger#RECOGNITION_STATUS_SUCCESS} */
        public final int status;
        /** Handle of the sound model corresponding to this event callback */
        public final int soundModelHandle;
        /** True if it is possible to capture audio from this utterance buffered by the hardware */
        public final boolean captureAvailable;
        /** Audio session ID to be used when capturing the utterance with an AudioRecord
         * if {@link #captureAvailable} is true. */
        public final int captureSession;
        /** Delay in ms between end of model detection and start of audio available for capture.
         * A negative value is possible (e.g. if keyphrase is also available for capture) */
        public final int captureDelayMs;
        /** Duration in ms of audio captured before the start of the trigger. 0 if none. */
        public final int capturePreambleMs;
        /** Opaque data for use by system applications who know about voice engine internals,
         * typically during enrollment. */
        public final byte[] data;

        /**
         * Package-private constructor; every argument is stored in the field of the same name
         * ({@code data} by reference, without copying).
         * NOTE(review): presumably invoked from the native (JNI) layer -- confirm before
         * changing the signature.
         */
        RecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
                int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data) {
            this.status = status;
            this.soundModelHandle = soundModelHandle;
            this.captureAvailable = captureAvailable;
            this.captureSession = captureSession;
            this.captureDelayMs = captureDelayMs;
            this.capturePreambleMs = capturePreambleMs;
            this.data = data;
        }
    }

    /**
     * A RecognitionConfig is provided to
     * {@link SoundTriggerModule#startRecognition(int, RecognitionConfig)} to configure the
     * recognition request.
     */
    public static class RecognitionConfig {
        /** True if the DSP should capture the trigger sound and make it available for further
         * capture. */
        public final boolean captureRequested;
        /** List of all keyphrases in the sound model for which recognition should be performed with
         * options for each keyphrase. */
        public final KeyphraseRecognitionExtra[] keyphrases;
        /** Opaque data for use by system applications who know about voice engine internals,
         * typically during enrollment. */
        public final byte[] data;

        /**
         * @param captureRequested true if the DSP should capture the trigger sound
         * @param keyphrases per-keyphrase recognition options; the array reference is stored
         *                   as-is, not copied
         * @param data opaque data; the array reference is stored as-is, not copied
         */
        public RecognitionConfig(boolean captureRequested,
                KeyphraseRecognitionExtra[] keyphrases, byte[] data) {
            this.captureRequested = captureRequested;
            this.keyphrases = keyphrases;
            this.data = data;
        }
    }

    /**
     * Confidence level for users defined in a keyphrase.
     * - The confidence level is expressed in percent (0% -100%).
     * When used in a {@link KeyphraseRecognitionEvent} it indicates the detected confidence level
     * When used in a {@link RecognitionConfig} it indicates the minimum confidence level that
     * should trigger a recognition.
     * - The user ID is derived from the system ID {@link android.os.UserHandle#getIdentifier()}.
     */
    public static class ConfidenceLevel {
        public final int userId;
        public final int confidenceLevel;

        /**
         * @param userId user ID this confidence level applies to
         * @param confidenceLevel confidence level; the value is stored without range checking
         */
        public ConfidenceLevel(int userId, int confidenceLevel) {
            this.userId = userId;
            this.confidenceLevel = confidenceLevel;
        }
    }

    /**
     * Additional data conveyed by a {@link KeyphraseRecognitionEvent}
     * for a key phrase detection.
     */
    public static class KeyphraseRecognitionExtra {
        /** The keyphrase ID */
        public final int id;

        /** Recognition modes matched for this event */
        public final int recognitionModes;

        /** Confidence levels for all users recognized (KeyphraseRecognitionEvent) or to
         * be recognized (RecognitionConfig) */
        public final ConfidenceLevel[] confidenceLevels;

        /**
         * @param id the keyphrase ID
         * @param recognitionModes recognition modes matched for this event
         * @param confidenceLevels per-user confidence levels; the array reference is stored
         *                         as-is, not copied
         */
        public KeyphraseRecognitionExtra(int id, int recognitionModes,
                ConfidenceLevel[] confidenceLevels) {
            this.id = id;
            this.recognitionModes = recognitionModes;
            this.confidenceLevels = confidenceLevels;
        }
    }

    /**
     * Specialized {@link RecognitionEvent} for a key phrase detection.
     */
    public static class KeyphraseRecognitionEvent extends RecognitionEvent {
        /** Additional data available for each recognized key phrases in the model */
        public final KeyphraseRecognitionExtra[] keyphraseExtras;

        /** Indicates if the key phrase is present in the buffered audio available for capture */
        public final boolean keyphraseInCapture;

        /**
         * Package-private constructor; the first seven arguments are forwarded to
         * {@link RecognitionEvent}, the last two are stored in the fields of the same name.
         * NOTE(review): presumably invoked from the native (JNI) layer -- confirm before
         * changing the signature.
         */
        KeyphraseRecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
                int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data,
                boolean keyphraseInCapture, KeyphraseRecognitionExtra[] keyphraseExtras) {
            super(status, soundModelHandle, captureAvailable, captureSession, captureDelayMs,
                    capturePreambleMs, data);
            this.keyphraseInCapture = keyphraseInCapture;
            this.keyphraseExtras = keyphraseExtras;
        }
    }

    /**
     * Returns a list of descriptors for all hardware modules loaded.
     * @param modules A ModuleProperties array where the list will be returned.
     * @return - {@link #STATUS_OK} in case of success
     *         - {@link #STATUS_ERROR} in case of unspecified error
     *         - {@link #STATUS_PERMISSION_DENIED} if the caller does not have system permission
     *         - {@link #STATUS_NO_INIT} if the native service cannot be reached
     *         - {@link #STATUS_BAD_VALUE} if modules is null
     *         - {@link #STATUS_DEAD_OBJECT} if the binder transaction to the native service fails
     */
    public static native int listModules(ArrayList<ModuleProperties> modules);

    /**
     * Get an interface on a hardware module to control sound models and recognition on
     * this module.
     * @param moduleId Sound module system identifier {@link ModuleProperties#id}. Mandatory.
     * @param listener {@link StatusListener} interface. Mandatory.
     * @param handler the Handler that will receive the callbacks. Can be null if default handler
     *                is OK.
     * @return a valid sound module in case of success or null in case of error.
     */
    public static SoundTriggerModule attachModule(int moduleId,
                                                  StatusListener listener,
                                                  Handler handler) {
        // The listener is mandatory: without it there is nobody to notify, so report an error.
        if (listener == null) {
            return null;
        }
        return new SoundTriggerModule(moduleId, listener, handler);
    }

    /**
     * Interface provided by the client application when attaching to a {@link SoundTriggerModule}
     * to receive recognition and error notifications.
     */
    public interface StatusListener {
        /**
         * Called when recognition succeeds or fails
         */
        void onRecognition(RecognitionEvent event);

        /**
         * Called when the sound trigger native service dies
         */
        void onServiceDied();
    }
}