SoundTrigger.java revision 055897208d659e9734a82def88be4a806ff55448
/**
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.hardware.soundtrigger;

import android.os.Handler;
import android.os.Parcel;
import android.os.Parcelable;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.UUID;

/**
 * The SoundTrigger class provides access via JNI to the native service managing
 * the sound trigger HAL.
 *
 * @hide
 */
public class SoundTrigger {

    public static final int STATUS_OK = 0;
    public static final int STATUS_ERROR = Integer.MIN_VALUE;
    public static final int STATUS_PERMISSION_DENIED = -1;
    public static final int STATUS_NO_INIT = -19;
    public static final int STATUS_BAD_VALUE = -22;
    public static final int STATUS_DEAD_OBJECT = -32;
    public static final int STATUS_INVALID_OPERATION = -38;

    /*****************************************************************************
     * A ModuleProperties describes a given sound trigger hardware module
     * managed by the native sound trigger service. Each module has a unique
     * ID used to target any API call to this particular module. Module
     * properties are returned by listModules() method.
     ****************************************************************************/
    public static class ModuleProperties implements Parcelable {
        /** Unique module ID provided by the native service */
        public final int id;

        /** human readable voice detection engine implementor */
        public final String implementor;

        /** human readable voice detection engine description */
        public final String description;

        /** Unique voice engine Id (changes with each version) */
        public final UUID uuid;

        /** Voice detection engine version */
        public final int version;

        /** Maximum number of active sound models */
        public final int maxSoundModels;

        /** Maximum number of key phrases */
        public final int maxKeyphrases;

        /** Maximum number of users per key phrase */
        public final int maxUsers;

        /** Supported recognition modes (bit field, RECOGNITION_MODE_VOICE_TRIGGER ...) */
        public final int recognitionModes;

        /** Supports seamless transition to capture mode after recognition */
        public final boolean supportsCaptureTransition;

        /** Maximum buffering capacity in ms if {@link #supportsCaptureTransition} is true */
        public final int maxBufferMs;

        /** Supports capture by other use cases while detection is active */
        public final boolean supportsConcurrentCapture;

        /** Rated power consumption when detection is active with TDB silence/sound/speech ratio */
        public final int powerConsumptionMw;

        /**
         * Builds a module descriptor.
         *
         * @param uuid textual UUID; parsed with {@link UUID#fromString(String)}, so a null or
         *             malformed string makes this constructor throw.
         */
        ModuleProperties(int id, String implementor, String description,
                String uuid, int version, int maxSoundModels, int maxKeyphrases,
                int maxUsers, int recognitionModes, boolean supportsCaptureTransition,
                int maxBufferMs, boolean supportsConcurrentCapture,
                int powerConsumptionMw) {
            this.id = id;
            this.implementor = implementor;
            this.description = description;
            this.uuid = UUID.fromString(uuid);
            this.version = version;
            this.maxSoundModels = maxSoundModels;
            this.maxKeyphrases = maxKeyphrases;
            this.maxUsers = maxUsers;
            this.recognitionModes = recognitionModes;
            this.supportsCaptureTransition = supportsCaptureTransition;
            this.maxBufferMs = maxBufferMs;
            this.supportsConcurrentCapture = supportsConcurrentCapture;
            this.powerConsumptionMw = powerConsumptionMw;
        }

        public static final Parcelable.Creator<ModuleProperties> CREATOR
                = new Parcelable.Creator<ModuleProperties>() {
            @Override
            public ModuleProperties createFromParcel(Parcel in) {
                return ModuleProperties.fromParcel(in);
            }

            @Override
            public ModuleProperties[] newArray(int size) {
                return new ModuleProperties[size];
            }
        };

        /** Reads the fields in exactly the order {@link #writeToParcel(Parcel, int)} wrote them. */
        private static ModuleProperties fromParcel(Parcel in) {
            int id = in.readInt();
            String implementor = in.readString();
            String description = in.readString();
            String uuid = in.readString();
            int version = in.readInt();
            int maxSoundModels = in.readInt();
            int maxKeyphrases = in.readInt();
            int maxUsers = in.readInt();
            int recognitionModes = in.readInt();
            boolean supportsCaptureTransition = in.readByte() == 1;
            int maxBufferMs = in.readInt();
            boolean supportsConcurrentCapture = in.readByte() == 1;
            int powerConsumptionMw = in.readInt();
            return new ModuleProperties(id, implementor, description, uuid, version,
                    maxSoundModels, maxKeyphrases, maxUsers, recognitionModes,
                    supportsCaptureTransition, maxBufferMs, supportsConcurrentCapture,
                    powerConsumptionMw);
        }

        @Override
        public void writeToParcel(Parcel dest, int flags) {
            dest.writeInt(id);
            dest.writeString(implementor);
            dest.writeString(description);
            dest.writeString(uuid.toString());
            dest.writeInt(version);
            dest.writeInt(maxSoundModels);
            dest.writeInt(maxKeyphrases);
            dest.writeInt(maxUsers);
            dest.writeInt(recognitionModes);
            // Booleans travel as a single byte: 1 for true, 0 for false.
            dest.writeByte((byte) (supportsCaptureTransition ? 1 : 0));
            dest.writeInt(maxBufferMs);
            dest.writeByte((byte) (supportsConcurrentCapture ? 1 : 0));
            dest.writeInt(powerConsumptionMw);
        }

        @Override
        public int describeContents() {
            return 0;
        }

        @Override
        public String toString() {
            return "ModuleProperties [id=" + id + ", implementor=" + implementor + ", description="
                    + description + ", uuid=" + uuid + ", version=" + version + ", maxSoundModels="
                    + maxSoundModels + ", maxKeyphrases=" + maxKeyphrases + ", maxUsers="
                    + maxUsers + ", recognitionModes=" + recognitionModes
                    + ", supportsCaptureTransition=" + supportsCaptureTransition + ", maxBufferMs="
                    + maxBufferMs + ", supportsConcurrentCapture=" + supportsConcurrentCapture
                    + ", powerConsumptionMw=" + powerConsumptionMw + "]";
        }
    }

    /*****************************************************************************
     * A SoundModel describes the attributes and contains the binary data used by the hardware
     * implementation to detect a particular sound pattern.
     * A specialized version {@link KeyphraseSoundModel} is defined for key phrase
     * sound models.
     ****************************************************************************/
    public static class SoundModel {
        /** Undefined sound model type */
        public static final int TYPE_UNKNOWN = -1;

        /** Keyphrase sound model */
        public static final int TYPE_KEYPHRASE = 0;

        /** Unique sound model identifier */
        public final UUID uuid;

        /** Sound model type (e.g. TYPE_KEYPHRASE); */
        public final int type;

        /** Opaque data. For use by vendor implementation and enrollment application */
        public final byte[] data;

        public SoundModel(UUID uuid, int type, byte[] data) {
            this.uuid = uuid;
            this.type = type;
            this.data = data;
        }
    }

    /*****************************************************************************
     * A Keyphrase describes a key phrase that can be detected by a
     * {@link KeyphraseSoundModel}
     ****************************************************************************/
    public static class Keyphrase implements Parcelable {
        /** Unique identifier for this keyphrase */
        public final int id;

        /** Recognition modes supported for this key phrase in the model */
        public final int recognitionModes;

        /** Locale of the keyphrase. JAVA Locale string e.g en_US */
        public final String locale;

        /** Key phrase text */
        public final String text;

        /** Users this key phrase has been trained for. Contains sound trigger specific user IDs
         * derived from system user IDs {@link android.os.UserHandle#getIdentifier()}. */
        public final int[] users;

        public Keyphrase(int id, int recognitionModes, String locale, String text, int[] users) {
            this.id = id;
            this.recognitionModes = recognitionModes;
            this.locale = locale;
            this.text = text;
            this.users = users;
        }

        public static final Parcelable.Creator<Keyphrase> CREATOR
                = new Parcelable.Creator<Keyphrase>() {
            @Override
            public Keyphrase createFromParcel(Parcel in) {
                return Keyphrase.fromParcel(in);
            }

            @Override
            public Keyphrase[] newArray(int size) {
                return new Keyphrase[size];
            }
        };

        /**
         * Reads a Keyphrase written by {@link #writeToParcel(Parcel, int)}.
         *
         * The user list is preceded by an explicit count: a negative count means the
         * array was null, any count >= 0 is followed by the array itself.
         */
        private static Keyphrase fromParcel(Parcel in) {
            int id = in.readInt();
            int recognitionModes = in.readInt();
            String locale = in.readString();
            String text = in.readString();
            int[] users = null;
            int numUsers = in.readInt();
            // A zero-length array must be consumed as well: writeIntArray() emits its own
            // length prefix even when there are no elements, and leaving it unread would
            // misalign every field that follows in the parcel.
            if (numUsers >= 0) {
                users = new int[numUsers];
                in.readIntArray(users);
            }
            return new Keyphrase(id, recognitionModes, locale, text, users);
        }

        @Override
        public void writeToParcel(Parcel dest, int flags) {
            dest.writeInt(id);
            dest.writeInt(recognitionModes);
            dest.writeString(locale);
            dest.writeString(text);
            if (users != null) {
                dest.writeInt(users.length);
                dest.writeIntArray(users);
            } else {
                // -1 marks a null array so that it stays distinguishable from an empty one.
                dest.writeInt(-1);
            }
        }

        @Override
        public int describeContents() {
            return 0;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + ((text == null) ? 0 : text.hashCode());
            result = prime * result + id;
            result = prime * result + ((locale == null) ? 0 : locale.hashCode());
            result = prime * result + recognitionModes;
            result = prime * result + Arrays.hashCode(users);
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            Keyphrase other = (Keyphrase) obj;
            if (text == null) {
                if (other.text != null) {
                    return false;
                }
            } else if (!text.equals(other.text)) {
                return false;
            }
            if (id != other.id) {
                return false;
            }
            if (locale == null) {
                if (other.locale != null) {
                    return false;
                }
            } else if (!locale.equals(other.locale)) {
                return false;
            }
            if (recognitionModes != other.recognitionModes) {
                return false;
            }
            return Arrays.equals(users, other.users);
        }

        @Override
        public String toString() {
            return "Keyphrase [id=" + id + ", recognitionModes=" + recognitionModes + ", locale="
                    + locale + ", text=" + text + ", users=" + Arrays.toString(users) + "]";
        }
    }

    /*****************************************************************************
     * A KeyphraseSoundModel is a specialized {@link SoundModel} for key phrases.
     * It contains data needed by the hardware to detect a certain number of key phrases
     * and the list of corresponding {@link Keyphrase} descriptors.
     ****************************************************************************/
    public static class KeyphraseSoundModel extends SoundModel implements Parcelable {
        /** Key phrases in this sound model */
        public final Keyphrase[] keyphrases; // keyword phrases in model

        public KeyphraseSoundModel(UUID id, byte[] data, Keyphrase[] keyphrases) {
            super(id, TYPE_KEYPHRASE, data);
            this.keyphrases = keyphrases;
        }

        public static final Parcelable.Creator<KeyphraseSoundModel> CREATOR
                = new Parcelable.Creator<KeyphraseSoundModel>() {
            @Override
            public KeyphraseSoundModel createFromParcel(Parcel in) {
                return KeyphraseSoundModel.fromParcel(in);
            }

            @Override
            public KeyphraseSoundModel[] newArray(int size) {
                return new KeyphraseSoundModel[size];
            }
        };

        /**
         * Reads a KeyphraseSoundModel written by {@link #writeToParcel(Parcel, int)}.
         *
         * The opaque data is preceded by an explicit length: a negative length means the
         * array was null, any length >= 0 is followed by the array itself.
         */
        private static KeyphraseSoundModel fromParcel(Parcel in) {
            UUID uuid = UUID.fromString(in.readString());
            byte[] data = null;
            int dataLength = in.readInt();
            // Consume zero-length arrays too; writeByteArray() always emits a length prefix
            // and skipping it would corrupt the keyphrase array read just below.
            if (dataLength >= 0) {
                data = new byte[dataLength];
                in.readByteArray(data);
            }
            Keyphrase[] keyphrases = in.createTypedArray(Keyphrase.CREATOR);
            return new KeyphraseSoundModel(uuid, data, keyphrases);
        }

        @Override
        public int describeContents() {
            return 0;
        }

        @Override
        public void writeToParcel(Parcel dest, int flags) {
            dest.writeString(uuid.toString());
            if (data != null) {
                dest.writeInt(data.length);
                dest.writeByteArray(data);
            } else {
                // -1 marks a null array so that it stays distinguishable from an empty one.
                dest.writeInt(-1);
            }
            dest.writeTypedArray(keyphrases, 0);
        }
    }

    /*
     * Modes for key phrase recognition
     */
    /** Simple recognition of the key phrase */
    public static final int RECOGNITION_MODE_VOICE_TRIGGER = 0x1;
    /** Trigger only if one user is identified */
    public static final int RECOGNITION_MODE_USER_IDENTIFICATION = 0x2;
    /** Trigger only if one user is authenticated */
    public static final int RECOGNITION_MODE_USER_AUTHENTICATION = 0x4;

    /*
     * Status codes for RecognitionEvent
     */
    /** Recognition success */
    public static final int RECOGNITION_STATUS_SUCCESS = 0;
    /** Recognition aborted (e.g. capture preempted by another use case) */
    public static final int RECOGNITION_STATUS_ABORT = 1;
    /** Recognition failure */
    public static final int RECOGNITION_STATUS_FAILURE = 2;

    /**
     * A RecognitionEvent is provided by the
     * {@link StatusListener#onRecognition(RecognitionEvent)}
     * callback upon recognition success or failure.
     */
    public static class RecognitionEvent {
        /** Recognition status e.g {@link #RECOGNITION_STATUS_SUCCESS} */
        public final int status;
        /** Sound Model corresponding to this event callback */
        public final int soundModelHandle;
        /** True if it is possible to capture audio from this utterance buffered by the hardware */
        public final boolean captureAvailable;
        /** Audio session ID to be used when capturing the utterance with an AudioRecord
         * if {@link #captureAvailable} is true. */
        public final int captureSession;
        /** Delay in ms between end of model detection and start of audio available for capture.
         * A negative value is possible (e.g. if keyphrase is also available for capture) */
        public final int captureDelayMs;
        /** Duration in ms of audio captured before the start of the trigger. 0 if none. */
        public final int capturePreambleMs;
        /** Opaque data for use by system applications who know about voice engine internals,
         * typically during enrollment. */
        public final byte[] data;

        RecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
                int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data) {
            this.status = status;
            this.soundModelHandle = soundModelHandle;
            this.captureAvailable = captureAvailable;
            this.captureSession = captureSession;
            this.captureDelayMs = captureDelayMs;
            this.capturePreambleMs = capturePreambleMs;
            this.data = data;
        }
    }

    /**
     * A RecognitionConfig is provided to
     * {@link SoundTriggerModule#startRecognition(int, RecognitionConfig)} to configure the
     * recognition request.
     */
    public static class RecognitionConfig implements Parcelable {
        /** True if the DSP should capture the trigger sound and make it available for further
         * capture. */
        public final boolean captureRequested;
        /** List of all keyphrases in the sound model for which recognition should be performed with
         * options for each keyphrase. */
        public final KeyphraseRecognitionExtra[] keyphrases;
        /** Opaque data for use by system applications who know about voice engine internals,
         * typically during enrollment. */
        public final byte[] data;

        public RecognitionConfig(boolean captureRequested,
                KeyphraseRecognitionExtra[] keyphrases, byte[] data) {
            this.captureRequested = captureRequested;
            this.keyphrases = keyphrases;
            this.data = data;
        }

        public static final Parcelable.Creator<RecognitionConfig> CREATOR
                = new Parcelable.Creator<RecognitionConfig>() {
            @Override
            public RecognitionConfig createFromParcel(Parcel in) {
                return RecognitionConfig.fromParcel(in);
            }

            @Override
            public RecognitionConfig[] newArray(int size) {
                return new RecognitionConfig[size];
            }
        };

        /**
         * Reads a RecognitionConfig written by {@link #writeToParcel(Parcel, int)}.
         *
         * The opaque data is preceded by an explicit length: a negative length means the
         * array was null, any length >= 0 is followed by the array itself.
         */
        private static RecognitionConfig fromParcel(Parcel in) {
            boolean captureRequested = in.readByte() == 1;
            KeyphraseRecognitionExtra[] keyphrases =
                    in.createTypedArray(KeyphraseRecognitionExtra.CREATOR);
            byte[] data = null;
            int dataLength = in.readInt();
            // Consume zero-length arrays too so that the length prefix emitted by
            // writeByteArray() is never left behind in the parcel.
            if (dataLength >= 0) {
                data = new byte[dataLength];
                in.readByteArray(data);
            }
            return new RecognitionConfig(captureRequested, keyphrases, data);
        }

        @Override
        public void writeToParcel(Parcel dest, int flags) {
            dest.writeByte((byte) (captureRequested ? 1 : 0));
            dest.writeTypedArray(keyphrases, 0);
            if (data != null) {
                dest.writeInt(data.length);
                dest.writeByteArray(data);
            } else {
                // -1 marks a null array so that it stays distinguishable from an empty one.
                dest.writeInt(-1);
            }
        }

        @Override
        public int describeContents() {
            return 0;
        }
    }

    /**
     * Confidence level for users defined in a keyphrase.
     * - The confidence level is expressed in percent (0% -100%).
     * When used in a {@link KeyphraseRecognitionEvent} it indicates the detected confidence level
     * When used in a {@link RecognitionConfig} it indicates the minimum confidence level that
     * should trigger a recognition.
     * - The user ID is derived from the system ID {@link android.os.UserHandle#getIdentifier()}.
     */
    public static class ConfidenceLevel implements Parcelable {
        public final int userId;
        public final int confidenceLevel;

        public ConfidenceLevel(int userId, int confidenceLevel) {
            this.userId = userId;
            this.confidenceLevel = confidenceLevel;
        }

        public static final Parcelable.Creator<ConfidenceLevel> CREATOR
                = new Parcelable.Creator<ConfidenceLevel>() {
            @Override
            public ConfidenceLevel createFromParcel(Parcel in) {
                return ConfidenceLevel.fromParcel(in);
            }

            @Override
            public ConfidenceLevel[] newArray(int size) {
                return new ConfidenceLevel[size];
            }
        };

        /** Reads the user ID followed by the confidence level. */
        private static ConfidenceLevel fromParcel(Parcel in) {
            int userId = in.readInt();
            int confidenceLevel = in.readInt();
            return new ConfidenceLevel(userId, confidenceLevel);
        }

        @Override
        public void writeToParcel(Parcel dest, int flags) {
            dest.writeInt(userId);
            dest.writeInt(confidenceLevel);
        }

        @Override
        public int describeContents() {
            return 0;
        }
    }

    /**
     * Additional data conveyed by a {@link KeyphraseRecognitionEvent}
     * for a key phrase detection.
     */
    public static class KeyphraseRecognitionExtra implements Parcelable {
        /** The keyphrase ID */
        public final int id;

        /** Recognition modes matched for this event */
        public final int recognitionModes;

        /** Confidence levels for all users recognized (KeyphraseRecognitionEvent) or to
         * be recognized (RecognitionConfig) */
        public final ConfidenceLevel[] confidenceLevels;

        public KeyphraseRecognitionExtra(int id, int recognitionModes,
                ConfidenceLevel[] confidenceLevels) {
            this.id = id;
            this.recognitionModes = recognitionModes;
            this.confidenceLevels = confidenceLevels;
        }

        public static final Parcelable.Creator<KeyphraseRecognitionExtra> CREATOR
                = new Parcelable.Creator<KeyphraseRecognitionExtra>() {
            @Override
            public KeyphraseRecognitionExtra createFromParcel(Parcel in) {
                return KeyphraseRecognitionExtra.fromParcel(in);
            }

            @Override
            public KeyphraseRecognitionExtra[] newArray(int size) {
                return new KeyphraseRecognitionExtra[size];
            }
        };

        /** Reads the keyphrase ID, the recognition modes, then the confidence level array. */
        private static KeyphraseRecognitionExtra fromParcel(Parcel in) {
            int id = in.readInt();
            int recognitionModes = in.readInt();
            ConfidenceLevel[] confidenceLevels = in.createTypedArray(ConfidenceLevel.CREATOR);
            return new KeyphraseRecognitionExtra(id, recognitionModes, confidenceLevels);
        }

        @Override
        public void writeToParcel(Parcel dest, int flags) {
            dest.writeInt(id);
            dest.writeInt(recognitionModes);
            dest.writeTypedArray(confidenceLevels, 0);
        }

        @Override
        public int describeContents() {
            return 0;
        }
    }

    /**
     * Specialized {@link RecognitionEvent} for a key phrase detection.
     */
    public static class KeyphraseRecognitionEvent extends RecognitionEvent {
        /** Additional data available for each recognized key phrases in the model */
        public final KeyphraseRecognitionExtra[] keyphraseExtras;

        /** Indicates if the key phrase is present in the buffered audio available for capture */
        public final boolean keyphraseInCapture;

        KeyphraseRecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
               int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data,
               boolean keyphraseInCapture, KeyphraseRecognitionExtra[] keyphraseExtras) {
            super(status, soundModelHandle, captureAvailable, captureSession, captureDelayMs,
                  capturePreambleMs, data);
            this.keyphraseInCapture = keyphraseInCapture;
            this.keyphraseExtras = keyphraseExtras;
        }
    }

    /**
     * Returns a list of descriptors for all hardware modules loaded.
     * @param modules A ModuleProperties array where the list will be returned.
     * @return - {@link #STATUS_OK} in case of success
     *         - {@link #STATUS_ERROR} in case of unspecified error
     *         - {@link #STATUS_PERMISSION_DENIED} if the caller does not have system permission
     *         - {@link #STATUS_NO_INIT} if the native service cannot be reached
     *         - {@link #STATUS_BAD_VALUE} if modules is null
     *         - {@link #STATUS_DEAD_OBJECT} if the binder transaction to the native service fails
     */
    public static native int listModules(ArrayList<ModuleProperties> modules);

    /**
     * Get an interface on a hardware module to control sound models and recognition on
     * this module.
     * @param moduleId Sound module system identifier {@link ModuleProperties#id}. Mandatory.
     * @param listener {@link StatusListener} interface. Mandatory.
     * @param handler the Handler that will receive the callbacks. Can be null if default handler
     *                is OK.
     * @return a valid sound module in case of success or null in case of error.
     */
    public static SoundTriggerModule attachModule(int moduleId,
                                                  StatusListener listener,
                                                  Handler handler) {
        // The listener is mandatory: without it there is nowhere to deliver events.
        if (listener == null) {
            return null;
        }
        return new SoundTriggerModule(moduleId, listener, handler);
    }

    /**
     * Interface provided by the client application when attaching to a {@link SoundTriggerModule}
     * to receive recognition and error notifications.
     */
    public interface StatusListener {
        /**
         * Called when recognition succeeds or fails
         */
        void onRecognition(RecognitionEvent event);

        /**
         * Called when the sound trigger native service dies
         */
        void onServiceDied();
    }
}