SoundTrigger.java revision 055897208d659e9734a82def88be4a806ff55448
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.hardware.soundtrigger;
18
19import android.os.Handler;
20import android.os.Parcel;
21import android.os.Parcelable;
22
23import java.util.ArrayList;
24import java.util.Arrays;
25import java.util.UUID;
26
27/**
28 * The SoundTrigger class provides access via JNI to the native service managing
29 * the sound trigger HAL.
30 *
31 * @hide
32 */
33public class SoundTrigger {
34
35    public static final int STATUS_OK = 0;
36    public static final int STATUS_ERROR = Integer.MIN_VALUE;
37    public static final int STATUS_PERMISSION_DENIED = -1;
38    public static final int STATUS_NO_INIT = -19;
39    public static final int STATUS_BAD_VALUE = -22;
40    public static final int STATUS_DEAD_OBJECT = -32;
41    public static final int STATUS_INVALID_OPERATION = -38;
42
43    /*****************************************************************************
44     * A ModuleProperties describes a given sound trigger hardware module
45     * managed by the native sound trigger service. Each module has a unique
46     * ID used to target any API call to this paricular module. Module
47     * properties are returned by listModules() method.
48     ****************************************************************************/
49    public static class ModuleProperties implements Parcelable {
50        /** Unique module ID provided by the native service */
51        public final int id;
52
53        /** human readable voice detection engine implementor */
54        public final String implementor;
55
56        /** human readable voice detection engine description */
57        public final String description;
58
59        /** Unique voice engine Id (changes with each version) */
60        public final UUID uuid;
61
62        /** Voice detection engine version */
63        public final int version;
64
65        /** Maximum number of active sound models */
66        public final int maxSoundModels;
67
68        /** Maximum number of key phrases */
69        public final int maxKeyphrases;
70
71        /** Maximum number of users per key phrase */
72        public final int maxUsers;
73
74        /** Supported recognition modes (bit field, RECOGNITION_MODE_VOICE_TRIGGER ...) */
75        public final int recognitionModes;
76
77        /** Supports seamless transition to capture mode after recognition */
78        public final boolean supportsCaptureTransition;
79
80        /** Maximum buffering capacity in ms if supportsCaptureTransition() is true */
81        public final int maxBufferMs;
82
83        /** Supports capture by other use cases while detection is active */
84        public final boolean supportsConcurrentCapture;
85
86        /** Rated power consumption when detection is active with TDB silence/sound/speech ratio */
87        public final int powerConsumptionMw;
88
89        ModuleProperties(int id, String implementor, String description,
90                String uuid, int version, int maxSoundModels, int maxKeyphrases,
91                int maxUsers, int recognitionModes, boolean supportsCaptureTransition,
92                int maxBufferMs, boolean supportsConcurrentCapture,
93                int powerConsumptionMw) {
94            this.id = id;
95            this.implementor = implementor;
96            this.description = description;
97            this.uuid = UUID.fromString(uuid);
98            this.version = version;
99            this.maxSoundModels = maxSoundModels;
100            this.maxKeyphrases = maxKeyphrases;
101            this.maxUsers = maxUsers;
102            this.recognitionModes = recognitionModes;
103            this.supportsCaptureTransition = supportsCaptureTransition;
104            this.maxBufferMs = maxBufferMs;
105            this.supportsConcurrentCapture = supportsConcurrentCapture;
106            this.powerConsumptionMw = powerConsumptionMw;
107        }
108
109        public static final Parcelable.Creator<ModuleProperties> CREATOR
110                = new Parcelable.Creator<ModuleProperties>() {
111            public ModuleProperties createFromParcel(Parcel in) {
112                return ModuleProperties.fromParcel(in);
113            }
114
115            public ModuleProperties[] newArray(int size) {
116                return new ModuleProperties[size];
117            }
118        };
119
120        private static ModuleProperties fromParcel(Parcel in) {
121            int id = in.readInt();
122            String implementor = in.readString();
123            String description = in.readString();
124            String uuid = in.readString();
125            int version = in.readInt();
126            int maxSoundModels = in.readInt();
127            int maxKeyphrases = in.readInt();
128            int maxUsers = in.readInt();
129            int recognitionModes = in.readInt();
130            boolean supportsCaptureTransition = in.readByte() == 1;
131            int maxBufferMs = in.readInt();
132            boolean supportsConcurrentCapture = in.readByte() == 1;
133            int powerConsumptionMw = in.readInt();
134            return new ModuleProperties(id, implementor, description, uuid, version,
135                    maxSoundModels, maxKeyphrases, maxUsers, recognitionModes,
136                    supportsCaptureTransition, maxBufferMs, supportsConcurrentCapture,
137                    powerConsumptionMw);
138        }
139
140        @Override
141        public void writeToParcel(Parcel dest, int flags) {
142            dest.writeInt(id);
143            dest.writeString(implementor);
144            dest.writeString(description);
145            dest.writeString(uuid.toString());
146            dest.writeInt(version);
147            dest.writeInt(maxSoundModels);
148            dest.writeInt(maxKeyphrases);
149            dest.writeInt(maxUsers);
150            dest.writeInt(recognitionModes);
151            dest.writeByte((byte) (supportsCaptureTransition ? 1 : 0));
152            dest.writeInt(maxBufferMs);
153            dest.writeByte((byte) (supportsConcurrentCapture ? 1 : 0));
154            dest.writeInt(powerConsumptionMw);
155        }
156
157        @Override
158        public int describeContents() {
159            return 0;
160        }
161
162        @Override
163        public String toString() {
164            return "ModuleProperties [id=" + id + ", implementor=" + implementor + ", description="
165                    + description + ", uuid=" + uuid + ", version=" + version + ", maxSoundModels="
166                    + maxSoundModels + ", maxKeyphrases=" + maxKeyphrases + ", maxUsers="
167                    + maxUsers + ", recognitionModes=" + recognitionModes
168                    + ", supportsCaptureTransition=" + supportsCaptureTransition + ", maxBufferMs="
169                    + maxBufferMs + ", supportsConcurrentCapture=" + supportsConcurrentCapture
170                    + ", powerConsumptionMw=" + powerConsumptionMw + "]";
171        }
172    }
173
174    /*****************************************************************************
175     * A SoundModel describes the attributes and contains the binary data used by the hardware
176     * implementation to detect a particular sound pattern.
177     * A specialized version {@link KeyphraseSoundModel} is defined for key phrase
178     * sound models.
179     ****************************************************************************/
180    public static class SoundModel {
181        /** Undefined sound model type */
182        public static final int TYPE_UNKNOWN = -1;
183
184        /** Keyphrase sound model */
185        public static final int TYPE_KEYPHRASE = 0;
186
187        /** Unique sound model identifier */
188        public final UUID uuid;
189
190        /** Sound model type (e.g. TYPE_KEYPHRASE); */
191        public final int type;
192
193        /** Opaque data. For use by vendor implementation and enrollment application */
194        public final byte[] data;
195
196        public SoundModel(UUID uuid, int type, byte[] data) {
197            this.uuid = uuid;
198            this.type = type;
199            this.data = data;
200        }
201    }
202
203    /*****************************************************************************
204     * A Keyphrase describes a key phrase that can be detected by a
205     * {@link KeyphraseSoundModel}
206     ****************************************************************************/
207    public static class Keyphrase implements Parcelable {
208        /** Unique identifier for this keyphrase */
209        public final int id;
210
211        /** Recognition modes supported for this key phrase in the model */
212        public final int recognitionModes;
213
214        /** Locale of the keyphrase. JAVA Locale string e.g en_US */
215        public final String locale;
216
217        /** Key phrase text */
218        public final String text;
219
220        /** Users this key phrase has been trained for. countains sound trigger specific user IDs
221         * derived from system user IDs {@link android.os.UserHandle#getIdentifier()}. */
222        public final int[] users;
223
224        public Keyphrase(int id, int recognitionModes, String locale, String text, int[] users) {
225            this.id = id;
226            this.recognitionModes = recognitionModes;
227            this.locale = locale;
228            this.text = text;
229            this.users = users;
230        }
231
232        public static final Parcelable.Creator<Keyphrase> CREATOR
233                = new Parcelable.Creator<Keyphrase>() {
234            public Keyphrase createFromParcel(Parcel in) {
235                return Keyphrase.fromParcel(in);
236            }
237
238            public Keyphrase[] newArray(int size) {
239                return new Keyphrase[size];
240            }
241        };
242
243        private static Keyphrase fromParcel(Parcel in) {
244            int id = in.readInt();
245            int recognitionModes = in.readInt();
246            String locale = in.readString();
247            String text = in.readString();
248            int[] users = null;
249            int numUsers = in.readInt();
250            if (numUsers > 0) {
251                users = new int[numUsers];
252                in.readIntArray(users);
253            }
254            return new Keyphrase(id, recognitionModes, locale, text, users);
255        }
256
257        @Override
258        public void writeToParcel(Parcel dest, int flags) {
259            dest.writeInt(id);
260            dest.writeInt(recognitionModes);
261            dest.writeString(locale);
262            dest.writeString(text);
263            if (users != null) {
264                dest.writeInt(users.length);
265                dest.writeIntArray(users);
266            } else {
267                dest.writeInt(0);
268            }
269        }
270
271        @Override
272        public int describeContents() {
273            return 0;
274        }
275
276        @Override
277        public int hashCode() {
278            final int prime = 31;
279            int result = 1;
280            result = prime * result + ((text == null) ? 0 : text.hashCode());
281            result = prime * result + id;
282            result = prime * result + ((locale == null) ? 0 : locale.hashCode());
283            result = prime * result + recognitionModes;
284            result = prime * result + Arrays.hashCode(users);
285            return result;
286        }
287
288        @Override
289        public boolean equals(Object obj) {
290            if (this == obj)
291                return true;
292            if (obj == null)
293                return false;
294            if (getClass() != obj.getClass())
295                return false;
296            Keyphrase other = (Keyphrase) obj;
297            if (text == null) {
298                if (other.text != null)
299                    return false;
300            } else if (!text.equals(other.text))
301                return false;
302            if (id != other.id)
303                return false;
304            if (locale == null) {
305                if (other.locale != null)
306                    return false;
307            } else if (!locale.equals(other.locale))
308                return false;
309            if (recognitionModes != other.recognitionModes)
310                return false;
311            if (!Arrays.equals(users, other.users))
312                return false;
313            return true;
314        }
315
316        @Override
317        public String toString() {
318            return "Keyphrase [id=" + id + ", recognitionModes=" + recognitionModes + ", locale="
319                    + locale + ", text=" + text + ", users=" + Arrays.toString(users) + "]";
320        }
321    }
322
323    /*****************************************************************************
324     * A KeyphraseSoundModel is a specialized {@link SoundModel} for key phrases.
325     * It contains data needed by the hardware to detect a certain number of key phrases
326     * and the list of corresponding {@link Keyphrase} descriptors.
327     ****************************************************************************/
328    public static class KeyphraseSoundModel extends SoundModel implements Parcelable {
329        /** Key phrases in this sound model */
330        public final Keyphrase[] keyphrases; // keyword phrases in model
331
332        public KeyphraseSoundModel(UUID id, byte[] data, Keyphrase[] keyphrases) {
333            super(id, TYPE_KEYPHRASE, data);
334            this.keyphrases = keyphrases;
335        }
336
337        public static final Parcelable.Creator<KeyphraseSoundModel> CREATOR
338                = new Parcelable.Creator<KeyphraseSoundModel>() {
339            public KeyphraseSoundModel createFromParcel(Parcel in) {
340                return KeyphraseSoundModel.fromParcel(in);
341            }
342
343            public KeyphraseSoundModel[] newArray(int size) {
344                return new KeyphraseSoundModel[size];
345            }
346        };
347
348        private static KeyphraseSoundModel fromParcel(Parcel in) {
349            UUID uuid = UUID.fromString(in.readString());
350            byte[] data = null;
351            int dataLength = in.readInt();
352            if (dataLength > 0) {
353                data = new byte[dataLength];
354                in.readByteArray(data);
355            }
356            Keyphrase[] keyphrases = in.createTypedArray(Keyphrase.CREATOR);
357            return new KeyphraseSoundModel(uuid, data, keyphrases);
358        }
359
360        @Override
361        public int describeContents() {
362            return 0;
363        }
364
365        @Override
366        public void writeToParcel(Parcel dest, int flags) {
367            dest.writeString(uuid.toString());
368            if (data != null) {
369                dest.writeInt(data.length);
370                dest.writeByteArray(data);
371            } else {
372                dest.writeInt(0);
373            }
374            dest.writeTypedArray(keyphrases, 0);
375        }
376    }
377
378    /**
379     *  Modes for key phrase recognition
380     */
381    /** Simple recognition of the key phrase */
382    public static final int RECOGNITION_MODE_VOICE_TRIGGER = 0x1;
383    /** Trigger only if one user is identified */
384    public static final int RECOGNITION_MODE_USER_IDENTIFICATION = 0x2;
385    /** Trigger only if one user is authenticated */
386    public static final int RECOGNITION_MODE_USER_AUTHENTICATION = 0x4;
387
388    /**
389     *  Status codes for {@link RecognitionEvent}
390     */
391    /** Recognition success */
392    public static final int RECOGNITION_STATUS_SUCCESS = 0;
393    /** Recognition aborted (e.g. capture preempted by anotehr use case */
394    public static final int RECOGNITION_STATUS_ABORT = 1;
395    /** Recognition failure */
396    public static final int RECOGNITION_STATUS_FAILURE = 2;
397
398    /**
399     *  A RecognitionEvent is provided by the
400     *  {@link StatusListener#onRecognition(RecognitionEvent)}
401     *  callback upon recognition success or failure.
402     */
403    public static class RecognitionEvent {
404        /** Recognition status e.g {@link #RECOGNITION_STATUS_SUCCESS} */
405        public final int status;
406        /** Sound Model corresponding to this event callback */
407        public final int soundModelHandle;
408        /** True if it is possible to capture audio from this utterance buffered by the hardware */
409        public final boolean captureAvailable;
410        /** Audio session ID to be used when capturing the utterance with an AudioRecord
411         * if captureAvailable() is true. */
412        public final int captureSession;
413        /** Delay in ms between end of model detection and start of audio available for capture.
414         * A negative value is possible (e.g. if keyphrase is also available for capture) */
415        public final int captureDelayMs;
416        /** Duration in ms of audio captured before the start of the trigger. 0 if none. */
417        public final int capturePreambleMs;
418        /** Opaque data for use by system applications who know about voice engine internals,
419         * typically during enrollment. */
420        public final byte[] data;
421
422        RecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
423                int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data) {
424            this.status = status;
425            this.soundModelHandle = soundModelHandle;
426            this.captureAvailable = captureAvailable;
427            this.captureSession = captureSession;
428            this.captureDelayMs = captureDelayMs;
429            this.capturePreambleMs = capturePreambleMs;
430            this.data = data;
431        }
432    }
433
434    /**
435     *  A RecognitionConfig is provided to
436     *  {@link SoundTriggerModule#startRecognition(int, RecognitionConfig)} to configure the
437     *  recognition request.
438     */
439    public static class RecognitionConfig implements Parcelable {
440        /** True if the DSP should capture the trigger sound and make it available for further
441         * capture. */
442        public final boolean captureRequested;
443        /** List of all keyphrases in the sound model for which recognition should be performed with
444         * options for each keyphrase. */
445        public final KeyphraseRecognitionExtra keyphrases[];
446        /** Opaque data for use by system applications who know about voice engine internals,
447         * typically during enrollment. */
448        public final byte[] data;
449
450        public RecognitionConfig(boolean captureRequested,
451                KeyphraseRecognitionExtra keyphrases[], byte[] data) {
452            this.captureRequested = captureRequested;
453            this.keyphrases = keyphrases;
454            this.data = data;
455        }
456
457        public static final Parcelable.Creator<RecognitionConfig> CREATOR
458                = new Parcelable.Creator<RecognitionConfig>() {
459            public RecognitionConfig createFromParcel(Parcel in) {
460                return RecognitionConfig.fromParcel(in);
461            }
462
463            public RecognitionConfig[] newArray(int size) {
464                return new RecognitionConfig[size];
465            }
466        };
467
468        private static RecognitionConfig fromParcel(Parcel in) {
469            boolean captureRequested = in.readByte() == 1;
470            KeyphraseRecognitionExtra[] keyphrases =
471                    in.createTypedArray(KeyphraseRecognitionExtra.CREATOR);
472            byte[] data = null;
473            int dataLength = in.readInt();
474            if (dataLength > 0) {
475                data = new byte[dataLength];
476                in.readByteArray(data);
477            }
478            return new RecognitionConfig(captureRequested, keyphrases, data);
479        }
480
481        @Override
482        public void writeToParcel(Parcel dest, int flags) {
483            dest.writeByte((byte) (captureRequested ? 1 : 0));
484            dest.writeTypedArray(keyphrases, 0);
485            if (data != null) {
486                dest.writeInt(data.length);
487                dest.writeByteArray(data);
488            } else {
489                dest.writeInt(0);
490            }
491        }
492
493        @Override
494        public int describeContents() {
495            return 0;
496        }
497    }
498
499    /**
500     * Confidence level for users defined in a keyphrase.
501     * - The confidence level is expressed in percent (0% -100%).
502     * When used in a {@link KeyphraseRecognitionEvent} it indicates the detected confidence level
503     * When used in a {@link RecognitionConfig} it indicates the minimum confidence level that
504     * should trigger a recognition.
505     * - The user ID is derived from the system ID {@link android.os.UserHandle#getIdentifier()}.
506     */
507    public static class ConfidenceLevel implements Parcelable {
508        public final int userId;
509        public final int confidenceLevel;
510
511        public ConfidenceLevel(int userId, int confidenceLevel) {
512            this.userId = userId;
513            this.confidenceLevel = confidenceLevel;
514        }
515
516        public static final Parcelable.Creator<ConfidenceLevel> CREATOR
517                = new Parcelable.Creator<ConfidenceLevel>() {
518            public ConfidenceLevel createFromParcel(Parcel in) {
519                return ConfidenceLevel.fromParcel(in);
520            }
521
522            public ConfidenceLevel[] newArray(int size) {
523                return new ConfidenceLevel[size];
524            }
525        };
526
527        private static ConfidenceLevel fromParcel(Parcel in) {
528            int userId = in.readInt();
529            int confidenceLevel = in.readInt();
530            return new ConfidenceLevel(userId, confidenceLevel);
531        }
532
533        @Override
534        public void writeToParcel(Parcel dest, int flags) {
535            dest.writeInt(userId);
536            dest.writeInt(confidenceLevel);
537        }
538
539        @Override
540        public int describeContents() {
541            return 0;
542        }
543    }
544
545    /**
546     *  Additional data conveyed by a {@link KeyphraseRecognitionEvent}
547     *  for a key phrase detection.
548     */
549    public static class KeyphraseRecognitionExtra implements Parcelable {
550        /** The keyphrase ID */
551        public final int id;
552
553        /** Recognition modes matched for this event */
554        public final int recognitionModes;
555
556        /** Confidence levels for all users recognized (KeyphraseRecognitionEvent) or to
557         * be recognized (RecognitionConfig) */
558        public final ConfidenceLevel[] confidenceLevels;
559
560        public KeyphraseRecognitionExtra(int id, int recognitionModes,
561                                  ConfidenceLevel[] confidenceLevels) {
562            this.id = id;
563            this.recognitionModes = recognitionModes;
564            this.confidenceLevels = confidenceLevels;
565        }
566
567        public static final Parcelable.Creator<KeyphraseRecognitionExtra> CREATOR
568                = new Parcelable.Creator<KeyphraseRecognitionExtra>() {
569            public KeyphraseRecognitionExtra createFromParcel(Parcel in) {
570                return KeyphraseRecognitionExtra.fromParcel(in);
571            }
572
573            public KeyphraseRecognitionExtra[] newArray(int size) {
574                return new KeyphraseRecognitionExtra[size];
575            }
576        };
577
578        private static KeyphraseRecognitionExtra fromParcel(Parcel in) {
579            int id = in.readInt();
580            int recognitionModes = in.readInt();
581            ConfidenceLevel[] confidenceLevels = in.createTypedArray(ConfidenceLevel.CREATOR);
582            return new KeyphraseRecognitionExtra(id, recognitionModes, confidenceLevels);
583        }
584
585        @Override
586        public void writeToParcel(Parcel dest, int flags) {
587            dest.writeInt(id);
588            dest.writeInt(recognitionModes);
589            dest.writeTypedArray(confidenceLevels, 0);
590        }
591
592        @Override
593        public int describeContents() {
594            return 0;
595        }
596    }
597
598    /**
599     *  Specialized {@link RecognitionEvent} for a key phrase detection.
600     */
601    public static class KeyphraseRecognitionEvent extends RecognitionEvent {
602        /** Indicates if the key phrase is present in the buffered audio available for capture */
603        public final KeyphraseRecognitionExtra[] keyphraseExtras;
604
605        /** Additional data available for each recognized key phrases in the model */
606        public final boolean keyphraseInCapture;
607
608        KeyphraseRecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
609               int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data,
610               boolean keyphraseInCapture, KeyphraseRecognitionExtra[] keyphraseExtras) {
611            super(status, soundModelHandle, captureAvailable, captureSession, captureDelayMs,
612                  capturePreambleMs, data);
613            this.keyphraseInCapture = keyphraseInCapture;
614            this.keyphraseExtras = keyphraseExtras;
615        }
616    }
617
618    /**
619     * Returns a list of descriptors for all harware modules loaded.
620     * @param modules A ModuleProperties array where the list will be returned.
621     * @return - {@link #STATUS_OK} in case of success
622     *         - {@link #STATUS_ERROR} in case of unspecified error
623     *         - {@link #STATUS_PERMISSION_DENIED} if the caller does not have system permission
624     *         - {@link #STATUS_NO_INIT} if the native service cannot be reached
625     *         - {@link #STATUS_BAD_VALUE} if modules is null
626     *         - {@link #STATUS_DEAD_OBJECT} if the binder transaction to the native service fails
627     */
628    public static native int listModules(ArrayList <ModuleProperties> modules);
629
630    /**
631     * Get an interface on a hardware module to control sound models and recognition on
632     * this module.
633     * @param moduleId Sound module system identifier {@link ModuleProperties#id}. mandatory.
634     * @param listener {@link StatusListener} interface. Mandatory.
635     * @param handler the Handler that will receive the callabcks. Can be null if default handler
636     *                is OK.
637     * @return a valid sound module in case of success or null in case of error.
638     */
639    public static SoundTriggerModule attachModule(int moduleId,
640                                                  StatusListener listener,
641                                                  Handler handler) {
642        if (listener == null) {
643            return null;
644        }
645        SoundTriggerModule module = new SoundTriggerModule(moduleId, listener, handler);
646        return module;
647    }
648
649    /**
650     * Interface provided by the client application when attaching to a {@link SoundTriggerModule}
651     * to received recognition and error notifications.
652     */
653    public static interface StatusListener {
654        /**
655         * Called when recognition succeeds of fails
656         */
657        public abstract void onRecognition(RecognitionEvent event);
658
659        /**
660         * Called when the sound trigger native service dies
661         */
662        public abstract void onServiceDied();
663    }
664}
665