SoundTrigger.java revision d7018200312e4e4dc3f67cf33dc90bf7ce585844
1/**
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.hardware.soundtrigger;
18
19import android.os.Handler;
20
21import java.util.ArrayList;
22import java.util.UUID;
23
24/**
25 * The SoundTrigger class provides access via JNI to the native service managing
26 * the sound trigger HAL.
27 *
28 * @hide
29 */
30public class SoundTrigger {
31
32    public static final int STATUS_OK = 0;
33    public static final int STATUS_ERROR = Integer.MIN_VALUE;
34    public static final int STATUS_PERMISSION_DENIED = -1;
35    public static final int STATUS_NO_INIT = -19;
36    public static final int STATUS_BAD_VALUE = -22;
37    public static final int STATUS_DEAD_OBJECT = -32;
38    public static final int STATUS_INVALID_OPERATION = -38;
39
40    /*****************************************************************************
41     * A ModuleProperties describes a given sound trigger hardware module
42     * managed by the native sound trigger service. Each module has a unique
43     * ID used to target any API call to this paricular module. Module
44     * properties are returned by listModules() method.
45     ****************************************************************************/
46    public static class ModuleProperties {
47        /** Unique module ID provided by the native service */
48        public final int id;
49
50        /** human readable voice detection engine implementor */
51        public final String implementor;
52
53        /** human readable voice detection engine description */
54        public final String description;
55
56        /** Unique voice engine Id (changes with each version) */
57        public final UUID uuid;
58
59        /** Voice detection engine version */
60        public final int version;
61
62        /** Maximum number of active sound models */
63        public final int maxSoundModels;
64
65        /** Maximum number of key phrases */
66        public final int maxKeyphrases;
67
68        /** Maximum number of users per key phrase */
69        public final int maxUsers;
70
71        /** Supported recognition modes (bit field, RECOGNITION_MODE_VOICE_TRIGGER ...) */
72        public final int recognitionModes;
73
74        /** Supports seamless transition to capture mode after recognition */
75        public final boolean supportsCaptureTransition;
76
77        /** Maximum buffering capacity in ms if supportsCaptureTransition() is true */
78        public final int maxBufferMs;
79
80        /** Supports capture by other use cases while detection is active */
81        public final boolean supportsConcurrentCapture;
82
83        /** Rated power consumption when detection is active with TDB silence/sound/speech ratio */
84        public final int powerConsumptionMw;
85
86        ModuleProperties(int id, String implementor, String description,
87                String uuid, int version, int maxSoundModels, int maxKeyphrases,
88                int maxUsers, int recognitionModes, boolean supportsCaptureTransition,
89                int maxBufferMs, boolean supportsConcurrentCapture,
90                int powerConsumptionMw) {
91            this.id = id;
92            this.implementor = implementor;
93            this.description = description;
94            this.uuid = UUID.fromString(uuid);
95            this.version = version;
96            this.maxSoundModels = maxSoundModels;
97            this.maxKeyphrases = maxKeyphrases;
98            this.maxUsers = maxUsers;
99            this.recognitionModes = recognitionModes;
100            this.supportsCaptureTransition = supportsCaptureTransition;
101            this.maxBufferMs = maxBufferMs;
102            this.supportsConcurrentCapture = supportsConcurrentCapture;
103            this.powerConsumptionMw = powerConsumptionMw;
104        }
105    }
106
107    /*****************************************************************************
108     * A SoundModel describes the attributes and contains the binary data used by the hardware
109     * implementation to detect a particular sound pattern.
110     * A specialized version {@link KeyphraseSoundModel} is defined for key phrase
111     * sound models.
112     ****************************************************************************/
113    public static class SoundModel {
114        /** Undefined sound model type */
115        public static final int TYPE_UNKNOWN = -1;
116
117        /** Keyphrase sound model */
118        public static final int TYPE_KEYPHRASE = 0;
119
120        /** Unique sound model identifier */
121        public final UUID uuid;
122
123        /** Sound model type (e.g. TYPE_KEYPHRASE); */
124        public final int type;
125
126        /** Opaque data. For use by vendor implementation and enrollment application */
127        public final byte[] data;
128
129        public SoundModel(UUID uuid, int type, byte[] data) {
130            this.uuid = uuid;
131            this.type = type;
132            this.data = data;
133        }
134    }
135
136    /*****************************************************************************
137     * A Keyphrase describes a key phrase that can be detected by a
138     * {@link KeyphraseSoundModel}
139     ****************************************************************************/
140    public static class Keyphrase {
141        /** Unique identifier for this keyphrase */
142        public final int id;
143
144        /** Recognition modes supported for this key phrase in the model */
145        public final int recognitionModes;
146
147        /** Locale of the keyphrase. JAVA Locale string e.g en_US */
148        public final String locale;
149
150        /** Key phrase text */
151        public final String text;
152
153        /** Users this key phrase has been trained for. countains sound trigger specific user IDs
154         * derived from system user IDs {@link android.os.UserHandle#getIdentifier()}. */
155        public final int[] users;
156
157        public Keyphrase(int id, int recognitionModes, String locale, String text, int[] users) {
158            this.id = id;
159            this.recognitionModes = recognitionModes;
160            this.locale = locale;
161            this.text = text;
162            this.users = users;
163        }
164    }
165
166    /*****************************************************************************
167     * A KeyphraseSoundModel is a specialized {@link SoundModel} for key phrases.
168     * It contains data needed by the hardware to detect a certain number of key phrases
169     * and the list of corresponding {@link Keyphrase} descriptors.
170     ****************************************************************************/
171    public static class KeyphraseSoundModel extends SoundModel {
172        /** Key phrases in this sound model */
173        public final Keyphrase[] keyphrases; // keyword phrases in model
174
175        public KeyphraseSoundModel(UUID id, byte[] data, Keyphrase[] keyphrases) {
176            super(id, TYPE_KEYPHRASE, data);
177            this.keyphrases = keyphrases;
178        }
179    }
180
181    /**
182     *  Modes for key phrase recognition
183     */
184    /** Simple recognition of the key phrase */
185    public static final int RECOGNITION_MODE_VOICE_TRIGGER = 0x1;
186    /** Trigger only if one user is identified */
187    public static final int RECOGNITION_MODE_USER_IDENTIFICATION = 0x2;
188    /** Trigger only if one user is authenticated */
189    public static final int RECOGNITION_MODE_USER_AUTHENTICATION = 0x4;
190
191    /**
192     *  Status codes for {@link RecognitionEvent}
193     */
194    /** Recognition success */
195    public static final int RECOGNITION_STATUS_SUCCESS = 0;
196    /** Recognition aborted (e.g. capture preempted by anotehr use case */
197    public static final int RECOGNITION_STATUS_ABORT = 1;
198    /** Recognition failure */
199    public static final int RECOGNITION_STATUS_FAILURE = 2;
200
201    /**
202     *  A RecognitionEvent is provided by the
203     *  {@link StatusListener#onRecognition(RecognitionEvent)}
204     *  callback upon recognition success or failure.
205     */
206    public static class RecognitionEvent {
207        /** Recognition status e.g {@link #RECOGNITION_STATUS_SUCCESS} */
208        public final int status;
209        /** Sound Model corresponding to this event callback */
210        public final int soundModelHandle;
211        /** True if it is possible to capture audio from this utterance buffered by the hardware */
212        public final boolean captureAvailable;
213        /** Audio session ID to be used when capturing the utterance with an AudioRecord
214         * if captureAvailable() is true. */
215        public final int captureSession;
216        /** Delay in ms between end of model detection and start of audio available for capture.
217         * A negative value is possible (e.g. if keyphrase is also available for capture) */
218        public final int captureDelayMs;
219        /** Duration in ms of audio captured before the start of the trigger. 0 if none. */
220        public final int capturePreambleMs;
221        /** Opaque data for use by system applications who know about voice engine internals,
222         * typically during enrollment. */
223        public final byte[] data;
224
225        RecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
226                int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data) {
227            this.status = status;
228            this.soundModelHandle = soundModelHandle;
229            this.captureAvailable = captureAvailable;
230            this.captureSession = captureSession;
231            this.captureDelayMs = captureDelayMs;
232            this.capturePreambleMs = capturePreambleMs;
233            this.data = data;
234        }
235    }
236
237    /**
238     *  A RecognitionConfig is provided to
239     *  {@link SoundTriggerModule#startRecognition(int, RecognitionConfig)} to configure the
240     *  recognition request.
241     */
242    public static class RecognitionConfig {
243        /** True if the DSP should capture the trigger sound and make it available for further
244         * capture. */
245        public final boolean captureRequested;
246        /** List of all keyphrases in the sound model for which recognition should be performed with
247         * options for each keyphrase. */
248        public final KeyphraseRecognitionExtra keyphrases[];
249        /** Opaque data for use by system applications who know about voice engine internals,
250         * typically during enrollment. */
251        public final byte[] data;
252
253        public RecognitionConfig(boolean captureRequested,
254                KeyphraseRecognitionExtra keyphrases[], byte[] data) {
255            this.captureRequested = captureRequested;
256            this.keyphrases = keyphrases;
257            this.data = data;
258        }
259    }
260
261    /**
262     * Confidence level for users defined in a keyphrase.
263     * - The confidence level is expressed in percent (0% -100%).
264     * When used in a {@link KeyphraseRecognitionEvent} it indicates the detected confidence level
265     * When used in a {@link RecognitionConfig} it indicates the minimum confidence level that
266     * should trigger a recognition.
267     * - The user ID is derived from the system ID {@link android.os.UserHandle#getIdentifier()}.
268     */
269    public static class ConfidenceLevel {
270        public final int userId;
271        public final int confidenceLevel;
272
273        public ConfidenceLevel(int userId, int confidenceLevel) {
274            this.userId = userId;
275            this.confidenceLevel = confidenceLevel;
276        }
277    }
278
279    /**
280     *  Additional data conveyed by a {@link KeyphraseRecognitionEvent}
281     *  for a key phrase detection.
282     */
283    public static class KeyphraseRecognitionExtra {
284        /** The keyphrse ID */
285        public final int id;
286
287        /** Recognition modes matched for this event */
288        public final int recognitionModes;
289
290        /** Confidence levels for all users recognized (KeyphraseRecognitionEvent) or to
291         * be recognized (RecognitionConfig) */
292        public final ConfidenceLevel[] confidenceLevels;
293
294        public KeyphraseRecognitionExtra(int id, int recognitionModes,
295                                  ConfidenceLevel[] confidenceLevels) {
296            this.id = id;
297            this.recognitionModes = recognitionModes;
298            this.confidenceLevels = confidenceLevels;
299        }
300    }
301
302    /**
303     *  Specialized {@link RecognitionEvent} for a key phrase detection.
304     */
305    public static class KeyphraseRecognitionEvent extends RecognitionEvent {
306        /** Indicates if the key phrase is present in the buffered audio available for capture */
307        public final KeyphraseRecognitionExtra[] keyphraseExtras;
308
309        /** Additional data available for each recognized key phrases in the model */
310        public final boolean keyphraseInCapture;
311
312        KeyphraseRecognitionEvent(int status, int soundModelHandle, boolean captureAvailable,
313               int captureSession, int captureDelayMs, int capturePreambleMs, byte[] data,
314               boolean keyphraseInCapture, KeyphraseRecognitionExtra[] keyphraseExtras) {
315            super(status, soundModelHandle, captureAvailable, captureSession, captureDelayMs,
316                  capturePreambleMs, data);
317            this.keyphraseInCapture = keyphraseInCapture;
318            this.keyphraseExtras = keyphraseExtras;
319        }
320    }
321
322    /**
323     * Returns a list of descriptors for all harware modules loaded.
324     * @param modules A ModuleProperties array where the list will be returned.
325     * @return - {@link #STATUS_OK} in case of success
326     *         - {@link #STATUS_ERROR} in case of unspecified error
327     *         - {@link #STATUS_PERMISSION_DENIED} if the caller does not have system permission
328     *         - {@link #STATUS_NO_INIT} if the native service cannot be reached
329     *         - {@link #STATUS_BAD_VALUE} if modules is null
330     *         - {@link #STATUS_DEAD_OBJECT} if the binder transaction to the native service fails
331     */
332    public static native int listModules(ArrayList <ModuleProperties> modules);
333
334    /**
335     * Get an interface on a hardware module to control sound models and recognition on
336     * this module.
337     * @param moduleId Sound module system identifier {@link ModuleProperties#id}. mandatory.
338     * @param listener {@link StatusListener} interface. Mandatory.
339     * @param handler the Handler that will receive the callabcks. Can be null if default handler
340     *                is OK.
341     * @return a valid sound module in case of success or null in case of error.
342     */
343    public static SoundTriggerModule attachModule(int moduleId,
344                                                  StatusListener listener,
345                                                  Handler handler) {
346        if (listener == null) {
347            return null;
348        }
349        SoundTriggerModule module = new SoundTriggerModule(moduleId, listener, handler);
350        return module;
351    }
352
353    /**
354     * Interface provided by the client application when attaching to a {@link SoundTriggerModule}
355     * to received recognition and error notifications.
356     */
357    public static interface StatusListener {
358        /**
359         * Called when recognition succeeds of fails
360         */
361        public abstract void onRecognition(RecognitionEvent event);
362
363        /**
364         * Called when the sound trigger native service dies
365         */
366        public abstract void onServiceDied();
367    }
368}
369