SynthesisCallback.java revision 04637f3d4d68f6e5a4820e207b444d93704649b6
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.annotation.IntDef;
19import android.annotation.IntRange;
20import android.media.AudioFormat;
21
22import java.lang.annotation.Retention;
23import java.lang.annotation.RetentionPolicy;
24
25/**
26 * A callback to return speech data synthesized by a text to speech engine.
27 *
28 * The engine can provide streaming audio by calling
29 * {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
30 * {@link #done}.
31 *
32 * {@link #error} can be called at any stage in the synthesis process to
33 * indicate that an error has occurred, but if the call is made after a call
34 * to {@link #done}, it might be discarded.
35 *
36 * {@link #done} must be called at the end of synthesis, regardless of errors.
37 *
38 * All methods can be only called on the synthesis thread.
39 */
40public interface SynthesisCallback {
41
42    /** @hide */
43    @Retention(RetentionPolicy.SOURCE)
44    @IntDef({
45        AudioFormat.ENCODING_PCM_8BIT,
46        AudioFormat.ENCODING_PCM_16BIT,
47        AudioFormat.ENCODING_PCM_FLOAT
48    })
49    @interface SupportedAudioFormat {};
50
51    /**
52     * @return the maximum number of bytes that the TTS engine can pass in a single call of {@link
53     *     #audioAvailable}. Calls to {@link #audioAvailable} with data lengths larger than this
54     *     value will not succeed.
55     */
56    int getMaxBufferSize();
57
58  /**
59   * The service should call this when it starts to synthesize audio for this request.
60   *
61   * <p>This method should only be called on the synthesis thread, while in {@link
62   * TextToSpeechService#onSynthesizeText}.
63   *
64   * @param sampleRateInHz Sample rate in HZ of the generated audio.
65   * @param audioFormat Audio format of the generated audio. Must be one of {@link
66   *     AudioFormat#ENCODING_PCM_8BIT} or {@link AudioFormat#ENCODING_PCM_16BIT}. Can also be
67   *     {@link AudioFormat#ENCODING_PCM_FLOAT} when targetting Android N and above.
68   * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
69   * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
70   *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
71   */
72  int start(
73      int sampleRateInHz,
74      @SupportedAudioFormat int audioFormat,
75      @IntRange(from = 1, to = 2) int channelCount);
76
77  /**
78   * The service should call this method when synthesized audio is ready for consumption.
79   *
80   * <p>This method should only be called on the synthesis thread, while in {@link
81   * TextToSpeechService#onSynthesizeText}.
82   *
83   * @param buffer The generated audio data. This method will not hold on to {@code buffer}, so the
84   *     caller is free to modify it after this method returns.
85   * @param offset The offset into {@code buffer} where the audio data starts.
86   * @param length The number of bytes of audio data in {@code buffer}. This must be less than or
87   *     equal to the return value of {@link #getMaxBufferSize}.
88   * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
89   *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
90   */
91  int audioAvailable(byte[] buffer, int offset, int length);
92
93  /**
94   * The service should call this method when all the synthesized audio for a request has been
95   * passed to {@link #audioAvailable}.
96   *
97   * <p>This method should only be called on the synthesis thread, while in {@link
98   * TextToSpeechService#onSynthesizeText}.
99   *
100   * <p>This method has to be called if {@link #start} and/or {@link #error} was called.
101   *
102   * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
103   *     android.speech.tts.TextToSpeech#ERROR} or {@link android.speech.tts.TextToSpeech#STOPPED}.
104   */
105  int done();
106
107    /**
108     * The service should call this method if the speech synthesis fails.
109     *
110     * <p>This method should only be called on the synthesis thread, while in {@link
111     * TextToSpeechService#onSynthesizeText}.
112     */
113    void error();
114
115  /**
116   * The service should call this method if the speech synthesis fails.
117   *
118   * <p>This method should only be called on the synthesis thread, while in {@link
119   * TextToSpeechService#onSynthesizeText}.
120   *
121   * @param errorCode Error code to pass to the client. One of the ERROR_ values from {@link
122   *     android.speech.tts.TextToSpeech}
123   */
124  void error(@TextToSpeech.Error int errorCode);
125
126    /**
127     * Check if {@link #start} was called or not.
128     *
129     * <p>This method should only be called on the synthesis thread, while in {@link
130     * TextToSpeechService#onSynthesizeText}.
131     *
132     * <p>Useful for checking if a fallback from network request is possible.
133     */
134    boolean hasStarted();
135
136    /**
137     * Check if {@link #done} was called or not.
138     *
139     * <p>This method should only be called on the synthesis thread, while in {@link
140     * TextToSpeechService#onSynthesizeText}.
141     *
142     * <p>Useful for checking if a fallback from network request is possible.
143     */
144    boolean hasFinished();
145
146    /**
147     * The service may call this method to provide timing information about the spoken text.
148     *
149     * <p>Calling this method means that at the given audio frame, the given range of the input is
150     * about to be spoken. If this method is called the client will receive a callback on the
151     * listener ({@link UtteranceProgressListener#onRangeStart}) at the moment that frame has been
152     * reached by the playback head.
153     *
154     * <p>This information can be used by the client, for example, to highlight ranges of the text
155     * while it is spoken.
156     *
157     * <p>The markerInFrames is a frame index into the audio for this synthesis request, i.e. into
158     * the concatenation of the audio bytes sent to audioAvailable for this synthesis request. The
159     * definition of a frame depends on the format given by {@link #start}. See {@link AudioFormat}
160     * for more information.
161     *
162     * <p>This method should only be called on the synthesis thread, while in {@link
163     * TextToSpeechService#onSynthesizeText}.
164     *
165     * @param markerInFrames The position in frames in the audio where this range is spoken.
166     * @param start The start index of the range in the input text.
167     * @param end The end index (exclusive) of the range in the input text.
168     */
169    default void rangeStart(int markerInFrames, int start, int end) {}
170}
171