FileSynthesisCallback.java revision c99ba1c3edf725e070383b27724c9ed63e1e5765
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16package android.speech.tts;
17
18import android.annotation.NonNull;
19import android.media.AudioFormat;
20import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
21import android.util.Log;
22
23import java.io.IOException;
24import java.nio.ByteBuffer;
25import java.nio.ByteOrder;
26import java.nio.channels.FileChannel;
27
28/**
29 * Speech synthesis request that writes the audio to a WAV file.
30 */
31class FileSynthesisCallback extends AbstractSynthesisCallback {
32
33    private static final String TAG = "FileSynthesisRequest";
34    private static final boolean DBG = false;
35
36    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
37
38    private static final int WAV_HEADER_LENGTH = 44;
39    private static final short WAV_FORMAT_PCM = 0x0001;
40
41    private final Object mStateLock = new Object();
42
43    private int mSampleRateInHz;
44    private int mAudioFormat;
45    private int mChannelCount;
46
47    private FileChannel mFileChannel;
48
49    private final UtteranceProgressDispatcher mDispatcher;
50
51    private boolean mStarted = false;
52    private boolean mDone = false;
53
54    /** Status code of synthesis */
55    protected int mStatusCode;
56
57    FileSynthesisCallback(@NonNull FileChannel fileChannel,
58            @NonNull UtteranceProgressDispatcher dispatcher, boolean clientIsUsingV2) {
59        super(clientIsUsingV2);
60        mFileChannel = fileChannel;
61        mDispatcher = dispatcher;
62        mStatusCode = TextToSpeech.SUCCESS;
63    }
64
65    @Override
66    void stop() {
67        synchronized (mStateLock) {
68            if (mDone) {
69                return;
70            }
71            if (mStatusCode == TextToSpeech.STOPPED) {
72                return;
73            }
74
75            mStatusCode = TextToSpeech.STOPPED;
76            cleanUp();
77            mDispatcher.dispatchOnStop();
78        }
79    }
80
81    /**
82     * Must be called while holding the monitor on {@link #mStateLock}.
83     */
84    private void cleanUp() {
85        closeFile();
86    }
87
88    /**
89     * Must be called while holding the monitor on {@link #mStateLock}.
90     */
91    private void closeFile() {
92        // File will be closed by the SpeechItem in the speech service.
93        mFileChannel = null;
94    }
95
96    @Override
97    public int getMaxBufferSize() {
98        return MAX_AUDIO_BUFFER_SIZE;
99    }
100
101    @Override
102    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
103        if (DBG) {
104            Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
105                    + "," + channelCount + ")");
106        }
107        if (audioFormat != AudioFormat.ENCODING_PCM_8BIT &&
108            audioFormat != AudioFormat.ENCODING_PCM_16BIT &&
109            audioFormat != AudioFormat.ENCODING_PCM_FLOAT) {
110            Log.e(TAG, "Audio format encoding " + audioFormat + " not supported. Please use one " +
111                       "of AudioFormat.ENCODING_PCM_8BIT, AudioFormat.ENCODING_PCM_16BIT or " +
112                       "AudioFormat.ENCODING_PCM_FLOAT");
113        }
114        mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount);
115
116        FileChannel fileChannel = null;
117        synchronized (mStateLock) {
118            if (mStatusCode == TextToSpeech.STOPPED) {
119                if (DBG) Log.d(TAG, "Request has been aborted.");
120                return errorCodeOnStop();
121            }
122            if (mStatusCode != TextToSpeech.SUCCESS) {
123                if (DBG) Log.d(TAG, "Error was raised");
124                return TextToSpeech.ERROR;
125            }
126            if (mStarted) {
127                Log.e(TAG, "Start called twice");
128                return TextToSpeech.ERROR;
129            }
130            mStarted = true;
131            mSampleRateInHz = sampleRateInHz;
132            mAudioFormat = audioFormat;
133            mChannelCount = channelCount;
134
135            mDispatcher.dispatchOnStart();
136            fileChannel = mFileChannel;
137        }
138
139        try {
140            fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
141                return TextToSpeech.SUCCESS;
142        } catch (IOException ex) {
143            Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
144            synchronized (mStateLock) {
145                cleanUp();
146                mStatusCode = TextToSpeech.ERROR_OUTPUT;
147            }
148            return TextToSpeech.ERROR;
149        }
150    }
151
152    @Override
153    public int audioAvailable(byte[] buffer, int offset, int length) {
154        if (DBG) {
155            Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
156                    + "," + length + ")");
157        }
158        FileChannel fileChannel = null;
159        synchronized (mStateLock) {
160            if (mStatusCode == TextToSpeech.STOPPED) {
161                if (DBG) Log.d(TAG, "Request has been aborted.");
162                return errorCodeOnStop();
163            }
164            if (mStatusCode != TextToSpeech.SUCCESS) {
165                if (DBG) Log.d(TAG, "Error was raised");
166                return TextToSpeech.ERROR;
167            }
168            if (mFileChannel == null) {
169                Log.e(TAG, "File not open");
170                mStatusCode = TextToSpeech.ERROR_OUTPUT;
171                return TextToSpeech.ERROR;
172            }
173            if (!mStarted) {
174                Log.e(TAG, "Start method was not called");
175                return TextToSpeech.ERROR;
176            }
177            fileChannel = mFileChannel;
178        }
179
180        final byte[] bufferCopy = new byte[length];
181        System.arraycopy(buffer, offset, bufferCopy, 0, length);
182        mDispatcher.dispatchOnAudioAvailable(bufferCopy);
183
184        try {
185            fileChannel.write(ByteBuffer.wrap(buffer,  offset,  length));
186            return TextToSpeech.SUCCESS;
187        } catch (IOException ex) {
188            Log.e(TAG, "Failed to write to output file descriptor", ex);
189            synchronized (mStateLock) {
190                cleanUp();
191                mStatusCode = TextToSpeech.ERROR_OUTPUT;
192            }
193            return TextToSpeech.ERROR;
194        }
195    }
196
197    @Override
198    public int done() {
199        if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
200        FileChannel fileChannel = null;
201
202        int sampleRateInHz = 0;
203        int audioFormat = 0;
204        int channelCount = 0;
205
206        synchronized (mStateLock) {
207            if (mDone) {
208                Log.w(TAG, "Duplicate call to done()");
209                // This is not an error that would prevent synthesis. Hence no
210                // setStatusCode is set.
211                return TextToSpeech.ERROR;
212            }
213            if (mStatusCode == TextToSpeech.STOPPED) {
214                if (DBG) Log.d(TAG, "Request has been aborted.");
215                return errorCodeOnStop();
216            }
217            if (mStatusCode != TextToSpeech.SUCCESS && mStatusCode != TextToSpeech.STOPPED) {
218                mDispatcher.dispatchOnError(mStatusCode);
219                return TextToSpeech.ERROR;
220            }
221            if (mFileChannel == null) {
222                Log.e(TAG, "File not open");
223                return TextToSpeech.ERROR;
224            }
225            mDone = true;
226            fileChannel = mFileChannel;
227            sampleRateInHz = mSampleRateInHz;
228            audioFormat = mAudioFormat;
229            channelCount = mChannelCount;
230        }
231
232        try {
233            // Write WAV header at start of file
234            fileChannel.position(0);
235            int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
236            fileChannel.write(
237                    makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));
238
239            synchronized (mStateLock) {
240                closeFile();
241                mDispatcher.dispatchOnSuccess();
242                return TextToSpeech.SUCCESS;
243            }
244        } catch (IOException ex) {
245            Log.e(TAG, "Failed to write to output file descriptor", ex);
246            synchronized (mStateLock) {
247                cleanUp();
248            }
249            return TextToSpeech.ERROR;
250        }
251    }
252
253    @Override
254    public void error() {
255        error(TextToSpeech.ERROR_SYNTHESIS);
256    }
257
258    @Override
259    public void error(int errorCode) {
260        if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
261        synchronized (mStateLock) {
262            if (mDone) {
263                return;
264            }
265            cleanUp();
266            mStatusCode = errorCode;
267        }
268    }
269
270    @Override
271    public boolean hasStarted() {
272        synchronized (mStateLock) {
273            return mStarted;
274        }
275    }
276
277    @Override
278    public boolean hasFinished() {
279        synchronized (mStateLock) {
280            return mDone;
281        }
282    }
283
284    private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
285            int dataLength) {
286        int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat);
287        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
288        short blockAlign = (short) (sampleSizeInBytes * channelCount);
289        short bitsPerSample = (short) (sampleSizeInBytes * 8);
290
291        byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
292        ByteBuffer header = ByteBuffer.wrap(headerBuf);
293        header.order(ByteOrder.LITTLE_ENDIAN);
294
295        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
296        header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
297        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
298        header.put(new byte[]{ 'f', 'm', 't', ' ' });
299        header.putInt(16);  // size of fmt chunk
300        header.putShort(WAV_FORMAT_PCM);
301        header.putShort((short) channelCount);
302        header.putInt(sampleRateInHz);
303        header.putInt(byteRate);
304        header.putShort(blockAlign);
305        header.putShort(bitsPerSample);
306        header.put(new byte[]{ 'd', 'a', 't', 'a' });
307        header.putInt(dataLength);
308        header.flip();
309
310        return header;
311    }
312}
313