1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
20import android.util.Log;
21
22import java.io.IOException;
23import java.nio.ByteBuffer;
24import java.nio.ByteOrder;
25import java.nio.channels.FileChannel;
26
27/**
28 * Speech synthesis request that writes the audio to a WAV file.
29 */
30class FileSynthesisCallback extends AbstractSynthesisCallback {
31
32    private static final String TAG = "FileSynthesisRequest";
33    private static final boolean DBG = false;
34
35    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
36
37    private static final int WAV_HEADER_LENGTH = 44;
38    private static final short WAV_FORMAT_PCM = 0x0001;
39
40    private final Object mStateLock = new Object();
41
42    private int mSampleRateInHz;
43    private int mAudioFormat;
44    private int mChannelCount;
45
46    private FileChannel mFileChannel;
47
48    private final UtteranceProgressDispatcher mDispatcher;
49    private final Object mCallerIdentity;
50
51    private boolean mStarted = false;
52    private boolean mDone = false;
53
54    /** Status code of synthesis */
55    protected int mStatusCode;
56
57    FileSynthesisCallback(FileChannel fileChannel, UtteranceProgressDispatcher dispatcher,
58            Object callerIdentity, boolean clientIsUsingV2) {
59        super(clientIsUsingV2);
60        mFileChannel = fileChannel;
61        mDispatcher = dispatcher;
62        mCallerIdentity = callerIdentity;
63        mStatusCode = TextToSpeech.SUCCESS;
64    }
65
66    @Override
67    void stop() {
68        synchronized (mStateLock) {
69            if (mDone) {
70                return;
71            }
72            if (mStatusCode == TextToSpeech.STOPPED) {
73                return;
74            }
75
76            mStatusCode = TextToSpeech.STOPPED;
77            cleanUp();
78            if (mDispatcher != null) {
79                mDispatcher.dispatchOnStop();
80            }
81        }
82    }
83
84    /**
85     * Must be called while holding the monitor on {@link #mStateLock}.
86     */
87    private void cleanUp() {
88        closeFile();
89    }
90
91    /**
92     * Must be called while holding the monitor on {@link #mStateLock}.
93     */
94    private void closeFile() {
95        // File will be closed by the SpeechItem in the speech service.
96        mFileChannel = null;
97    }
98
99    @Override
100    public int getMaxBufferSize() {
101        return MAX_AUDIO_BUFFER_SIZE;
102    }
103
104    @Override
105    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
106        if (DBG) {
107            Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
108                    + "," + channelCount + ")");
109        }
110        FileChannel fileChannel = null;
111        synchronized (mStateLock) {
112            if (mStatusCode == TextToSpeech.STOPPED) {
113                if (DBG) Log.d(TAG, "Request has been aborted.");
114                return errorCodeOnStop();
115            }
116            if (mStatusCode != TextToSpeech.SUCCESS) {
117                if (DBG) Log.d(TAG, "Error was raised");
118                return TextToSpeech.ERROR;
119            }
120            if (mStarted) {
121                Log.e(TAG, "Start called twice");
122                return TextToSpeech.ERROR;
123            }
124            mStarted = true;
125            mSampleRateInHz = sampleRateInHz;
126            mAudioFormat = audioFormat;
127            mChannelCount = channelCount;
128
129            if (mDispatcher != null) {
130                mDispatcher.dispatchOnStart();
131            }
132            fileChannel = mFileChannel;
133        }
134
135        try {
136            fileChannel.write(ByteBuffer.allocate(WAV_HEADER_LENGTH));
137                return TextToSpeech.SUCCESS;
138        } catch (IOException ex) {
139            Log.e(TAG, "Failed to write wav header to output file descriptor", ex);
140            synchronized (mStateLock) {
141                cleanUp();
142                mStatusCode = TextToSpeech.ERROR_OUTPUT;
143            }
144            return TextToSpeech.ERROR;
145        }
146    }
147
148    @Override
149    public int audioAvailable(byte[] buffer, int offset, int length) {
150        if (DBG) {
151            Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
152                    + "," + length + ")");
153        }
154        FileChannel fileChannel = null;
155        synchronized (mStateLock) {
156            if (mStatusCode == TextToSpeech.STOPPED) {
157                if (DBG) Log.d(TAG, "Request has been aborted.");
158                return errorCodeOnStop();
159            }
160            if (mStatusCode != TextToSpeech.SUCCESS) {
161                if (DBG) Log.d(TAG, "Error was raised");
162                return TextToSpeech.ERROR;
163            }
164            if (mFileChannel == null) {
165                Log.e(TAG, "File not open");
166                mStatusCode = TextToSpeech.ERROR_OUTPUT;
167                return TextToSpeech.ERROR;
168            }
169            if (!mStarted) {
170                Log.e(TAG, "Start method was not called");
171                return TextToSpeech.ERROR;
172            }
173            fileChannel = mFileChannel;
174        }
175
176        try {
177            fileChannel.write(ByteBuffer.wrap(buffer,  offset,  length));
178            return TextToSpeech.SUCCESS;
179        } catch (IOException ex) {
180            Log.e(TAG, "Failed to write to output file descriptor", ex);
181            synchronized (mStateLock) {
182                cleanUp();
183                mStatusCode = TextToSpeech.ERROR_OUTPUT;
184            }
185            return TextToSpeech.ERROR;
186        }
187    }
188
189    @Override
190    public int done() {
191        if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
192        FileChannel fileChannel = null;
193
194        int sampleRateInHz = 0;
195        int audioFormat = 0;
196        int channelCount = 0;
197
198        synchronized (mStateLock) {
199            if (mDone) {
200                Log.w(TAG, "Duplicate call to done()");
201                // This is not an error that would prevent synthesis. Hence no
202                // setStatusCode is set.
203                return TextToSpeech.ERROR;
204            }
205            if (mStatusCode == TextToSpeech.STOPPED) {
206                if (DBG) Log.d(TAG, "Request has been aborted.");
207                return errorCodeOnStop();
208            }
209            if (mDispatcher != null && mStatusCode != TextToSpeech.SUCCESS &&
210                    mStatusCode != TextToSpeech.STOPPED) {
211                mDispatcher.dispatchOnError(mStatusCode);
212                return TextToSpeech.ERROR;
213            }
214            if (mFileChannel == null) {
215                Log.e(TAG, "File not open");
216                return TextToSpeech.ERROR;
217            }
218            mDone = true;
219            fileChannel = mFileChannel;
220            sampleRateInHz = mSampleRateInHz;
221            audioFormat = mAudioFormat;
222            channelCount = mChannelCount;
223        }
224
225        try {
226            // Write WAV header at start of file
227            fileChannel.position(0);
228            int dataLength = (int) (fileChannel.size() - WAV_HEADER_LENGTH);
229            fileChannel.write(
230                    makeWavHeader(sampleRateInHz, audioFormat, channelCount, dataLength));
231
232            synchronized (mStateLock) {
233                closeFile();
234                if (mDispatcher != null) {
235                    mDispatcher.dispatchOnSuccess();
236                }
237                return TextToSpeech.SUCCESS;
238            }
239        } catch (IOException ex) {
240            Log.e(TAG, "Failed to write to output file descriptor", ex);
241            synchronized (mStateLock) {
242                cleanUp();
243            }
244            return TextToSpeech.ERROR;
245        }
246    }
247
248    @Override
249    public void error() {
250        error(TextToSpeech.ERROR_SYNTHESIS);
251    }
252
253    @Override
254    public void error(int errorCode) {
255        if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
256        synchronized (mStateLock) {
257            if (mDone) {
258                return;
259            }
260            cleanUp();
261            mStatusCode = errorCode;
262        }
263    }
264
265    @Override
266    public boolean hasStarted() {
267        synchronized (mStateLock) {
268            return mStarted;
269        }
270    }
271
272    @Override
273    public boolean hasFinished() {
274        synchronized (mStateLock) {
275            return mDone;
276        }
277    }
278
279    private ByteBuffer makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
280            int dataLength) {
281        int sampleSizeInBytes = AudioFormat.getBytesPerSample(audioFormat);
282        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
283        short blockAlign = (short) (sampleSizeInBytes * channelCount);
284        short bitsPerSample = (short) (sampleSizeInBytes * 8);
285
286        byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
287        ByteBuffer header = ByteBuffer.wrap(headerBuf);
288        header.order(ByteOrder.LITTLE_ENDIAN);
289
290        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
291        header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
292        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
293        header.put(new byte[]{ 'f', 'm', 't', ' ' });
294        header.putInt(16);  // size of fmt chunk
295        header.putShort(WAV_FORMAT_PCM);
296        header.putShort((short) channelCount);
297        header.putInt(sampleRateInHz);
298        header.putInt(byteRate);
299        header.putShort(blockAlign);
300        header.putShort(bitsPerSample);
301        header.put(new byte[]{ 'd', 'a', 't', 'a' });
302        header.putInt(dataLength);
303        header.flip();
304
305        return header;
306    }
307}
308