1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16package android.speech.tts;
17
18import android.media.AudioFormat;
19import android.os.FileUtils;
20import android.util.Log;
21
22import java.io.File;
23import java.io.IOException;
24import java.io.RandomAccessFile;
25import java.nio.ByteBuffer;
26import java.nio.ByteOrder;
27
28/**
29 * Speech synthesis request that writes the audio to a WAV file.
30 */
31class FileSynthesisCallback extends AbstractSynthesisCallback {
32
33    private static final String TAG = "FileSynthesisRequest";
34    private static final boolean DBG = false;
35
36    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
37
38    private static final int WAV_HEADER_LENGTH = 44;
39    private static final short WAV_FORMAT_PCM = 0x0001;
40
41    private final Object mStateLock = new Object();
42    private final File mFileName;
43    private int mSampleRateInHz;
44    private int mAudioFormat;
45    private int mChannelCount;
46    private RandomAccessFile mFile;
47    private boolean mStopped = false;
48    private boolean mDone = false;
49
50    FileSynthesisCallback(File fileName) {
51        mFileName = fileName;
52    }
53
54    @Override
55    void stop() {
56        synchronized (mStateLock) {
57            mStopped = true;
58            cleanUp();
59        }
60    }
61
62    /**
63     * Must be called while holding the monitor on {@link #mStateLock}.
64     */
65    private void cleanUp() {
66        closeFileAndWidenPermissions();
67        if (mFile != null) {
68            mFileName.delete();
69        }
70    }
71
72    /**
73     * Must be called while holding the monitor on {@link #mStateLock}.
74     */
75    private void closeFileAndWidenPermissions() {
76        try {
77            if (mFile != null) {
78                mFile.close();
79                mFile = null;
80            }
81        } catch (IOException ex) {
82            Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
83        }
84
85        try {
86            // Make the written file readable and writeable by everyone.
87            // This allows the app that requested synthesis to read the file.
88            //
89            // Note that the directory this file was written must have already
90            // been world writeable in order it to have been
91            // written to in the first place.
92            FileUtils.setPermissions(mFileName.getAbsolutePath(), 0666, -1, -1); //-rw-rw-rw
93        } catch (SecurityException se) {
94            Log.e(TAG, "Security exception setting rw permissions on : " + mFileName);
95        }
96    }
97
98    /**
99     * Checks whether a given file exists, and deletes it if it does.
100     */
101    private boolean maybeCleanupExistingFile(File file) {
102        if (file.exists()) {
103            Log.v(TAG, "File " + file + " exists, deleting.");
104            if (!file.delete()) {
105                Log.e(TAG, "Failed to delete " + file);
106                return false;
107            }
108        }
109
110        return true;
111    }
112
113
114    @Override
115    public int getMaxBufferSize() {
116        return MAX_AUDIO_BUFFER_SIZE;
117    }
118
119    @Override
120    boolean isDone() {
121        return mDone;
122    }
123
124    @Override
125    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
126        if (DBG) {
127            Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
128                    + "," + channelCount + ")");
129        }
130        synchronized (mStateLock) {
131            if (mStopped) {
132                if (DBG) Log.d(TAG, "Request has been aborted.");
133                return TextToSpeech.ERROR;
134            }
135            if (mFile != null) {
136                cleanUp();
137                throw new IllegalArgumentException("FileSynthesisRequest.start() called twice");
138            }
139
140            if (!maybeCleanupExistingFile(mFileName)) {
141                return TextToSpeech.ERROR;
142            }
143
144            mSampleRateInHz = sampleRateInHz;
145            mAudioFormat = audioFormat;
146            mChannelCount = channelCount;
147            try {
148                mFile = new RandomAccessFile(mFileName, "rw");
149                // Reserve space for WAV header
150                mFile.write(new byte[WAV_HEADER_LENGTH]);
151                return TextToSpeech.SUCCESS;
152            } catch (IOException ex) {
153                Log.e(TAG, "Failed to open " + mFileName + ": " + ex);
154                cleanUp();
155                return TextToSpeech.ERROR;
156            }
157        }
158    }
159
160    @Override
161    public int audioAvailable(byte[] buffer, int offset, int length) {
162        if (DBG) {
163            Log.d(TAG, "FileSynthesisRequest.audioAvailable(" + buffer + "," + offset
164                    + "," + length + ")");
165        }
166        synchronized (mStateLock) {
167            if (mStopped) {
168                if (DBG) Log.d(TAG, "Request has been aborted.");
169                return TextToSpeech.ERROR;
170            }
171            if (mFile == null) {
172                Log.e(TAG, "File not open");
173                return TextToSpeech.ERROR;
174            }
175            try {
176                mFile.write(buffer, offset, length);
177                return TextToSpeech.SUCCESS;
178            } catch (IOException ex) {
179                Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
180                cleanUp();
181                return TextToSpeech.ERROR;
182            }
183        }
184    }
185
186    @Override
187    public int done() {
188        if (DBG) Log.d(TAG, "FileSynthesisRequest.done()");
189        synchronized (mStateLock) {
190            if (mDone) {
191                if (DBG) Log.d(TAG, "Duplicate call to done()");
192                // This preserves existing behaviour. Earlier, if done was called twice
193                // we'd return ERROR because mFile == null and we'd add to logspam.
194                return TextToSpeech.ERROR;
195            }
196            if (mStopped) {
197                if (DBG) Log.d(TAG, "Request has been aborted.");
198                return TextToSpeech.ERROR;
199            }
200            if (mFile == null) {
201                Log.e(TAG, "File not open");
202                return TextToSpeech.ERROR;
203            }
204            try {
205                // Write WAV header at start of file
206                mFile.seek(0);
207                int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH);
208                mFile.write(
209                        makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
210                closeFileAndWidenPermissions();
211                mDone = true;
212                return TextToSpeech.SUCCESS;
213            } catch (IOException ex) {
214                Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
215                cleanUp();
216                return TextToSpeech.ERROR;
217            }
218        }
219    }
220
221    @Override
222    public void error() {
223        if (DBG) Log.d(TAG, "FileSynthesisRequest.error()");
224        synchronized (mStateLock) {
225            cleanUp();
226        }
227    }
228
229    private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
230            int dataLength) {
231        // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
232        int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
233        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
234        short blockAlign = (short) (sampleSizeInBytes * channelCount);
235        short bitsPerSample = (short) (sampleSizeInBytes * 8);
236
237        byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
238        ByteBuffer header = ByteBuffer.wrap(headerBuf);
239        header.order(ByteOrder.LITTLE_ENDIAN);
240
241        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
242        header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
243        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
244        header.put(new byte[]{ 'f', 'm', 't', ' ' });
245        header.putInt(16);  // size of fmt chunk
246        header.putShort(WAV_FORMAT_PCM);
247        header.putShort((short) channelCount);
248        header.putInt(sampleRateInHz);
249        header.putInt(byteRate);
250        header.putShort(blockAlign);
251        header.putShort(bitsPerSample);
252        header.put(new byte[]{ 'd', 'a', 't', 'a' });
253        header.putInt(dataLength);
254
255        return headerBuf;
256    }
257
258}
259