PlaybackSynthesisCallback.java revision 50e657bb2d005568f5dd8bc1d904d07b0d94018f
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
1650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringertpackage android.speech.tts;
1750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
1850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringertimport android.media.AudioFormat;
1950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringertimport android.media.AudioTrack;
2050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringertimport android.util.Log;
2150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
2250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert/**
2350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert * Speech synthesis request that plays the audio as it is received.
2450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert */
2550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringertclass PlaybackSynthesisRequest extends SynthesisRequest {
2650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
2750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private static final String TAG = "PlaybackSynthesisRequest";
2850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private static final boolean DBG = false;
2950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
3050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;
3150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
3250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    /**
3350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     * Audio stream type. Must be one of the STREAM_ contants defined in
3450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     * {@link android.media.AudioManager}.
3550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     */
3650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private final int mStreamType;
3750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
3850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    /**
3950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     * Volume, in the range [0.0f, 1.0f]. The default value is
4050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     * {@link TextToSpeech.Engine#DEFAULT_VOLUME} (1.0f).
4150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     */
4250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private final float mVolume;
4350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
4450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    /**
4550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     * Left/right position of the audio, in the range [-1.0f, 1.0f].
4650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     * The default value is {@link TextToSpeech.Engine#DEFAULT_PAN} (0.0f).
4750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert     */
4850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private final float mPan;
4950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
5050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private final Object mStateLock = new Object();
5150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private AudioTrack mAudioTrack = null;
5250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private boolean mStopped = false;
5350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
5450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    PlaybackSynthesisRequest(String text, int streamType, float volume, float pan) {
5550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        super(text);
5650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        mStreamType = streamType;
5750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        mVolume = volume;
5850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        mPan = pan;
5950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
6050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
6150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    @Override
6250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    void stop() {
6350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (DBG) Log.d(TAG, "stop()");
6450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        synchronized (mStateLock) {
6550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            mStopped = true;
6650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            cleanUp();
6750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
6850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
6950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
7050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private void cleanUp() {
7150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (DBG) Log.d(TAG, "cleanUp()");
7250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (mAudioTrack != null) {
7350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            mAudioTrack.flush();
7450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            mAudioTrack.stop();
7550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            // TODO: do we need to wait for playback to finish before releasing?
7650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            mAudioTrack.release();
7750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            mAudioTrack = null;
7850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
7950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
8050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
8150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    // TODO: add a thread that writes to the AudioTrack?
8250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    @Override
8350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
8450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (DBG) {
8550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            Log.d(TAG, "start(" + sampleRateInHz + "," + audioFormat
8650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                    + "," + channelCount + ")");
8750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
8850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
8950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        int channelConfig;
9050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (channelCount == 1) {
9150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            channelConfig = AudioFormat.CHANNEL_OUT_MONO;
9250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        } else if (channelCount == 2){
9350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
9450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        } else {
9550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            Log.e(TAG, "Unsupported number of channels: " + channelCount);
9650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            return TextToSpeech.ERROR;
9750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
9850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
9950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        int minBufferSizeInBytes
10050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
10150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
10250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
10350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        synchronized (mStateLock) {
10450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (mStopped) {
10550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                if (DBG) Log.d(TAG, "Request has been aborted.");
10650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
10750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
10850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (mAudioTrack != null) {
10950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                Log.e(TAG, "start() called twice");
11050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                cleanUp();
11150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
11250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
11350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
11450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            mAudioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, audioFormat,
11550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                    bufferSizeInBytes, AudioTrack.MODE_STREAM);
11650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (mAudioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
11750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                cleanUp();
11850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
11950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
12050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
12150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            setupVolume();
12250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
12350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
12450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        return TextToSpeech.SUCCESS;
12550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
12650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
12750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private void setupVolume() {
12850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        float vol = clip(mVolume, 0.0f, 1.0f);
12950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        float panning = clip(mPan, -1.0f, 1.0f);
13050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        float volLeft = vol;
13150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        float volRight = vol;
13250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (panning > 0.0f) {
13350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            volLeft *= (1.0f - panning);
13450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        } else if (panning < 0.0f) {
13550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            volRight *= (1.0f + panning);
13650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
13750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
13850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (mAudioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
13950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            Log.e(TAG, "Failed to set volume");
14050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
14150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
14250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
14350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    private float clip(float value, float min, float max) {
14450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        return value > max ? max : (value < min ? min : value);
14550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
14650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
14750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    @Override
14850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    public int audioAvailable(byte[] buffer, int offset, int length) {
14950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (DBG) {
15050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            Log.d(TAG, "audioAvailable(byte[" + buffer.length + "],"
15150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                    + offset + "," + length + "), thread ID=" + android.os.Process.myTid());
15250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
15350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        synchronized (mStateLock) {
15450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (mStopped) {
15550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                if (DBG) Log.d(TAG, "Request has been aborted.");
15650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
15750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
15850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (mAudioTrack == null) {
15950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                Log.e(TAG, "audioAvailable(): Not started");
16050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
16150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
16250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            int playState = mAudioTrack.getPlayState();
16350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (playState == AudioTrack.PLAYSTATE_STOPPED) {
16450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                if (DBG) Log.d(TAG, "AudioTrack stopped, restarting");
16550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                mAudioTrack.play();
16650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
16750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            // TODO: loop until all data is written?
16850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (DBG) Log.d(TAG, "AudioTrack.write()");
16950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            int count = mAudioTrack.write(buffer, offset, length);
17050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (DBG) Log.d(TAG, "AudioTrack.write() returned " + count);
17150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (count < 0) {
17250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                Log.e(TAG, "Writing to AudioTrack failed: " + count);
17350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                cleanUp();
17450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
17550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            } else {
17650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.SUCCESS;
17750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
17850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
17950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
18050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert
18150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    @Override
18250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    public int done() {
18350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        if (DBG) Log.d(TAG, "done()");
18450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        synchronized (mStateLock) {
18550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (mStopped) {
18650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                if (DBG) Log.d(TAG, "Request has been aborted.");
18750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
18850e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
18950e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            if (mAudioTrack == null) {
19050e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                Log.e(TAG, "done(): Not started");
19150e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert                return TextToSpeech.ERROR;
19250e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            }
19350e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert            cleanUp();
19450e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        }
19550e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert        return TextToSpeech.SUCCESS;
19650e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert    }
19750e657bb2d005568f5dd8bc1d904d07b0d94018fBjorn Bringert}