1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "WAVExtractor"
19#include <utils/Log.h>
20
21#include "include/WAVExtractor.h"
22
23#include <media/stagefright/foundation/ADebug.h>
24#include <media/stagefright/DataSource.h>
25#include <media/stagefright/MediaBufferGroup.h>
26#include <media/stagefright/MediaDefs.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MediaSource.h>
29#include <media/stagefright/MetaData.h>
30#include <utils/String8.h>
31#include <cutils/bitops.h>
32
33#define CHANNEL_MASK_USE_CHANNEL_ORDER 0
34
35namespace android {
36
// Format codes compared against the 16-bit value stored in the first two
// bytes of a "fmt " chunk (and, for extensible headers, in the first two
// bytes of the sub-format GUID).
enum {
    // Raw PCM samples.
    WAVE_FORMAT_PCM = 0x0001,

    // G.711 A-law samples.
    WAVE_FORMAT_ALAW = 0x0006,

    // G.711 mu-law samples.
    WAVE_FORMAT_MULAW = 0x0007,

    // Extensible header: the real sample format is carried in the
    // sub-format GUID further into the "fmt " chunk.
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE
};
43
// The 14 bytes that must follow the two-byte format code inside the
// sub-format GUID of an extensible header; compared with memcmp(..., 14).
static const char* WAVEEXT_SUBFORMAT =
        "\x00\x00\x00\x00\x10\x00\x80\x00"
        "\x00\xAA\x00\x38\x9B\x71";
45
46
// Decodes the unsigned 32-bit little-endian integer stored in the four bytes
// starting at |ptr| (ptr[0] is the least significant byte).
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    // Widen each byte to uint32_t before shifting. Without the casts the
    // bytes are promoted to signed int, and shifting a byte >= 0x80 left by
    // 24 bits shifts into the sign bit.
    return (uint32_t)ptr[3] << 24
            | (uint32_t)ptr[2] << 16
            | (uint32_t)ptr[1] << 8
            | (uint32_t)ptr[0];
}
50
// Decodes the unsigned 16-bit little-endian integer stored in the two bytes
// starting at |ptr| (ptr[0] is the least significant byte).
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint8_t lowByte = ptr[0];
    const uint8_t highByte = ptr[1];
    return (uint16_t)((highByte << 8) | lowByte);
}
54
55struct WAVSource : public MediaSource {
56    WAVSource(
57            const sp<DataSource> &dataSource,
58            const sp<MetaData> &meta,
59            uint16_t waveFormat,
60            int32_t bitsPerSample,
61            off64_t offset, size_t size);
62
63    virtual status_t start(MetaData *params = NULL);
64    virtual status_t stop();
65    virtual sp<MetaData> getFormat();
66
67    virtual status_t read(
68            MediaBuffer **buffer, const ReadOptions *options = NULL);
69
70protected:
71    virtual ~WAVSource();
72
73private:
74    static const size_t kMaxFrameSize;
75
76    sp<DataSource> mDataSource;
77    sp<MetaData> mMeta;
78    uint16_t mWaveFormat;
79    int32_t mSampleRate;
80    int32_t mNumChannels;
81    int32_t mBitsPerSample;
82    off64_t mOffset;
83    size_t mSize;
84    bool mStarted;
85    MediaBufferGroup *mGroup;
86    off64_t mCurrentPos;
87
88    WAVSource(const WAVSource &);
89    WAVSource &operator=(const WAVSource &);
90};
91
92WAVExtractor::WAVExtractor(const sp<DataSource> &source)
93    : mDataSource(source),
94      mValidFormat(false),
95      mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
96    mInitCheck = init();
97}
98
99WAVExtractor::~WAVExtractor() {
100}
101
102sp<MetaData> WAVExtractor::getMetaData() {
103    sp<MetaData> meta = new MetaData;
104
105    if (mInitCheck != OK) {
106        return meta;
107    }
108
109    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
110
111    return meta;
112}
113
114size_t WAVExtractor::countTracks() {
115    return mInitCheck == OK ? 1 : 0;
116}
117
118sp<MediaSource> WAVExtractor::getTrack(size_t index) {
119    if (mInitCheck != OK || index > 0) {
120        return NULL;
121    }
122
123    return new WAVSource(
124            mDataSource, mTrackMeta,
125            mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
126}
127
128sp<MetaData> WAVExtractor::getTrackMetaData(
129        size_t index, uint32_t flags) {
130    if (mInitCheck != OK || index > 0) {
131        return NULL;
132    }
133
134    return mTrackMeta;
135}
136
137status_t WAVExtractor::init() {
138    uint8_t header[12];
139    if (mDataSource->readAt(
140                0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
141        return NO_INIT;
142    }
143
144    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
145        return NO_INIT;
146    }
147
148    size_t totalSize = U32_LE_AT(&header[4]);
149
150    off64_t offset = 12;
151    size_t remainingSize = totalSize;
152    while (remainingSize >= 8) {
153        uint8_t chunkHeader[8];
154        if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
155            return NO_INIT;
156        }
157
158        remainingSize -= 8;
159        offset += 8;
160
161        uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
162
163        if (chunkSize > remainingSize) {
164            return NO_INIT;
165        }
166
167        if (!memcmp(chunkHeader, "fmt ", 4)) {
168            if (chunkSize < 16) {
169                return NO_INIT;
170            }
171
172            uint8_t formatSpec[40];
173            if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
174                return NO_INIT;
175            }
176
177            mWaveFormat = U16_LE_AT(formatSpec);
178            if (mWaveFormat != WAVE_FORMAT_PCM
179                    && mWaveFormat != WAVE_FORMAT_ALAW
180                    && mWaveFormat != WAVE_FORMAT_MULAW
181                    && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
182                return ERROR_UNSUPPORTED;
183            }
184
185            uint8_t fmtSize = 16;
186            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
187                fmtSize = 40;
188            }
189            if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
190                return NO_INIT;
191            }
192
193            mNumChannels = U16_LE_AT(&formatSpec[2]);
194            if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
195                if (mNumChannels != 1 && mNumChannels != 2) {
196                    ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
197                            mNumChannels);
198                }
199            } else {
200                if (mNumChannels < 1 && mNumChannels > 8) {
201                    return ERROR_UNSUPPORTED;
202                }
203            }
204
205            mSampleRate = U32_LE_AT(&formatSpec[4]);
206
207            if (mSampleRate == 0) {
208                return ERROR_MALFORMED;
209            }
210
211            mBitsPerSample = U16_LE_AT(&formatSpec[14]);
212
213            if (mWaveFormat == WAVE_FORMAT_PCM
214                    || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
215                if (mBitsPerSample != 8 && mBitsPerSample != 16
216                    && mBitsPerSample != 24) {
217                    return ERROR_UNSUPPORTED;
218                }
219            } else {
220                CHECK(mWaveFormat == WAVE_FORMAT_MULAW
221                        || mWaveFormat == WAVE_FORMAT_ALAW);
222                if (mBitsPerSample != 8) {
223                    return ERROR_UNSUPPORTED;
224                }
225            }
226
227            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
228                uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
229                if (validBitsPerSample != mBitsPerSample) {
230                    if (validBitsPerSample != 0) {
231                        ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
232                                validBitsPerSample, mBitsPerSample);
233                        return ERROR_UNSUPPORTED;
234                    } else {
235                        // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
236                        // writers don't correctly set the valid bits value, and leave it at 0.
237                        ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
238                    }
239                }
240
241                mChannelMask = U32_LE_AT(&formatSpec[20]);
242                ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
243                if ((mChannelMask >> 18) != 0) {
244                    ALOGE("invalid channel mask 0x%x", mChannelMask);
245                    return ERROR_MALFORMED;
246                }
247
248                if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
249                        && (popcount(mChannelMask) != mNumChannels)) {
250                    ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
251                            popcount(mChannelMask), mChannelMask);
252                    return ERROR_MALFORMED;
253                }
254
255                // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
256                // the sample format, using the same definitions as a regular WAV header
257                mWaveFormat = U16_LE_AT(&formatSpec[24]);
258                if (mWaveFormat != WAVE_FORMAT_PCM
259                        && mWaveFormat != WAVE_FORMAT_ALAW
260                        && mWaveFormat != WAVE_FORMAT_MULAW) {
261                    return ERROR_UNSUPPORTED;
262                }
263                if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
264                    ALOGE("unsupported GUID");
265                    return ERROR_UNSUPPORTED;
266                }
267            }
268
269            mValidFormat = true;
270        } else if (!memcmp(chunkHeader, "data", 4)) {
271            if (mValidFormat) {
272                mDataOffset = offset;
273                mDataSize = chunkSize;
274
275                mTrackMeta = new MetaData;
276
277                switch (mWaveFormat) {
278                    case WAVE_FORMAT_PCM:
279                        mTrackMeta->setCString(
280                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
281                        break;
282                    case WAVE_FORMAT_ALAW:
283                        mTrackMeta->setCString(
284                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
285                        break;
286                    default:
287                        CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
288                        mTrackMeta->setCString(
289                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
290                        break;
291                }
292
293                mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
294                mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
295                mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
296
297                size_t bytesPerSample = mBitsPerSample >> 3;
298
299                int64_t durationUs =
300                    1000000LL * (mDataSize / (mNumChannels * bytesPerSample))
301                        / mSampleRate;
302
303                mTrackMeta->setInt64(kKeyDuration, durationUs);
304
305                return OK;
306            }
307        }
308
309        offset += chunkSize;
310    }
311
312    return NO_INIT;
313}
314
315const size_t WAVSource::kMaxFrameSize = 32768;
316
317WAVSource::WAVSource(
318        const sp<DataSource> &dataSource,
319        const sp<MetaData> &meta,
320        uint16_t waveFormat,
321        int32_t bitsPerSample,
322        off64_t offset, size_t size)
323    : mDataSource(dataSource),
324      mMeta(meta),
325      mWaveFormat(waveFormat),
326      mSampleRate(0),
327      mNumChannels(0),
328      mBitsPerSample(bitsPerSample),
329      mOffset(offset),
330      mSize(size),
331      mStarted(false),
332      mGroup(NULL) {
333    CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
334    CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
335
336    mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
337}
338
339WAVSource::~WAVSource() {
340    if (mStarted) {
341        stop();
342    }
343}
344
345status_t WAVSource::start(MetaData *params) {
346    ALOGV("WAVSource::start");
347
348    CHECK(!mStarted);
349
350    mGroup = new MediaBufferGroup;
351    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
352
353    if (mBitsPerSample == 8) {
354        // As a temporary buffer for 8->16 bit conversion.
355        mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
356    }
357
358    mCurrentPos = mOffset;
359
360    mStarted = true;
361
362    return OK;
363}
364
365status_t WAVSource::stop() {
366    ALOGV("WAVSource::stop");
367
368    CHECK(mStarted);
369
370    delete mGroup;
371    mGroup = NULL;
372
373    mStarted = false;
374
375    return OK;
376}
377
378sp<MetaData> WAVSource::getFormat() {
379    ALOGV("WAVSource::getFormat");
380
381    return mMeta;
382}
383
384status_t WAVSource::read(
385        MediaBuffer **out, const ReadOptions *options) {
386    *out = NULL;
387
388    int64_t seekTimeUs;
389    ReadOptions::SeekMode mode;
390    if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
391        int64_t pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
392        if (pos > mSize) {
393            pos = mSize;
394        }
395        mCurrentPos = pos + mOffset;
396    }
397
398    MediaBuffer *buffer;
399    status_t err = mGroup->acquire_buffer(&buffer);
400    if (err != OK) {
401        return err;
402    }
403
404    size_t maxBytesToRead =
405        mBitsPerSample == 8 ? kMaxFrameSize / 2 : kMaxFrameSize;
406
407    size_t maxBytesAvailable =
408        (mCurrentPos - mOffset >= (off64_t)mSize)
409            ? 0 : mSize - (mCurrentPos - mOffset);
410
411    if (maxBytesToRead > maxBytesAvailable) {
412        maxBytesToRead = maxBytesAvailable;
413    }
414
415    ssize_t n = mDataSource->readAt(
416            mCurrentPos, buffer->data(),
417            maxBytesToRead);
418
419    if (n <= 0) {
420        buffer->release();
421        buffer = NULL;
422
423        return ERROR_END_OF_STREAM;
424    }
425
426    buffer->set_range(0, n);
427
428    if (mWaveFormat == WAVE_FORMAT_PCM) {
429        if (mBitsPerSample == 8) {
430            // Convert 8-bit unsigned samples to 16-bit signed.
431
432            MediaBuffer *tmp;
433            CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
434
435            // The new buffer holds the sample number of samples, but each
436            // one is 2 bytes wide.
437            tmp->set_range(0, 2 * n);
438
439            int16_t *dst = (int16_t *)tmp->data();
440            const uint8_t *src = (const uint8_t *)buffer->data();
441            ssize_t numBytes = n;
442
443            while (numBytes-- > 0) {
444                *dst++ = ((int16_t)(*src) - 128) * 256;
445                ++src;
446            }
447
448            buffer->release();
449            buffer = tmp;
450        } else if (mBitsPerSample == 24) {
451            // Convert 24-bit signed samples to 16-bit signed.
452
453            const uint8_t *src =
454                (const uint8_t *)buffer->data() + buffer->range_offset();
455            int16_t *dst = (int16_t *)src;
456
457            size_t numSamples = buffer->range_length() / 3;
458            for (size_t i = 0; i < numSamples; ++i) {
459                int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16);
460                x = (x << 8) >> 8;  // sign extension
461
462                x = x >> 8;
463                *dst++ = (int16_t)x;
464                src += 3;
465            }
466
467            buffer->set_range(buffer->range_offset(), 2 * numSamples);
468        }
469    }
470
471    size_t bytesPerSample = mBitsPerSample >> 3;
472
473    buffer->meta_data()->setInt64(
474            kKeyTime,
475            1000000LL * (mCurrentPos - mOffset)
476                / (mNumChannels * bytesPerSample) / mSampleRate);
477
478    buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
479    mCurrentPos += n;
480
481    *out = buffer;
482
483    return OK;
484}
485
486////////////////////////////////////////////////////////////////////////////////
487
488bool SniffWAV(
489        const sp<DataSource> &source, String8 *mimeType, float *confidence,
490        sp<AMessage> *) {
491    char header[12];
492    if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
493        return false;
494    }
495
496    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
497        return false;
498    }
499
500    sp<MediaExtractor> extractor = new WAVExtractor(source);
501    if (extractor->countTracks() == 0) {
502        return false;
503    }
504
505    *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
506    *confidence = 0.3f;
507
508    return true;
509}
510
511}  // namespace android
512