WAVExtractor.cpp revision 78bd91b15ee8ea5aa2ab5a8cad7e892cb2d01c1b
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "WAVExtractor"
19#include <utils/Log.h>
20
21#include "include/WAVExtractor.h"
22
23#include <audio_utils/primitives.h>
24#include <media/stagefright/foundation/ADebug.h>
25#include <media/stagefright/DataSource.h>
26#include <media/stagefright/MediaBufferGroup.h>
27#include <media/stagefright/MediaDefs.h>
28#include <media/stagefright/MediaErrors.h>
29#include <media/stagefright/MediaSource.h>
30#include <media/stagefright/MetaData.h>
31#include <utils/String8.h>
32#include <cutils/bitops.h>
33
34#define CHANNEL_MASK_USE_CHANNEL_ORDER 0
35
36namespace android {
37
// Format tags recognised by this extractor. init() compares them against the
// 16-bit format field at the start of a "fmt " chunk and, for
// WAVE_FORMAT_EXTENSIBLE, against the first two bytes of the sub-format GUID.
enum {
    WAVE_FORMAT_PCM        = 0x1,
    WAVE_FORMAT_IEEE_FLOAT = 0x3,
    WAVE_FORMAT_ALAW       = 0x6,
    WAVE_FORMAT_MULAW      = 0x7,
    WAVE_FORMAT_MSGSM      = 0x31,
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE
};
46
// The 14 bytes that must follow the two-byte format tag inside the sub-format
// GUID of a WAVE_FORMAT_EXTENSIBLE header; init() memcmp()s them against bytes
// 26..39 of the "fmt " chunk.
static const char* WAVEEXT_SUBFORMAT =
        "\x00\x00\x00\x00\x10\x00\x80"
        "\x00\x00\xAA\x00\x38\x9B\x71";
48
49
// Reads an unsigned 32-bit little-endian value from the four bytes at |ptr|.
//
// Each byte is widened to uint32_t before it is shifted. Without the cast the
// uint8_t operand is promoted to (signed) int, and shifting a byte >= 0x80 left
// by 24 would move a one into the sign bit.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return static_cast<uint32_t>(ptr[3]) << 24
            | static_cast<uint32_t>(ptr[2]) << 16
            | static_cast<uint32_t>(ptr[1]) << 8
            | static_cast<uint32_t>(ptr[0]);
}
53
// Reads an unsigned 16-bit little-endian value from the two bytes at |ptr|.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint8_t lowByte = ptr[0];
    const uint8_t highByte = ptr[1];
    return highByte << 8 | lowByte;
}
57
58struct WAVSource : public MediaSource {
59    WAVSource(
60            const sp<DataSource> &dataSource,
61            const sp<MetaData> &meta,
62            uint16_t waveFormat,
63            int32_t bitsPerSample,
64            off64_t offset, size_t size);
65
66    virtual status_t start(MetaData *params = NULL);
67    virtual status_t stop();
68    virtual sp<MetaData> getFormat();
69
70    virtual status_t read(
71            MediaBuffer **buffer, const ReadOptions *options = NULL);
72
73protected:
74    virtual ~WAVSource();
75
76private:
77    static const size_t kMaxFrameSize;
78
79    sp<DataSource> mDataSource;
80    sp<MetaData> mMeta;
81    uint16_t mWaveFormat;
82    int32_t mSampleRate;
83    int32_t mNumChannels;
84    int32_t mBitsPerSample;
85    off64_t mOffset;
86    size_t mSize;
87    bool mStarted;
88    MediaBufferGroup *mGroup;
89    off64_t mCurrentPos;
90
91    WAVSource(const WAVSource &);
92    WAVSource &operator=(const WAVSource &);
93};
94
95WAVExtractor::WAVExtractor(const sp<DataSource> &source)
96    : mDataSource(source),
97      mValidFormat(false),
98      mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
99    mInitCheck = init();
100}
101
102WAVExtractor::~WAVExtractor() {
103}
104
105sp<MetaData> WAVExtractor::getMetaData() {
106    sp<MetaData> meta = new MetaData;
107
108    if (mInitCheck != OK) {
109        return meta;
110    }
111
112    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
113
114    return meta;
115}
116
117size_t WAVExtractor::countTracks() {
118    return mInitCheck == OK ? 1 : 0;
119}
120
121sp<IMediaSource> WAVExtractor::getTrack(size_t index) {
122    if (mInitCheck != OK || index > 0) {
123        return NULL;
124    }
125
126    return new WAVSource(
127            mDataSource, mTrackMeta,
128            mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
129}
130
131sp<MetaData> WAVExtractor::getTrackMetaData(
132        size_t index, uint32_t /* flags */) {
133    if (mInitCheck != OK || index > 0) {
134        return NULL;
135    }
136
137    return mTrackMeta;
138}
139
140status_t WAVExtractor::init() {
141    uint8_t header[12];
142    if (mDataSource->readAt(
143                0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
144        return NO_INIT;
145    }
146
147    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
148        return NO_INIT;
149    }
150
151    size_t totalSize = U32_LE_AT(&header[4]);
152
153    off64_t offset = 12;
154    size_t remainingSize = totalSize;
155    while (remainingSize >= 8) {
156        uint8_t chunkHeader[8];
157        if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
158            return NO_INIT;
159        }
160
161        remainingSize -= 8;
162        offset += 8;
163
164        uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
165
166        if (chunkSize > remainingSize) {
167            return NO_INIT;
168        }
169
170        if (!memcmp(chunkHeader, "fmt ", 4)) {
171            if (chunkSize < 16) {
172                return NO_INIT;
173            }
174
175            uint8_t formatSpec[40];
176            if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
177                return NO_INIT;
178            }
179
180            mWaveFormat = U16_LE_AT(formatSpec);
181            if (mWaveFormat != WAVE_FORMAT_PCM
182                    && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
183                    && mWaveFormat != WAVE_FORMAT_ALAW
184                    && mWaveFormat != WAVE_FORMAT_MULAW
185                    && mWaveFormat != WAVE_FORMAT_MSGSM
186                    && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
187                return ERROR_UNSUPPORTED;
188            }
189
190            uint8_t fmtSize = 16;
191            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
192                fmtSize = 40;
193            }
194            if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
195                return NO_INIT;
196            }
197
198            mNumChannels = U16_LE_AT(&formatSpec[2]);
199
200            if (mNumChannels < 1 || mNumChannels > 8) {
201                ALOGE("Unsupported number of channels (%d)", mNumChannels);
202                return ERROR_UNSUPPORTED;
203            }
204
205            if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
206                if (mNumChannels != 1 && mNumChannels != 2) {
207                    ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
208                            mNumChannels);
209                }
210            }
211
212            mSampleRate = U32_LE_AT(&formatSpec[4]);
213
214            if (mSampleRate == 0) {
215                return ERROR_MALFORMED;
216            }
217
218            mBitsPerSample = U16_LE_AT(&formatSpec[14]);
219
220            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
221                uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
222                if (validBitsPerSample != mBitsPerSample) {
223                    if (validBitsPerSample != 0) {
224                        ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
225                                validBitsPerSample, mBitsPerSample);
226                        return ERROR_UNSUPPORTED;
227                    } else {
228                        // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
229                        // writers don't correctly set the valid bits value, and leave it at 0.
230                        ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
231                    }
232                }
233
234                mChannelMask = U32_LE_AT(&formatSpec[20]);
235                ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
236                if ((mChannelMask >> 18) != 0) {
237                    ALOGE("invalid channel mask 0x%x", mChannelMask);
238                    return ERROR_MALFORMED;
239                }
240
241                if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
242                        && (popcount(mChannelMask) != mNumChannels)) {
243                    ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
244                            popcount(mChannelMask), mChannelMask);
245                    return ERROR_MALFORMED;
246                }
247
248                // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
249                // the sample format, using the same definitions as a regular WAV header
250                mWaveFormat = U16_LE_AT(&formatSpec[24]);
251                if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
252                    ALOGE("unsupported GUID");
253                    return ERROR_UNSUPPORTED;
254                }
255            }
256
257            if (mWaveFormat == WAVE_FORMAT_PCM) {
258                if (mBitsPerSample != 8 && mBitsPerSample != 16
259                    && mBitsPerSample != 24 && mBitsPerSample != 32) {
260                    return ERROR_UNSUPPORTED;
261                }
262            } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
263                if (mBitsPerSample != 32) {  // TODO we don't support double
264                    return ERROR_UNSUPPORTED;
265                }
266            }
267            else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
268                if (mBitsPerSample != 0) {
269                    return ERROR_UNSUPPORTED;
270                }
271            } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
272                if (mBitsPerSample != 8) {
273                    return ERROR_UNSUPPORTED;
274                }
275            } else {
276                return ERROR_UNSUPPORTED;
277            }
278
279            mValidFormat = true;
280        } else if (!memcmp(chunkHeader, "data", 4)) {
281            if (mValidFormat) {
282                mDataOffset = offset;
283                mDataSize = chunkSize;
284
285                mTrackMeta = new MetaData;
286
287                switch (mWaveFormat) {
288                    case WAVE_FORMAT_PCM:
289                    case WAVE_FORMAT_IEEE_FLOAT:
290                        mTrackMeta->setCString(
291                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
292                        break;
293                    case WAVE_FORMAT_ALAW:
294                        mTrackMeta->setCString(
295                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
296                        break;
297                    case WAVE_FORMAT_MSGSM:
298                        mTrackMeta->setCString(
299                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
300                        break;
301                    default:
302                        CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
303                        mTrackMeta->setCString(
304                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
305                        break;
306                }
307
308                mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
309                mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
310                mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
311                mTrackMeta->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
312
313                int64_t durationUs = 0;
314                if (mWaveFormat == WAVE_FORMAT_MSGSM) {
315                    // 65 bytes decode to 320 8kHz samples
316                    durationUs =
317                        1000000LL * (mDataSize / 65 * 320) / 8000;
318                } else {
319                    size_t bytesPerSample = mBitsPerSample >> 3;
320
321                    if (!bytesPerSample || !mNumChannels)
322                        return ERROR_MALFORMED;
323
324                    size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
325
326                    if (!mSampleRate)
327                        return ERROR_MALFORMED;
328
329                    durationUs =
330                        1000000LL * num_samples / mSampleRate;
331                }
332
333                mTrackMeta->setInt64(kKeyDuration, durationUs);
334
335                return OK;
336            }
337        }
338
339        offset += chunkSize;
340    }
341
342    return NO_INIT;
343}
344
345const size_t WAVSource::kMaxFrameSize = 32768;
346
347WAVSource::WAVSource(
348        const sp<DataSource> &dataSource,
349        const sp<MetaData> &meta,
350        uint16_t waveFormat,
351        int32_t bitsPerSample,
352        off64_t offset, size_t size)
353    : mDataSource(dataSource),
354      mMeta(meta),
355      mWaveFormat(waveFormat),
356      mSampleRate(0),
357      mNumChannels(0),
358      mBitsPerSample(bitsPerSample),
359      mOffset(offset),
360      mSize(size),
361      mStarted(false),
362      mGroup(NULL) {
363    CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
364    CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
365
366    mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
367}
368
369WAVSource::~WAVSource() {
370    if (mStarted) {
371        stop();
372    }
373}
374
375status_t WAVSource::start(MetaData * /* params */) {
376    ALOGV("WAVSource::start");
377
378    CHECK(!mStarted);
379
380    mGroup = new MediaBufferGroup;
381    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
382
383    if (mBitsPerSample == 8) {
384        // As a temporary buffer for 8->16 bit conversion.
385        mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
386    }
387
388    mCurrentPos = mOffset;
389
390    mStarted = true;
391
392    return OK;
393}
394
395status_t WAVSource::stop() {
396    ALOGV("WAVSource::stop");
397
398    CHECK(mStarted);
399
400    delete mGroup;
401    mGroup = NULL;
402
403    mStarted = false;
404
405    return OK;
406}
407
408sp<MetaData> WAVSource::getFormat() {
409    ALOGV("WAVSource::getFormat");
410
411    return mMeta;
412}
413
414status_t WAVSource::read(
415        MediaBuffer **out, const ReadOptions *options) {
416    *out = NULL;
417
418    int64_t seekTimeUs;
419    ReadOptions::SeekMode mode;
420    if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
421        int64_t pos = 0;
422
423        if (mWaveFormat == WAVE_FORMAT_MSGSM) {
424            // 65 bytes decode to 320 8kHz samples
425            int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
426            int64_t framenumber = samplenumber / 320;
427            pos = framenumber * 65;
428        } else {
429            pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
430        }
431        if (pos > (off64_t)mSize) {
432            pos = mSize;
433        }
434        mCurrentPos = pos + mOffset;
435    }
436
437    MediaBuffer *buffer;
438    status_t err = mGroup->acquire_buffer(&buffer);
439    if (err != OK) {
440        return err;
441    }
442
443    // make sure that maxBytesToRead is multiple of 3, in 24-bit case
444    size_t maxBytesToRead =
445        mBitsPerSample == 8 ? kMaxFrameSize / 2 :
446        (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
447
448    size_t maxBytesAvailable =
449        (mCurrentPos - mOffset >= (off64_t)mSize)
450            ? 0 : mSize - (mCurrentPos - mOffset);
451
452    if (maxBytesToRead > maxBytesAvailable) {
453        maxBytesToRead = maxBytesAvailable;
454    }
455
456    if (mWaveFormat == WAVE_FORMAT_MSGSM) {
457        // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
458        // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
459        if (maxBytesToRead > 1024) {
460            maxBytesToRead = 1024;
461        }
462        maxBytesToRead = (maxBytesToRead / 65) * 65;
463    } else {
464        // read only integral amounts of audio unit frames.
465        const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
466        maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
467    }
468
469    ssize_t n = mDataSource->readAt(
470            mCurrentPos, buffer->data(),
471            maxBytesToRead);
472
473    if (n <= 0) {
474        buffer->release();
475        buffer = NULL;
476
477        return ERROR_END_OF_STREAM;
478    }
479
480    buffer->set_range(0, n);
481
482    // TODO: add capability to return data as float PCM instead of 16 bit PCM.
483    if (mWaveFormat == WAVE_FORMAT_PCM) {
484        if (mBitsPerSample == 8) {
485            // Convert 8-bit unsigned samples to 16-bit signed.
486
487            // Create new buffer with 2 byte wide samples
488            MediaBuffer *tmp;
489            CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
490            tmp->set_range(0, 2 * n);
491
492            memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
493            buffer->release();
494            buffer = tmp;
495        } else if (mBitsPerSample == 24) {
496            // Convert 24-bit signed samples to 16-bit signed in place
497            const size_t numSamples = n / 3;
498
499            memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
500            buffer->set_range(0, 2 * numSamples);
501        }  else if (mBitsPerSample == 32) {
502            // Convert 32-bit signed samples to 16-bit signed in place
503            const size_t numSamples = n / 4;
504
505            memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
506            buffer->set_range(0, 2 * numSamples);
507        }
508    } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
509        if (mBitsPerSample == 32) {
510            // Convert 32-bit float samples to 16-bit signed in place
511            const size_t numSamples = n / 4;
512
513            memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
514            buffer->set_range(0, 2 * numSamples);
515        }
516    }
517
518    int64_t timeStampUs = 0;
519
520    if (mWaveFormat == WAVE_FORMAT_MSGSM) {
521        timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
522    } else {
523        size_t bytesPerSample = mBitsPerSample >> 3;
524        timeStampUs = 1000000LL * (mCurrentPos - mOffset)
525                / (mNumChannels * bytesPerSample) / mSampleRate;
526    }
527
528    buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
529
530    buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
531    mCurrentPos += n;
532
533    *out = buffer;
534
535    return OK;
536}
537
538////////////////////////////////////////////////////////////////////////////////
539
540bool SniffWAV(
541        const sp<DataSource> &source, String8 *mimeType, float *confidence,
542        sp<AMessage> *) {
543    char header[12];
544    if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
545        return false;
546    }
547
548    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
549        return false;
550    }
551
552    sp<MediaExtractor> extractor = new WAVExtractor(source);
553    if (extractor->countTracks() == 0) {
554        return false;
555    }
556
557    *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
558    *confidence = 0.3f;
559
560    return true;
561}
562
563}  // namespace android
564