WAVExtractor.cpp revision 9a40167c3dc32fccc72abd96f03df6ea5676793b
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "WAVExtractor"
19#include <utils/Log.h>
20
21#include "include/WAVExtractor.h"
22
23#include <media/stagefright/foundation/ADebug.h>
24#include <media/stagefright/DataSource.h>
25#include <media/stagefright/MediaBufferGroup.h>
26#include <media/stagefright/MediaDefs.h>
27#include <media/stagefright/MediaErrors.h>
28#include <media/stagefright/MediaSource.h>
29#include <media/stagefright/MetaData.h>
30#include <utils/String8.h>
31#include <cutils/bitops.h>
32
33#define CHANNEL_MASK_USE_CHANNEL_ORDER 0
34
35namespace android {
36
// Values of the 16-bit format tag stored at the start of the "fmt " chunk
// that this extractor recognizes.
enum {
    WAVE_FORMAT_PCM        = 0x0001,  // mapped to MEDIA_MIMETYPE_AUDIO_RAW
    WAVE_FORMAT_ALAW       = 0x0006,  // mapped to MEDIA_MIMETYPE_AUDIO_G711_ALAW
    WAVE_FORMAT_MULAW      = 0x0007,  // mapped to MEDIA_MIMETYPE_AUDIO_G711_MLAW
    WAVE_FORMAT_MSGSM      = 0x0031,  // mapped to MEDIA_MIMETYPE_AUDIO_MSGSM
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE   // real format lives in the sub-format GUID
};

// The 14 bytes expected at offset 26 of a WAVE_FORMAT_EXTENSIBLE "fmt " chunk,
// i.e. the sub-format GUID minus its first two bytes (those two bytes carry the
// format tag and are read separately).
static const char* WAVEEXT_SUBFORMAT =
        "\x00\x00"
        "\x00\x00"
        "\x10\x00"
        "\x80\x00\x00\xAA\x00\x38\x9B\x71";
46
47
// Reads an unsigned 32-bit little-endian value from the four bytes at |ptr|.
//
// Each byte is widened to uint32_t before shifting: a plain uint8_t would be
// promoted to (signed) int, and shifting a byte >= 0x80 left by 24 would then
// move a one into the sign bit.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return (uint32_t)ptr[0]
            | ((uint32_t)ptr[1] << 8)
            | ((uint32_t)ptr[2] << 16)
            | ((uint32_t)ptr[3] << 24);
}
51
// Reads an unsigned 16-bit little-endian value from the two bytes at |ptr|.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint8_t lowByte = ptr[0];
    const uint8_t highByte = ptr[1];
    return (uint16_t)((highByte << 8) | lowByte);
}
55
56struct WAVSource : public MediaSource {
57    WAVSource(
58            const sp<DataSource> &dataSource,
59            const sp<MetaData> &meta,
60            uint16_t waveFormat,
61            int32_t bitsPerSample,
62            off64_t offset, size_t size);
63
64    virtual status_t start(MetaData *params = NULL);
65    virtual status_t stop();
66    virtual sp<MetaData> getFormat();
67
68    virtual status_t read(
69            MediaBuffer **buffer, const ReadOptions *options = NULL);
70
71protected:
72    virtual ~WAVSource();
73
74private:
75    static const size_t kMaxFrameSize;
76
77    sp<DataSource> mDataSource;
78    sp<MetaData> mMeta;
79    uint16_t mWaveFormat;
80    int32_t mSampleRate;
81    int32_t mNumChannels;
82    int32_t mBitsPerSample;
83    off64_t mOffset;
84    size_t mSize;
85    bool mStarted;
86    MediaBufferGroup *mGroup;
87    off64_t mCurrentPos;
88
89    WAVSource(const WAVSource &);
90    WAVSource &operator=(const WAVSource &);
91};
92
93WAVExtractor::WAVExtractor(const sp<DataSource> &source)
94    : mDataSource(source),
95      mValidFormat(false),
96      mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
97    mInitCheck = init();
98}
99
100WAVExtractor::~WAVExtractor() {
101}
102
103sp<MetaData> WAVExtractor::getMetaData() {
104    sp<MetaData> meta = new MetaData;
105
106    if (mInitCheck != OK) {
107        return meta;
108    }
109
110    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
111
112    return meta;
113}
114
115size_t WAVExtractor::countTracks() {
116    return mInitCheck == OK ? 1 : 0;
117}
118
119sp<IMediaSource> WAVExtractor::getTrack(size_t index) {
120    if (mInitCheck != OK || index > 0) {
121        return NULL;
122    }
123
124    return new WAVSource(
125            mDataSource, mTrackMeta,
126            mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
127}
128
129sp<MetaData> WAVExtractor::getTrackMetaData(
130        size_t index, uint32_t /* flags */) {
131    if (mInitCheck != OK || index > 0) {
132        return NULL;
133    }
134
135    return mTrackMeta;
136}
137
138status_t WAVExtractor::init() {
139    uint8_t header[12];
140    if (mDataSource->readAt(
141                0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
142        return NO_INIT;
143    }
144
145    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
146        return NO_INIT;
147    }
148
149    size_t totalSize = U32_LE_AT(&header[4]);
150
151    off64_t offset = 12;
152    size_t remainingSize = totalSize;
153    while (remainingSize >= 8) {
154        uint8_t chunkHeader[8];
155        if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
156            return NO_INIT;
157        }
158
159        remainingSize -= 8;
160        offset += 8;
161
162        uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
163
164        if (chunkSize > remainingSize) {
165            return NO_INIT;
166        }
167
168        if (!memcmp(chunkHeader, "fmt ", 4)) {
169            if (chunkSize < 16) {
170                return NO_INIT;
171            }
172
173            uint8_t formatSpec[40];
174            if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
175                return NO_INIT;
176            }
177
178            mWaveFormat = U16_LE_AT(formatSpec);
179            if (mWaveFormat != WAVE_FORMAT_PCM
180                    && mWaveFormat != WAVE_FORMAT_ALAW
181                    && mWaveFormat != WAVE_FORMAT_MULAW
182                    && mWaveFormat != WAVE_FORMAT_MSGSM
183                    && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
184                return ERROR_UNSUPPORTED;
185            }
186
187            uint8_t fmtSize = 16;
188            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
189                fmtSize = 40;
190            }
191            if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
192                return NO_INIT;
193            }
194
195            mNumChannels = U16_LE_AT(&formatSpec[2]);
196            if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
197                if (mNumChannels == 0) {
198                    return ERROR_UNSUPPORTED;
199                } else if (mNumChannels != 1 && mNumChannels != 2) {
200                    ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
201                            mNumChannels);
202                }
203            } else {
204                if (mNumChannels < 1 || mNumChannels > 8) {
205                    return ERROR_UNSUPPORTED;
206                }
207            }
208
209            mSampleRate = U32_LE_AT(&formatSpec[4]);
210
211            if (mSampleRate == 0) {
212                return ERROR_MALFORMED;
213            }
214
215            mBitsPerSample = U16_LE_AT(&formatSpec[14]);
216
217            if (mWaveFormat == WAVE_FORMAT_PCM
218                    || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
219                if (mBitsPerSample != 8 && mBitsPerSample != 16
220                    && mBitsPerSample != 24) {
221                    return ERROR_UNSUPPORTED;
222                }
223            } else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
224                if (mBitsPerSample != 0) {
225                    return ERROR_UNSUPPORTED;
226                }
227            } else {
228                CHECK(mWaveFormat == WAVE_FORMAT_MULAW
229                        || mWaveFormat == WAVE_FORMAT_ALAW);
230                if (mBitsPerSample != 8) {
231                    return ERROR_UNSUPPORTED;
232                }
233            }
234
235            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
236                uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
237                if (validBitsPerSample != mBitsPerSample) {
238                    if (validBitsPerSample != 0) {
239                        ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
240                                validBitsPerSample, mBitsPerSample);
241                        return ERROR_UNSUPPORTED;
242                    } else {
243                        // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
244                        // writers don't correctly set the valid bits value, and leave it at 0.
245                        ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
246                    }
247                }
248
249                mChannelMask = U32_LE_AT(&formatSpec[20]);
250                ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
251                if ((mChannelMask >> 18) != 0) {
252                    ALOGE("invalid channel mask 0x%x", mChannelMask);
253                    return ERROR_MALFORMED;
254                }
255
256                if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
257                        && (popcount(mChannelMask) != mNumChannels)) {
258                    ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
259                            popcount(mChannelMask), mChannelMask);
260                    return ERROR_MALFORMED;
261                }
262
263                // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
264                // the sample format, using the same definitions as a regular WAV header
265                mWaveFormat = U16_LE_AT(&formatSpec[24]);
266                if (mWaveFormat != WAVE_FORMAT_PCM
267                        && mWaveFormat != WAVE_FORMAT_ALAW
268                        && mWaveFormat != WAVE_FORMAT_MULAW) {
269                    return ERROR_UNSUPPORTED;
270                }
271                if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
272                    ALOGE("unsupported GUID");
273                    return ERROR_UNSUPPORTED;
274                }
275            }
276
277            mValidFormat = true;
278        } else if (!memcmp(chunkHeader, "data", 4)) {
279            if (mValidFormat) {
280                mDataOffset = offset;
281                mDataSize = chunkSize;
282
283                mTrackMeta = new MetaData;
284
285                switch (mWaveFormat) {
286                    case WAVE_FORMAT_PCM:
287                        mTrackMeta->setCString(
288                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
289                        break;
290                    case WAVE_FORMAT_ALAW:
291                        mTrackMeta->setCString(
292                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
293                        break;
294                    case WAVE_FORMAT_MSGSM:
295                        mTrackMeta->setCString(
296                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
297                        break;
298                    default:
299                        CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
300                        mTrackMeta->setCString(
301                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
302                        break;
303                }
304
305                mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
306                mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
307                mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
308
309                int64_t durationUs = 0;
310                if (mWaveFormat == WAVE_FORMAT_MSGSM) {
311                    // 65 bytes decode to 320 8kHz samples
312                    durationUs =
313                        1000000LL * (mDataSize / 65 * 320) / 8000;
314                } else {
315                    size_t bytesPerSample = mBitsPerSample >> 3;
316
317                    if (!bytesPerSample || !mNumChannels)
318                        return ERROR_MALFORMED;
319
320                    size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
321
322                    if (!mSampleRate)
323                        return ERROR_MALFORMED;
324
325                    durationUs =
326                        1000000LL * num_samples / mSampleRate;
327                }
328
329                mTrackMeta->setInt64(kKeyDuration, durationUs);
330
331                return OK;
332            }
333        }
334
335        offset += chunkSize;
336    }
337
338    return NO_INIT;
339}
340
341const size_t WAVSource::kMaxFrameSize = 32768;
342
343WAVSource::WAVSource(
344        const sp<DataSource> &dataSource,
345        const sp<MetaData> &meta,
346        uint16_t waveFormat,
347        int32_t bitsPerSample,
348        off64_t offset, size_t size)
349    : mDataSource(dataSource),
350      mMeta(meta),
351      mWaveFormat(waveFormat),
352      mSampleRate(0),
353      mNumChannels(0),
354      mBitsPerSample(bitsPerSample),
355      mOffset(offset),
356      mSize(size),
357      mStarted(false),
358      mGroup(NULL) {
359    CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
360    CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
361
362    mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
363}
364
365WAVSource::~WAVSource() {
366    if (mStarted) {
367        stop();
368    }
369}
370
371status_t WAVSource::start(MetaData * /* params */) {
372    ALOGV("WAVSource::start");
373
374    CHECK(!mStarted);
375
376    mGroup = new MediaBufferGroup;
377    mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
378
379    if (mBitsPerSample == 8) {
380        // As a temporary buffer for 8->16 bit conversion.
381        mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
382    }
383
384    mCurrentPos = mOffset;
385
386    mStarted = true;
387
388    return OK;
389}
390
391status_t WAVSource::stop() {
392    ALOGV("WAVSource::stop");
393
394    CHECK(mStarted);
395
396    delete mGroup;
397    mGroup = NULL;
398
399    mStarted = false;
400
401    return OK;
402}
403
404sp<MetaData> WAVSource::getFormat() {
405    ALOGV("WAVSource::getFormat");
406
407    return mMeta;
408}
409
410status_t WAVSource::read(
411        MediaBuffer **out, const ReadOptions *options) {
412    *out = NULL;
413
414    int64_t seekTimeUs;
415    ReadOptions::SeekMode mode;
416    if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
417        int64_t pos = 0;
418
419        if (mWaveFormat == WAVE_FORMAT_MSGSM) {
420            // 65 bytes decode to 320 8kHz samples
421            int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
422            int64_t framenumber = samplenumber / 320;
423            pos = framenumber * 65;
424        } else {
425            pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
426        }
427        if (pos > (off64_t)mSize) {
428            pos = mSize;
429        }
430        mCurrentPos = pos + mOffset;
431    }
432
433    MediaBuffer *buffer;
434    status_t err = mGroup->acquire_buffer(&buffer);
435    if (err != OK) {
436        return err;
437    }
438
439    // make sure that maxBytesToRead is multiple of 3, in 24-bit case
440    size_t maxBytesToRead =
441        mBitsPerSample == 8 ? kMaxFrameSize / 2 :
442        (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
443
444    size_t maxBytesAvailable =
445        (mCurrentPos - mOffset >= (off64_t)mSize)
446            ? 0 : mSize - (mCurrentPos - mOffset);
447
448    if (maxBytesToRead > maxBytesAvailable) {
449        maxBytesToRead = maxBytesAvailable;
450    }
451
452    if (mWaveFormat == WAVE_FORMAT_MSGSM) {
453        // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
454        // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
455        if (maxBytesToRead > 1024) {
456            maxBytesToRead = 1024;
457        }
458        maxBytesToRead = (maxBytesToRead / 65) * 65;
459    } else {
460        // read only integral amounts of audio unit frames.
461        const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
462        maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
463    }
464
465    ssize_t n = mDataSource->readAt(
466            mCurrentPos, buffer->data(),
467            maxBytesToRead);
468
469    if (n <= 0) {
470        buffer->release();
471        buffer = NULL;
472
473        return ERROR_END_OF_STREAM;
474    }
475
476    buffer->set_range(0, n);
477
478    if (mWaveFormat == WAVE_FORMAT_PCM || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
479        if (mBitsPerSample == 8) {
480            // Convert 8-bit unsigned samples to 16-bit signed.
481
482            MediaBuffer *tmp;
483            CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
484
485            // The new buffer holds the sample number of samples, but each
486            // one is 2 bytes wide.
487            tmp->set_range(0, 2 * n);
488
489            int16_t *dst = (int16_t *)tmp->data();
490            const uint8_t *src = (const uint8_t *)buffer->data();
491            ssize_t numBytes = n;
492
493            while (numBytes-- > 0) {
494                *dst++ = ((int16_t)(*src) - 128) * 256;
495                ++src;
496            }
497
498            buffer->release();
499            buffer = tmp;
500        } else if (mBitsPerSample == 24) {
501            // Convert 24-bit signed samples to 16-bit signed.
502
503            const uint8_t *src =
504                (const uint8_t *)buffer->data() + buffer->range_offset();
505            int16_t *dst = (int16_t *)src;
506
507            size_t numSamples = buffer->range_length() / 3;
508            for (size_t i = 0; i < numSamples; ++i) {
509                int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16);
510                x = (x << 8) >> 8;  // sign extension
511
512                x = x >> 8;
513                *dst++ = (int16_t)x;
514                src += 3;
515            }
516
517            buffer->set_range(buffer->range_offset(), 2 * numSamples);
518        }
519    }
520
521    int64_t timeStampUs = 0;
522
523    if (mWaveFormat == WAVE_FORMAT_MSGSM) {
524        timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
525    } else {
526        size_t bytesPerSample = mBitsPerSample >> 3;
527        timeStampUs = 1000000LL * (mCurrentPos - mOffset)
528                / (mNumChannels * bytesPerSample) / mSampleRate;
529    }
530
531    buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
532
533    buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
534    mCurrentPos += n;
535
536    *out = buffer;
537
538    return OK;
539}
540
541////////////////////////////////////////////////////////////////////////////////
542
543bool SniffWAV(
544        const sp<DataSource> &source, String8 *mimeType, float *confidence,
545        sp<AMessage> *) {
546    char header[12];
547    if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
548        return false;
549    }
550
551    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
552        return false;
553    }
554
555    sp<MediaExtractor> extractor = new WAVExtractor(source);
556    if (extractor->countTracks() == 0) {
557        return false;
558    }
559
560    *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
561    *confidence = 0.3f;
562
563    return true;
564}
565
566}  // namespace android
567