1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "WAVExtractor"
19#include <utils/Log.h>
20
21#include "WAVExtractor.h"
22
23#include <audio_utils/primitives.h>
24#include <media/DataSourceBase.h>
25#include <media/MediaTrack.h>
26#include <media/stagefright/foundation/ADebug.h>
27#include <media/stagefright/MediaBufferGroup.h>
28#include <media/stagefright/MediaDefs.h>
29#include <media/stagefright/MediaErrors.h>
30#include <media/stagefright/MetaData.h>
31#include <utils/String8.h>
32#include <cutils/bitops.h>
33
// Channel mask value meaning "no explicit mask". The extractor starts out with
// this value, and a WAVE_EXT header carrying it skips the check that the number
// of set mask bits equals the channel count.
#define CHANNEL_MASK_USE_CHANNEL_ORDER 0
35
36namespace android {
37
// Format tags recognised in the first two bytes of the "fmt " chunk (and, for
// WAVE_FORMAT_EXTENSIBLE, in the first two bytes of the sub-format GUID).
enum {
    WAVE_FORMAT_PCM        = 0x0001,  // integer PCM; 8/16/24/32 bits accepted
    WAVE_FORMAT_IEEE_FLOAT = 0x0003,  // float PCM; only 32 bits accepted
    WAVE_FORMAT_ALAW       = 0x0006,  // G.711 A-law; 8 bits required
    WAVE_FORMAT_MULAW      = 0x0007,  // G.711 mu-law; 8 bits required
    WAVE_FORMAT_MSGSM      = 0x0031,  // MS GSM; bits-per-sample must be 0
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE   // real format is taken from the GUID
};
46
// Expected values for the 14 bytes that follow the two-byte format code inside
// the sub-format GUID of a WAVE_FORMAT_EXTENSIBLE header. init() memcmp()s
// exactly 14 bytes starting at formatSpec[26] against each of these and accepts
// the header if either one matches.
static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
static const char* AMBISONIC_SUBFORMAT = "\x00\x00\x21\x07\xD3\x11\x86\x44\xC8\xC1\xCA\x00\x00\x00";
49
// Decodes the four bytes at |ptr| as an unsigned little-endian 32-bit value.
//
// Each byte is widened to uint32_t before it is shifted so that the most
// significant byte is never shifted into the sign bit of a promoted int.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return static_cast<uint32_t>(ptr[3]) << 24
            | static_cast<uint32_t>(ptr[2]) << 16
            | static_cast<uint32_t>(ptr[1]) << 8
            | static_cast<uint32_t>(ptr[0]);
}
53
// Decodes the two bytes at |ptr| as an unsigned little-endian 16-bit value.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const unsigned lowByte = ptr[0];
    const unsigned highByte = ptr[1];
    return static_cast<uint16_t>((highByte << 8) | lowByte);
}
57
// MediaTrack that serves the audio payload of a WAV "data" chunk.
//
// The track reads raw bytes from the data source between mOffset and
// mOffset + mSize. For integer PCM at 8, 24 or 32 bits and for 32-bit float
// PCM, read() converts the samples to 16-bit signed PCM before returning them;
// other formats are returned as read.
struct WAVSource : public MediaTrack {
    // |meta| is held by reference and must already contain kKeySampleRate and
    // kKeyChannelCount; the constructor adds kKeyMaxInputSize to it.
    // |offset| and |size| give the position and byte length of the payload
    // within |dataSource|.
    WAVSource(
            DataSourceBase *dataSource,
            MetaDataBase &meta,
            uint16_t waveFormat,
            int32_t bitsPerSample,
            off64_t offset, size_t size);

    // Allocates the buffer group and rewinds to the start of the payload.
    virtual status_t start(MetaDataBase *params = NULL);
    // Releases the buffer group; must only be called while started.
    virtual status_t stop();
    // Copies the track metadata into |meta|.
    virtual status_t getFormat(MetaDataBase &meta);

    // Returns the next buffer of audio, honouring an optional seek request
    // and the non-blocking flag in |options|.
    virtual status_t read(
            MediaBufferBase **buffer, const ReadOptions *options = NULL);

    virtual bool supportNonblockingRead() { return true; }

protected:
    virtual ~WAVSource();

private:
    // Size in bytes of each buffer in mGroup and upper bound for one read.
    static const size_t kMaxFrameSize;

    DataSourceBase *mDataSource;
    MetaDataBase &mMeta;        // caller-owned metadata, see constructor note
    uint16_t mWaveFormat;       // one of the WAVE_FORMAT_* tags
    int32_t mSampleRate;        // loaded from mMeta in the constructor
    int32_t mNumChannels;       // loaded from mMeta in the constructor
    int32_t mBitsPerSample;
    off64_t mOffset;            // start of the payload in mDataSource
    size_t mSize;               // payload length in bytes
    bool mStarted;
    MediaBufferGroup *mGroup;   // created in start(), deleted in stop()
    off64_t mCurrentPos;        // absolute read position; set by start()

    // Copying is disallowed: declared private, no definition in this file.
    WAVSource(const WAVSource &);
    WAVSource &operator=(const WAVSource &);
};
96
97WAVExtractor::WAVExtractor(DataSourceBase *source)
98    : mDataSource(source),
99      mValidFormat(false),
100      mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
101    mInitCheck = init();
102}
103
104WAVExtractor::~WAVExtractor() {
105}
106
107status_t WAVExtractor::getMetaData(MetaDataBase &meta) {
108    meta.clear();
109    if (mInitCheck == OK) {
110        meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
111    }
112
113    return OK;
114}
115
116size_t WAVExtractor::countTracks() {
117    return mInitCheck == OK ? 1 : 0;
118}
119
120MediaTrack *WAVExtractor::getTrack(size_t index) {
121    if (mInitCheck != OK || index > 0) {
122        return NULL;
123    }
124
125    return new WAVSource(
126            mDataSource, mTrackMeta,
127            mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
128}
129
130status_t WAVExtractor::getTrackMetaData(
131        MetaDataBase &meta,
132        size_t index, uint32_t /* flags */) {
133    if (mInitCheck != OK || index > 0) {
134        return UNKNOWN_ERROR;
135    }
136
137    meta = mTrackMeta;
138    return OK;
139}
140
141status_t WAVExtractor::init() {
142    uint8_t header[12];
143    if (mDataSource->readAt(
144                0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
145        return NO_INIT;
146    }
147
148    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
149        return NO_INIT;
150    }
151
152    size_t totalSize = U32_LE_AT(&header[4]);
153
154    off64_t offset = 12;
155    size_t remainingSize = totalSize;
156    while (remainingSize >= 8) {
157        uint8_t chunkHeader[8];
158        if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
159            return NO_INIT;
160        }
161
162        remainingSize -= 8;
163        offset += 8;
164
165        uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
166
167        if (chunkSize > remainingSize) {
168            return NO_INIT;
169        }
170
171        if (!memcmp(chunkHeader, "fmt ", 4)) {
172            if (chunkSize < 16) {
173                return NO_INIT;
174            }
175
176            uint8_t formatSpec[40];
177            if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
178                return NO_INIT;
179            }
180
181            mWaveFormat = U16_LE_AT(formatSpec);
182            if (mWaveFormat != WAVE_FORMAT_PCM
183                    && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
184                    && mWaveFormat != WAVE_FORMAT_ALAW
185                    && mWaveFormat != WAVE_FORMAT_MULAW
186                    && mWaveFormat != WAVE_FORMAT_MSGSM
187                    && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
188                return ERROR_UNSUPPORTED;
189            }
190
191            uint8_t fmtSize = 16;
192            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
193                fmtSize = 40;
194            }
195            if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
196                return NO_INIT;
197            }
198
199            mNumChannels = U16_LE_AT(&formatSpec[2]);
200
201            if (mNumChannels < 1 || mNumChannels > 8) {
202                ALOGE("Unsupported number of channels (%d)", mNumChannels);
203                return ERROR_UNSUPPORTED;
204            }
205
206            if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
207                if (mNumChannels != 1 && mNumChannels != 2) {
208                    ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
209                            mNumChannels);
210                }
211            }
212
213            mSampleRate = U32_LE_AT(&formatSpec[4]);
214
215            if (mSampleRate == 0) {
216                return ERROR_MALFORMED;
217            }
218
219            mBitsPerSample = U16_LE_AT(&formatSpec[14]);
220
221            if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
222                uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
223                if (validBitsPerSample != mBitsPerSample) {
224                    if (validBitsPerSample != 0) {
225                        ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
226                                validBitsPerSample, mBitsPerSample);
227                        return ERROR_UNSUPPORTED;
228                    } else {
229                        // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
230                        // writers don't correctly set the valid bits value, and leave it at 0.
231                        ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
232                    }
233                }
234
235                mChannelMask = U32_LE_AT(&formatSpec[20]);
236                ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
237                if ((mChannelMask >> 18) != 0) {
238                    ALOGE("invalid channel mask 0x%x", mChannelMask);
239                    return ERROR_MALFORMED;
240                }
241
242                if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
243                        && (popcount(mChannelMask) != mNumChannels)) {
244                    ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
245                            popcount(mChannelMask), mChannelMask);
246                    return ERROR_MALFORMED;
247                }
248
249                // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
250                // the sample format, using the same definitions as a regular WAV header
251                mWaveFormat = U16_LE_AT(&formatSpec[24]);
252                if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) &&
253                    memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) {
254                    ALOGE("unsupported GUID");
255                    return ERROR_UNSUPPORTED;
256                }
257            }
258
259            if (mWaveFormat == WAVE_FORMAT_PCM) {
260                if (mBitsPerSample != 8 && mBitsPerSample != 16
261                    && mBitsPerSample != 24 && mBitsPerSample != 32) {
262                    return ERROR_UNSUPPORTED;
263                }
264            } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
265                if (mBitsPerSample != 32) {  // TODO we don't support double
266                    return ERROR_UNSUPPORTED;
267                }
268            }
269            else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
270                if (mBitsPerSample != 0) {
271                    return ERROR_UNSUPPORTED;
272                }
273            } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
274                if (mBitsPerSample != 8) {
275                    return ERROR_UNSUPPORTED;
276                }
277            } else {
278                return ERROR_UNSUPPORTED;
279            }
280
281            mValidFormat = true;
282        } else if (!memcmp(chunkHeader, "data", 4)) {
283            if (mValidFormat) {
284                mDataOffset = offset;
285                mDataSize = chunkSize;
286
287                mTrackMeta.clear();
288
289                switch (mWaveFormat) {
290                    case WAVE_FORMAT_PCM:
291                    case WAVE_FORMAT_IEEE_FLOAT:
292                        mTrackMeta.setCString(
293                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
294                        break;
295                    case WAVE_FORMAT_ALAW:
296                        mTrackMeta.setCString(
297                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
298                        break;
299                    case WAVE_FORMAT_MSGSM:
300                        mTrackMeta.setCString(
301                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
302                        break;
303                    default:
304                        CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
305                        mTrackMeta.setCString(
306                                kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
307                        break;
308                }
309
310                mTrackMeta.setInt32(kKeyChannelCount, mNumChannels);
311                mTrackMeta.setInt32(kKeyChannelMask, mChannelMask);
312                mTrackMeta.setInt32(kKeySampleRate, mSampleRate);
313                mTrackMeta.setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
314
315                int64_t durationUs = 0;
316                if (mWaveFormat == WAVE_FORMAT_MSGSM) {
317                    // 65 bytes decode to 320 8kHz samples
318                    durationUs =
319                        1000000LL * (mDataSize / 65 * 320) / 8000;
320                } else {
321                    size_t bytesPerSample = mBitsPerSample >> 3;
322
323                    if (!bytesPerSample || !mNumChannels)
324                        return ERROR_MALFORMED;
325
326                    size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
327
328                    if (!mSampleRate)
329                        return ERROR_MALFORMED;
330
331                    durationUs =
332                        1000000LL * num_samples / mSampleRate;
333                }
334
335                mTrackMeta.setInt64(kKeyDuration, durationUs);
336
337                return OK;
338            }
339        }
340
341        offset += chunkSize;
342    }
343
344    return NO_INIT;
345}
346
// Size in bytes of every buffer in the track's MediaBufferGroup. The same value
// is published as kKeyMaxInputSize and caps the number of bytes fetched by a
// single read().
const size_t WAVSource::kMaxFrameSize = 32768;
348
349WAVSource::WAVSource(
350        DataSourceBase *dataSource,
351        MetaDataBase &meta,
352        uint16_t waveFormat,
353        int32_t bitsPerSample,
354        off64_t offset, size_t size)
355    : mDataSource(dataSource),
356      mMeta(meta),
357      mWaveFormat(waveFormat),
358      mSampleRate(0),
359      mNumChannels(0),
360      mBitsPerSample(bitsPerSample),
361      mOffset(offset),
362      mSize(size),
363      mStarted(false),
364      mGroup(NULL) {
365    CHECK(mMeta.findInt32(kKeySampleRate, &mSampleRate));
366    CHECK(mMeta.findInt32(kKeyChannelCount, &mNumChannels));
367
368    mMeta.setInt32(kKeyMaxInputSize, kMaxFrameSize);
369}
370
371WAVSource::~WAVSource() {
372    if (mStarted) {
373        stop();
374    }
375}
376
377status_t WAVSource::start(MetaDataBase * /* params */) {
378    ALOGV("WAVSource::start");
379
380    CHECK(!mStarted);
381
382    // some WAV files may have large audio buffers that use shared memory transfer.
383    mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize);
384
385    if (mBitsPerSample == 8) {
386        // As a temporary buffer for 8->16 bit conversion.
387        mGroup->add_buffer(MediaBufferBase::Create(kMaxFrameSize));
388    }
389
390    mCurrentPos = mOffset;
391
392    mStarted = true;
393
394    return OK;
395}
396
397status_t WAVSource::stop() {
398    ALOGV("WAVSource::stop");
399
400    CHECK(mStarted);
401
402    delete mGroup;
403    mGroup = NULL;
404
405    mStarted = false;
406
407    return OK;
408}
409
410status_t WAVSource::getFormat(MetaDataBase &meta) {
411    ALOGV("WAVSource::getFormat");
412
413    meta = mMeta;
414    return OK;
415}
416
417status_t WAVSource::read(
418        MediaBufferBase **out, const ReadOptions *options) {
419    *out = NULL;
420
421    if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
422        return WOULD_BLOCK;
423    }
424
425    int64_t seekTimeUs;
426    ReadOptions::SeekMode mode;
427    if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
428        int64_t pos = 0;
429
430        if (mWaveFormat == WAVE_FORMAT_MSGSM) {
431            // 65 bytes decode to 320 8kHz samples
432            int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
433            int64_t framenumber = samplenumber / 320;
434            pos = framenumber * 65;
435        } else {
436            pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
437        }
438        if (pos > (off64_t)mSize) {
439            pos = mSize;
440        }
441        mCurrentPos = pos + mOffset;
442    }
443
444    MediaBufferBase *buffer;
445    status_t err = mGroup->acquire_buffer(&buffer);
446    if (err != OK) {
447        return err;
448    }
449
450    // make sure that maxBytesToRead is multiple of 3, in 24-bit case
451    size_t maxBytesToRead =
452        mBitsPerSample == 8 ? kMaxFrameSize / 2 :
453        (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
454
455    size_t maxBytesAvailable =
456        (mCurrentPos - mOffset >= (off64_t)mSize)
457            ? 0 : mSize - (mCurrentPos - mOffset);
458
459    if (maxBytesToRead > maxBytesAvailable) {
460        maxBytesToRead = maxBytesAvailable;
461    }
462
463    if (mWaveFormat == WAVE_FORMAT_MSGSM) {
464        // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
465        // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
466        if (maxBytesToRead > 1024) {
467            maxBytesToRead = 1024;
468        }
469        maxBytesToRead = (maxBytesToRead / 65) * 65;
470    } else {
471        // read only integral amounts of audio unit frames.
472        const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
473        maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
474    }
475
476    ssize_t n = mDataSource->readAt(
477            mCurrentPos, buffer->data(),
478            maxBytesToRead);
479
480    if (n <= 0) {
481        buffer->release();
482        buffer = NULL;
483
484        return ERROR_END_OF_STREAM;
485    }
486
487    buffer->set_range(0, n);
488
489    // TODO: add capability to return data as float PCM instead of 16 bit PCM.
490    if (mWaveFormat == WAVE_FORMAT_PCM) {
491        if (mBitsPerSample == 8) {
492            // Convert 8-bit unsigned samples to 16-bit signed.
493
494            // Create new buffer with 2 byte wide samples
495            MediaBufferBase *tmp;
496            CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
497            tmp->set_range(0, 2 * n);
498
499            memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
500            buffer->release();
501            buffer = tmp;
502        } else if (mBitsPerSample == 24) {
503            // Convert 24-bit signed samples to 16-bit signed in place
504            const size_t numSamples = n / 3;
505
506            memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
507            buffer->set_range(0, 2 * numSamples);
508        }  else if (mBitsPerSample == 32) {
509            // Convert 32-bit signed samples to 16-bit signed in place
510            const size_t numSamples = n / 4;
511
512            memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
513            buffer->set_range(0, 2 * numSamples);
514        }
515    } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
516        if (mBitsPerSample == 32) {
517            // Convert 32-bit float samples to 16-bit signed in place
518            const size_t numSamples = n / 4;
519
520            memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
521            buffer->set_range(0, 2 * numSamples);
522        }
523    }
524
525    int64_t timeStampUs = 0;
526
527    if (mWaveFormat == WAVE_FORMAT_MSGSM) {
528        timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
529    } else {
530        size_t bytesPerSample = mBitsPerSample >> 3;
531        timeStampUs = 1000000LL * (mCurrentPos - mOffset)
532                / (mNumChannels * bytesPerSample) / mSampleRate;
533    }
534
535    buffer->meta_data().setInt64(kKeyTime, timeStampUs);
536
537    buffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
538    mCurrentPos += n;
539
540    *out = buffer;
541
542    return OK;
543}
544
545////////////////////////////////////////////////////////////////////////////////
546
547static MediaExtractor* CreateExtractor(
548        DataSourceBase *source,
549        void *) {
550    return new WAVExtractor(source);
551}
552
553static MediaExtractor::CreatorFunc Sniff(
554        DataSourceBase *source,
555        float *confidence,
556        void **,
557        MediaExtractor::FreeMetaFunc *) {
558    char header[12];
559    if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
560        return NULL;
561    }
562
563    if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
564        return NULL;
565    }
566
567    MediaExtractor *extractor = new WAVExtractor(source);
568    int numTracks = extractor->countTracks();
569    delete extractor;
570    if (numTracks == 0) {
571        return NULL;
572    }
573
574    *confidence = 0.3f;
575
576    return CreateExtractor;
577}
578
579extern "C" {
580// This is the only symbol that needs to be exported
581__attribute__ ((visibility ("default")))
582MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
583    return {
584        MediaExtractor::EXTRACTORDEF_VERSION,
585        UUID("7d613858-5837-4a38-84c5-332d1cddee27"),
586        1, // version
587        "WAV Extractor",
588        Sniff
589    };
590}
591
592} // extern "C"
593
594} // namespace android
595