1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "WAVExtractor" 19#include <utils/Log.h> 20 21#include "WAVExtractor.h" 22 23#include <audio_utils/primitives.h> 24#include <media/DataSourceBase.h> 25#include <media/MediaTrack.h> 26#include <media/stagefright/foundation/ADebug.h> 27#include <media/stagefright/MediaBufferGroup.h> 28#include <media/stagefright/MediaDefs.h> 29#include <media/stagefright/MediaErrors.h> 30#include <media/stagefright/MetaData.h> 31#include <utils/String8.h> 32#include <cutils/bitops.h> 33 34#define CHANNEL_MASK_USE_CHANNEL_ORDER 0 35 36namespace android { 37 38enum { 39 WAVE_FORMAT_PCM = 0x0001, 40 WAVE_FORMAT_IEEE_FLOAT = 0x0003, 41 WAVE_FORMAT_ALAW = 0x0006, 42 WAVE_FORMAT_MULAW = 0x0007, 43 WAVE_FORMAT_MSGSM = 0x0031, 44 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 45}; 46 47static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 48static const char* AMBISONIC_SUBFORMAT = "\x00\x00\x21\x07\xD3\x11\x86\x44\xC8\xC1\xCA\x00\x00\x00"; 49 50static uint32_t U32_LE_AT(const uint8_t *ptr) { 51 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 52} 53 54static uint16_t U16_LE_AT(const uint8_t *ptr) { 55 return ptr[1] << 8 | ptr[0]; 56} 57 58struct WAVSource : public MediaTrack { 59 WAVSource( 60 DataSourceBase *dataSource, 61 MetaDataBase &meta, 62 uint16_t waveFormat, 63 int32_t bitsPerSample, 64 off64_t offset, size_t size); 65 66 virtual status_t start(MetaDataBase *params = NULL); 67 virtual status_t stop(); 68 virtual status_t getFormat(MetaDataBase &meta); 69 70 virtual status_t read( 71 MediaBufferBase **buffer, const ReadOptions *options = NULL); 72 73 virtual bool supportNonblockingRead() { return true; } 74 75protected: 76 virtual ~WAVSource(); 77 78private: 79 static const size_t kMaxFrameSize; 80 81 DataSourceBase *mDataSource; 82 MetaDataBase &mMeta; 83 uint16_t mWaveFormat; 84 int32_t mSampleRate; 85 int32_t mNumChannels; 86 int32_t mBitsPerSample; 87 off64_t mOffset; 88 size_t mSize; 89 bool mStarted; 90 MediaBufferGroup *mGroup; 91 off64_t mCurrentPos; 92 93 WAVSource(const WAVSource &); 94 WAVSource &operator=(const WAVSource &); 95}; 96 97WAVExtractor::WAVExtractor(DataSourceBase *source) 98 : mDataSource(source), 99 mValidFormat(false), 100 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 101 mInitCheck = init(); 102} 103 104WAVExtractor::~WAVExtractor() { 105} 106 107status_t WAVExtractor::getMetaData(MetaDataBase &meta) { 108 meta.clear(); 109 if (mInitCheck == OK) { 110 meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV); 111 } 112 113 return OK; 114} 115 116size_t WAVExtractor::countTracks() { 117 return mInitCheck == OK ? 1 : 0; 118} 119 120MediaTrack *WAVExtractor::getTrack(size_t index) { 121 if (mInitCheck != OK || index > 0) { 122 return NULL; 123 } 124 125 return new WAVSource( 126 mDataSource, mTrackMeta, 127 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 128} 129 130status_t WAVExtractor::getTrackMetaData( 131 MetaDataBase &meta, 132 size_t index, uint32_t /* flags */) { 133 if (mInitCheck != OK || index > 0) { 134 return UNKNOWN_ERROR; 135 } 136 137 meta = mTrackMeta; 138 return OK; 139} 140 141status_t WAVExtractor::init() { 142 uint8_t header[12]; 143 if (mDataSource->readAt( 144 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 145 return NO_INIT; 146 } 147 148 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 149 return NO_INIT; 150 } 151 152 size_t totalSize = U32_LE_AT(&header[4]); 153 154 off64_t offset = 12; 155 size_t remainingSize = totalSize; 156 while (remainingSize >= 8) { 157 uint8_t chunkHeader[8]; 158 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 159 return NO_INIT; 160 } 161 162 remainingSize -= 8; 163 offset += 8; 164 165 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 166 167 if (chunkSize > remainingSize) { 168 return NO_INIT; 169 } 170 171 if (!memcmp(chunkHeader, "fmt ", 4)) { 172 if (chunkSize < 16) { 173 return NO_INIT; 174 } 175 176 uint8_t formatSpec[40]; 177 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 178 return NO_INIT; 179 } 180 181 mWaveFormat = U16_LE_AT(formatSpec); 182 if (mWaveFormat != WAVE_FORMAT_PCM 183 && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT 184 && mWaveFormat != WAVE_FORMAT_ALAW 185 && mWaveFormat != WAVE_FORMAT_MULAW 186 && mWaveFormat != WAVE_FORMAT_MSGSM 187 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 188 return ERROR_UNSUPPORTED; 189 } 190 191 uint8_t fmtSize = 16; 192 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 193 fmtSize = 40; 194 } 195 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 196 return NO_INIT; 197 } 198 199 mNumChannels = U16_LE_AT(&formatSpec[2]); 200 201 if (mNumChannels < 1 || mNumChannels > 8) { 202 ALOGE("Unsupported number of channels (%d)", mNumChannels); 203 return ERROR_UNSUPPORTED; 204 } 205 206 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 207 if (mNumChannels != 1 && mNumChannels != 2) { 208 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 209 mNumChannels); 210 } 211 } 212 213 mSampleRate = U32_LE_AT(&formatSpec[4]); 214 215 if (mSampleRate == 0) { 216 return ERROR_MALFORMED; 217 } 218 219 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 220 221 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 222 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 223 if (validBitsPerSample != mBitsPerSample) { 224 if (validBitsPerSample != 0) { 225 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 226 validBitsPerSample, mBitsPerSample); 227 return ERROR_UNSUPPORTED; 228 } else { 229 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT 230 // writers don't correctly set the valid bits value, and leave it at 0. 231 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); 232 } 233 } 234 235 mChannelMask = U32_LE_AT(&formatSpec[20]); 236 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 237 if ((mChannelMask >> 18) != 0) { 238 ALOGE("invalid channel mask 0x%x", mChannelMask); 239 return ERROR_MALFORMED; 240 } 241 242 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 243 && (popcount(mChannelMask) != mNumChannels)) { 244 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 245 popcount(mChannelMask), mChannelMask); 246 return ERROR_MALFORMED; 247 } 248 249 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 250 // the sample format, using the same definitions as a regular WAV header 251 mWaveFormat = U16_LE_AT(&formatSpec[24]); 252 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) && 253 memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) { 254 ALOGE("unsupported GUID"); 255 return ERROR_UNSUPPORTED; 256 } 257 } 258 259 if (mWaveFormat == WAVE_FORMAT_PCM) { 260 if (mBitsPerSample != 8 && mBitsPerSample != 16 261 && mBitsPerSample != 24 && mBitsPerSample != 32) { 262 return ERROR_UNSUPPORTED; 263 } 264 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) { 265 if (mBitsPerSample != 32) { // TODO we don't support double 266 return ERROR_UNSUPPORTED; 267 } 268 } 269 else if (mWaveFormat == WAVE_FORMAT_MSGSM) { 270 if (mBitsPerSample != 0) { 271 return ERROR_UNSUPPORTED; 272 } 273 } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) { 274 if (mBitsPerSample != 8) { 275 return ERROR_UNSUPPORTED; 276 } 277 } else { 278 return ERROR_UNSUPPORTED; 279 } 280 281 mValidFormat = true; 282 } else if (!memcmp(chunkHeader, "data", 4)) { 283 if (mValidFormat) { 284 mDataOffset = offset; 285 mDataSize = chunkSize; 286 287 mTrackMeta.clear(); 288 289 switch (mWaveFormat) { 290 case WAVE_FORMAT_PCM: 291 case WAVE_FORMAT_IEEE_FLOAT: 292 mTrackMeta.setCString( 293 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 294 break; 295 case WAVE_FORMAT_ALAW: 296 mTrackMeta.setCString( 297 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 298 break; 299 case WAVE_FORMAT_MSGSM: 300 mTrackMeta.setCString( 301 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM); 302 break; 303 default: 304 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 305 mTrackMeta.setCString( 306 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 307 break; 308 } 309 310 mTrackMeta.setInt32(kKeyChannelCount, mNumChannels); 311 mTrackMeta.setInt32(kKeyChannelMask, mChannelMask); 312 mTrackMeta.setInt32(kKeySampleRate, mSampleRate); 313 mTrackMeta.setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit); 314 315 int64_t durationUs = 0; 316 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 317 // 65 bytes decode to 320 8kHz samples 318 durationUs = 319 1000000LL * (mDataSize / 65 * 320) / 8000; 320 } else { 321 size_t bytesPerSample = mBitsPerSample >> 3; 322 323 if (!bytesPerSample || !mNumChannels) 324 return ERROR_MALFORMED; 325 326 size_t num_samples = mDataSize / (mNumChannels * bytesPerSample); 327 328 if (!mSampleRate) 329 return ERROR_MALFORMED; 330 331 durationUs = 332 1000000LL * num_samples / mSampleRate; 333 } 334 335 mTrackMeta.setInt64(kKeyDuration, durationUs); 336 337 return OK; 338 } 339 } 340 341 offset += chunkSize; 342 } 343 344 return NO_INIT; 345} 346 347const size_t WAVSource::kMaxFrameSize = 32768; 348 349WAVSource::WAVSource( 350 DataSourceBase *dataSource, 351 MetaDataBase &meta, 352 uint16_t waveFormat, 353 int32_t bitsPerSample, 354 off64_t offset, size_t size) 355 : mDataSource(dataSource), 356 mMeta(meta), 357 mWaveFormat(waveFormat), 358 mSampleRate(0), 359 mNumChannels(0), 360 mBitsPerSample(bitsPerSample), 361 mOffset(offset), 362 mSize(size), 363 mStarted(false), 364 mGroup(NULL) { 365 CHECK(mMeta.findInt32(kKeySampleRate, &mSampleRate)); 366 CHECK(mMeta.findInt32(kKeyChannelCount, &mNumChannels)); 367 368 mMeta.setInt32(kKeyMaxInputSize, kMaxFrameSize); 369} 370 371WAVSource::~WAVSource() { 372 if (mStarted) { 373 stop(); 374 } 375} 376 377status_t WAVSource::start(MetaDataBase * /* params */) { 378 ALOGV("WAVSource::start"); 379 380 CHECK(!mStarted); 381 382 // some WAV files may have large audio buffers that use shared memory transfer. 383 mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize); 384 385 if (mBitsPerSample == 8) { 386 // As a temporary buffer for 8->16 bit conversion. 387 mGroup->add_buffer(MediaBufferBase::Create(kMaxFrameSize)); 388 } 389 390 mCurrentPos = mOffset; 391 392 mStarted = true; 393 394 return OK; 395} 396 397status_t WAVSource::stop() { 398 ALOGV("WAVSource::stop"); 399 400 CHECK(mStarted); 401 402 delete mGroup; 403 mGroup = NULL; 404 405 mStarted = false; 406 407 return OK; 408} 409 410status_t WAVSource::getFormat(MetaDataBase &meta) { 411 ALOGV("WAVSource::getFormat"); 412 413 meta = mMeta; 414 return OK; 415} 416 417status_t WAVSource::read( 418 MediaBufferBase **out, const ReadOptions *options) { 419 *out = NULL; 420 421 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) { 422 return WOULD_BLOCK; 423 } 424 425 int64_t seekTimeUs; 426 ReadOptions::SeekMode mode; 427 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 428 int64_t pos = 0; 429 430 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 431 // 65 bytes decode to 320 8kHz samples 432 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000; 433 int64_t framenumber = samplenumber / 320; 434 pos = framenumber * 65; 435 } else { 436 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 437 } 438 if (pos > (off64_t)mSize) { 439 pos = mSize; 440 } 441 mCurrentPos = pos + mOffset; 442 } 443 444 MediaBufferBase *buffer; 445 status_t err = mGroup->acquire_buffer(&buffer); 446 if (err != OK) { 447 return err; 448 } 449 450 // make sure that maxBytesToRead is multiple of 3, in 24-bit case 451 size_t maxBytesToRead = 452 mBitsPerSample == 8 ? kMaxFrameSize / 2 : 453 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize); 454 455 size_t maxBytesAvailable = 456 (mCurrentPos - mOffset >= (off64_t)mSize) 457 ? 0 : mSize - (mCurrentPos - mOffset); 458 459 if (maxBytesToRead > maxBytesAvailable) { 460 maxBytesToRead = maxBytesAvailable; 461 } 462 463 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 464 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames, 465 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio 466 if (maxBytesToRead > 1024) { 467 maxBytesToRead = 1024; 468 } 469 maxBytesToRead = (maxBytesToRead / 65) * 65; 470 } else { 471 // read only integral amounts of audio unit frames. 472 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8; 473 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize; 474 } 475 476 ssize_t n = mDataSource->readAt( 477 mCurrentPos, buffer->data(), 478 maxBytesToRead); 479 480 if (n <= 0) { 481 buffer->release(); 482 buffer = NULL; 483 484 return ERROR_END_OF_STREAM; 485 } 486 487 buffer->set_range(0, n); 488 489 // TODO: add capability to return data as float PCM instead of 16 bit PCM. 490 if (mWaveFormat == WAVE_FORMAT_PCM) { 491 if (mBitsPerSample == 8) { 492 // Convert 8-bit unsigned samples to 16-bit signed. 493 494 // Create new buffer with 2 byte wide samples 495 MediaBufferBase *tmp; 496 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 497 tmp->set_range(0, 2 * n); 498 499 memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n); 500 buffer->release(); 501 buffer = tmp; 502 } else if (mBitsPerSample == 24) { 503 // Convert 24-bit signed samples to 16-bit signed in place 504 const size_t numSamples = n / 3; 505 506 memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples); 507 buffer->set_range(0, 2 * numSamples); 508 } else if (mBitsPerSample == 32) { 509 // Convert 32-bit signed samples to 16-bit signed in place 510 const size_t numSamples = n / 4; 511 512 memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples); 513 buffer->set_range(0, 2 * numSamples); 514 } 515 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) { 516 if (mBitsPerSample == 32) { 517 // Convert 32-bit float samples to 16-bit signed in place 518 const size_t numSamples = n / 4; 519 520 memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples); 521 buffer->set_range(0, 2 * numSamples); 522 } 523 } 524 525 int64_t timeStampUs = 0; 526 527 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 528 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate; 529 } else { 530 size_t bytesPerSample = mBitsPerSample >> 3; 531 timeStampUs = 1000000LL * (mCurrentPos - mOffset) 532 / (mNumChannels * bytesPerSample) / mSampleRate; 533 } 534 535 buffer->meta_data().setInt64(kKeyTime, timeStampUs); 536 537 buffer->meta_data().setInt32(kKeyIsSyncFrame, 1); 538 mCurrentPos += n; 539 540 *out = buffer; 541 542 return OK; 543} 544 545//////////////////////////////////////////////////////////////////////////////// 546 547static MediaExtractor* CreateExtractor( 548 DataSourceBase *source, 549 void *) { 550 return new WAVExtractor(source); 551} 552 553static MediaExtractor::CreatorFunc Sniff( 554 DataSourceBase *source, 555 float *confidence, 556 void **, 557 MediaExtractor::FreeMetaFunc *) { 558 char header[12]; 559 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 560 return NULL; 561 } 562 563 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 564 return NULL; 565 } 566 567 MediaExtractor *extractor = new WAVExtractor(source); 568 int numTracks = extractor->countTracks(); 569 delete extractor; 570 if (numTracks == 0) { 571 return NULL; 572 } 573 574 *confidence = 0.3f; 575 576 return CreateExtractor; 577} 578 579extern "C" { 580// This is the only symbol that needs to be exported 581__attribute__ ((visibility ("default"))) 582MediaExtractor::ExtractorDef GETEXTRACTORDEF() { 583 return { 584 MediaExtractor::EXTRACTORDEF_VERSION, 585 UUID("7d613858-5837-4a38-84c5-332d1cddee27"), 586 1, // version 587 "WAV Extractor", 588 Sniff 589 }; 590} 591 592} // extern "C" 593 594} // namespace android 595