WAVExtractor.cpp revision 9a40167c3dc32fccc72abd96f03df6ea5676793b
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "WAVExtractor" 19#include <utils/Log.h> 20 21#include "include/WAVExtractor.h" 22 23#include <media/stagefright/foundation/ADebug.h> 24#include <media/stagefright/DataSource.h> 25#include <media/stagefright/MediaBufferGroup.h> 26#include <media/stagefright/MediaDefs.h> 27#include <media/stagefright/MediaErrors.h> 28#include <media/stagefright/MediaSource.h> 29#include <media/stagefright/MetaData.h> 30#include <utils/String8.h> 31#include <cutils/bitops.h> 32 33#define CHANNEL_MASK_USE_CHANNEL_ORDER 0 34 35namespace android { 36 37enum { 38 WAVE_FORMAT_PCM = 0x0001, 39 WAVE_FORMAT_ALAW = 0x0006, 40 WAVE_FORMAT_MULAW = 0x0007, 41 WAVE_FORMAT_MSGSM = 0x0031, 42 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 43}; 44 45static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 46 47 48static uint32_t U32_LE_AT(const uint8_t *ptr) { 49 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 50} 51 52static uint16_t U16_LE_AT(const uint8_t *ptr) { 53 return ptr[1] << 8 | ptr[0]; 54} 55 56struct WAVSource : public MediaSource { 57 WAVSource( 58 const sp<DataSource> &dataSource, 59 const sp<MetaData> &meta, 60 uint16_t waveFormat, 61 int32_t bitsPerSample, 62 off64_t offset, size_t size); 63 64 virtual status_t start(MetaData *params = NULL); 65 virtual status_t stop(); 66 virtual sp<MetaData> getFormat(); 67 68 virtual status_t read( 69 MediaBuffer **buffer, const ReadOptions *options = NULL); 70 71protected: 72 virtual ~WAVSource(); 73 74private: 75 static const size_t kMaxFrameSize; 76 77 sp<DataSource> mDataSource; 78 sp<MetaData> mMeta; 79 uint16_t mWaveFormat; 80 int32_t mSampleRate; 81 int32_t mNumChannels; 82 int32_t mBitsPerSample; 83 off64_t mOffset; 84 size_t mSize; 85 bool mStarted; 86 MediaBufferGroup *mGroup; 87 off64_t mCurrentPos; 88 89 WAVSource(const WAVSource &); 90 WAVSource &operator=(const WAVSource &); 91}; 92 93WAVExtractor::WAVExtractor(const sp<DataSource> &source) 94 : mDataSource(source), 95 mValidFormat(false), 96 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 97 mInitCheck = init(); 98} 99 100WAVExtractor::~WAVExtractor() { 101} 102 103sp<MetaData> WAVExtractor::getMetaData() { 104 sp<MetaData> meta = new MetaData; 105 106 if (mInitCheck != OK) { 107 return meta; 108 } 109 110 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV); 111 112 return meta; 113} 114 115size_t WAVExtractor::countTracks() { 116 return mInitCheck == OK ? 1 : 0; 117} 118 119sp<IMediaSource> WAVExtractor::getTrack(size_t index) { 120 if (mInitCheck != OK || index > 0) { 121 return NULL; 122 } 123 124 return new WAVSource( 125 mDataSource, mTrackMeta, 126 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 127} 128 129sp<MetaData> WAVExtractor::getTrackMetaData( 130 size_t index, uint32_t /* flags */) { 131 if (mInitCheck != OK || index > 0) { 132 return NULL; 133 } 134 135 return mTrackMeta; 136} 137 138status_t WAVExtractor::init() { 139 uint8_t header[12]; 140 if (mDataSource->readAt( 141 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 142 return NO_INIT; 143 } 144 145 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 146 return NO_INIT; 147 } 148 149 size_t totalSize = U32_LE_AT(&header[4]); 150 151 off64_t offset = 12; 152 size_t remainingSize = totalSize; 153 while (remainingSize >= 8) { 154 uint8_t chunkHeader[8]; 155 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 156 return NO_INIT; 157 } 158 159 remainingSize -= 8; 160 offset += 8; 161 162 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 163 164 if (chunkSize > remainingSize) { 165 return NO_INIT; 166 } 167 168 if (!memcmp(chunkHeader, "fmt ", 4)) { 169 if (chunkSize < 16) { 170 return NO_INIT; 171 } 172 173 uint8_t formatSpec[40]; 174 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 175 return NO_INIT; 176 } 177 178 mWaveFormat = U16_LE_AT(formatSpec); 179 if (mWaveFormat != WAVE_FORMAT_PCM 180 && mWaveFormat != WAVE_FORMAT_ALAW 181 && mWaveFormat != WAVE_FORMAT_MULAW 182 && mWaveFormat != WAVE_FORMAT_MSGSM 183 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 184 return ERROR_UNSUPPORTED; 185 } 186 187 uint8_t fmtSize = 16; 188 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 189 fmtSize = 40; 190 } 191 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 192 return NO_INIT; 193 } 194 195 mNumChannels = U16_LE_AT(&formatSpec[2]); 196 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 197 if (mNumChannels == 0) { 198 return ERROR_UNSUPPORTED; 199 } else if (mNumChannels != 1 && mNumChannels != 2) { 200 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 201 mNumChannels); 202 } 203 } else { 204 if (mNumChannels < 1 || mNumChannels > 8) { 205 return ERROR_UNSUPPORTED; 206 } 207 } 208 209 mSampleRate = U32_LE_AT(&formatSpec[4]); 210 211 if (mSampleRate == 0) { 212 return ERROR_MALFORMED; 213 } 214 215 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 216 217 if (mWaveFormat == WAVE_FORMAT_PCM 218 || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 219 if (mBitsPerSample != 8 && mBitsPerSample != 16 220 && mBitsPerSample != 24) { 221 return ERROR_UNSUPPORTED; 222 } 223 } else if (mWaveFormat == WAVE_FORMAT_MSGSM) { 224 if (mBitsPerSample != 0) { 225 return ERROR_UNSUPPORTED; 226 } 227 } else { 228 CHECK(mWaveFormat == WAVE_FORMAT_MULAW 229 || mWaveFormat == WAVE_FORMAT_ALAW); 230 if (mBitsPerSample != 8) { 231 return ERROR_UNSUPPORTED; 232 } 233 } 234 235 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 236 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 237 if (validBitsPerSample != mBitsPerSample) { 238 if (validBitsPerSample != 0) { 239 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 240 validBitsPerSample, mBitsPerSample); 241 return ERROR_UNSUPPORTED; 242 } else { 243 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT 244 // writers don't correctly set the valid bits value, and leave it at 0. 245 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); 246 } 247 } 248 249 mChannelMask = U32_LE_AT(&formatSpec[20]); 250 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 251 if ((mChannelMask >> 18) != 0) { 252 ALOGE("invalid channel mask 0x%x", mChannelMask); 253 return ERROR_MALFORMED; 254 } 255 256 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 257 && (popcount(mChannelMask) != mNumChannels)) { 258 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 259 popcount(mChannelMask), mChannelMask); 260 return ERROR_MALFORMED; 261 } 262 263 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 264 // the sample format, using the same definitions as a regular WAV header 265 mWaveFormat = U16_LE_AT(&formatSpec[24]); 266 if (mWaveFormat != WAVE_FORMAT_PCM 267 && mWaveFormat != WAVE_FORMAT_ALAW 268 && mWaveFormat != WAVE_FORMAT_MULAW) { 269 return ERROR_UNSUPPORTED; 270 } 271 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) { 272 ALOGE("unsupported GUID"); 273 return ERROR_UNSUPPORTED; 274 } 275 } 276 277 mValidFormat = true; 278 } else if (!memcmp(chunkHeader, "data", 4)) { 279 if (mValidFormat) { 280 mDataOffset = offset; 281 mDataSize = chunkSize; 282 283 mTrackMeta = new MetaData; 284 285 switch (mWaveFormat) { 286 case WAVE_FORMAT_PCM: 287 mTrackMeta->setCString( 288 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 289 break; 290 case WAVE_FORMAT_ALAW: 291 mTrackMeta->setCString( 292 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 293 break; 294 case WAVE_FORMAT_MSGSM: 295 mTrackMeta->setCString( 296 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM); 297 break; 298 default: 299 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 300 mTrackMeta->setCString( 301 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 302 break; 303 } 304 305 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels); 306 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask); 307 mTrackMeta->setInt32(kKeySampleRate, mSampleRate); 308 309 int64_t durationUs = 0; 310 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 311 // 65 bytes decode to 320 8kHz samples 312 durationUs = 313 1000000LL * (mDataSize / 65 * 320) / 8000; 314 } else { 315 size_t bytesPerSample = mBitsPerSample >> 3; 316 317 if (!bytesPerSample || !mNumChannels) 318 return ERROR_MALFORMED; 319 320 size_t num_samples = mDataSize / (mNumChannels * bytesPerSample); 321 322 if (!mSampleRate) 323 return ERROR_MALFORMED; 324 325 durationUs = 326 1000000LL * num_samples / mSampleRate; 327 } 328 329 mTrackMeta->setInt64(kKeyDuration, durationUs); 330 331 return OK; 332 } 333 } 334 335 offset += chunkSize; 336 } 337 338 return NO_INIT; 339} 340 341const size_t WAVSource::kMaxFrameSize = 32768; 342 343WAVSource::WAVSource( 344 const sp<DataSource> &dataSource, 345 const sp<MetaData> &meta, 346 uint16_t waveFormat, 347 int32_t bitsPerSample, 348 off64_t offset, size_t size) 349 : mDataSource(dataSource), 350 mMeta(meta), 351 mWaveFormat(waveFormat), 352 mSampleRate(0), 353 mNumChannels(0), 354 mBitsPerSample(bitsPerSample), 355 mOffset(offset), 356 mSize(size), 357 mStarted(false), 358 mGroup(NULL) { 359 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate)); 360 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels)); 361 362 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize); 363} 364 365WAVSource::~WAVSource() { 366 if (mStarted) { 367 stop(); 368 } 369} 370 371status_t WAVSource::start(MetaData * /* params */) { 372 ALOGV("WAVSource::start"); 373 374 CHECK(!mStarted); 375 376 mGroup = new MediaBufferGroup; 377 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 378 379 if (mBitsPerSample == 8) { 380 // As a temporary buffer for 8->16 bit conversion. 381 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 382 } 383 384 mCurrentPos = mOffset; 385 386 mStarted = true; 387 388 return OK; 389} 390 391status_t WAVSource::stop() { 392 ALOGV("WAVSource::stop"); 393 394 CHECK(mStarted); 395 396 delete mGroup; 397 mGroup = NULL; 398 399 mStarted = false; 400 401 return OK; 402} 403 404sp<MetaData> WAVSource::getFormat() { 405 ALOGV("WAVSource::getFormat"); 406 407 return mMeta; 408} 409 410status_t WAVSource::read( 411 MediaBuffer **out, const ReadOptions *options) { 412 *out = NULL; 413 414 int64_t seekTimeUs; 415 ReadOptions::SeekMode mode; 416 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 417 int64_t pos = 0; 418 419 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 420 // 65 bytes decode to 320 8kHz samples 421 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000; 422 int64_t framenumber = samplenumber / 320; 423 pos = framenumber * 65; 424 } else { 425 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 426 } 427 if (pos > (off64_t)mSize) { 428 pos = mSize; 429 } 430 mCurrentPos = pos + mOffset; 431 } 432 433 MediaBuffer *buffer; 434 status_t err = mGroup->acquire_buffer(&buffer); 435 if (err != OK) { 436 return err; 437 } 438 439 // make sure that maxBytesToRead is multiple of 3, in 24-bit case 440 size_t maxBytesToRead = 441 mBitsPerSample == 8 ? kMaxFrameSize / 2 : 442 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize); 443 444 size_t maxBytesAvailable = 445 (mCurrentPos - mOffset >= (off64_t)mSize) 446 ? 0 : mSize - (mCurrentPos - mOffset); 447 448 if (maxBytesToRead > maxBytesAvailable) { 449 maxBytesToRead = maxBytesAvailable; 450 } 451 452 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 453 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames, 454 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio 455 if (maxBytesToRead > 1024) { 456 maxBytesToRead = 1024; 457 } 458 maxBytesToRead = (maxBytesToRead / 65) * 65; 459 } else { 460 // read only integral amounts of audio unit frames. 461 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8; 462 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize; 463 } 464 465 ssize_t n = mDataSource->readAt( 466 mCurrentPos, buffer->data(), 467 maxBytesToRead); 468 469 if (n <= 0) { 470 buffer->release(); 471 buffer = NULL; 472 473 return ERROR_END_OF_STREAM; 474 } 475 476 buffer->set_range(0, n); 477 478 if (mWaveFormat == WAVE_FORMAT_PCM || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 479 if (mBitsPerSample == 8) { 480 // Convert 8-bit unsigned samples to 16-bit signed. 481 482 MediaBuffer *tmp; 483 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 484 485 // The new buffer holds the sample number of samples, but each 486 // one is 2 bytes wide. 487 tmp->set_range(0, 2 * n); 488 489 int16_t *dst = (int16_t *)tmp->data(); 490 const uint8_t *src = (const uint8_t *)buffer->data(); 491 ssize_t numBytes = n; 492 493 while (numBytes-- > 0) { 494 *dst++ = ((int16_t)(*src) - 128) * 256; 495 ++src; 496 } 497 498 buffer->release(); 499 buffer = tmp; 500 } else if (mBitsPerSample == 24) { 501 // Convert 24-bit signed samples to 16-bit signed. 502 503 const uint8_t *src = 504 (const uint8_t *)buffer->data() + buffer->range_offset(); 505 int16_t *dst = (int16_t *)src; 506 507 size_t numSamples = buffer->range_length() / 3; 508 for (size_t i = 0; i < numSamples; ++i) { 509 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16); 510 x = (x << 8) >> 8; // sign extension 511 512 x = x >> 8; 513 *dst++ = (int16_t)x; 514 src += 3; 515 } 516 517 buffer->set_range(buffer->range_offset(), 2 * numSamples); 518 } 519 } 520 521 int64_t timeStampUs = 0; 522 523 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 524 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate; 525 } else { 526 size_t bytesPerSample = mBitsPerSample >> 3; 527 timeStampUs = 1000000LL * (mCurrentPos - mOffset) 528 / (mNumChannels * bytesPerSample) / mSampleRate; 529 } 530 531 buffer->meta_data()->setInt64(kKeyTime, timeStampUs); 532 533 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 534 mCurrentPos += n; 535 536 *out = buffer; 537 538 return OK; 539} 540 541//////////////////////////////////////////////////////////////////////////////// 542 543bool SniffWAV( 544 const sp<DataSource> &source, String8 *mimeType, float *confidence, 545 sp<AMessage> *) { 546 char header[12]; 547 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 548 return false; 549 } 550 551 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 552 return false; 553 } 554 555 sp<MediaExtractor> extractor = new WAVExtractor(source); 556 if (extractor->countTracks() == 0) { 557 return false; 558 } 559 560 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV; 561 *confidence = 0.3f; 562 563 return true; 564} 565 566} // namespace android 567