// WAVExtractor.cpp revision e52267adab4c89169aafd10a0fa2656ccb02c039
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "WAVExtractor" 19#include <utils/Log.h> 20 21#include "include/WAVExtractor.h" 22 23#include <media/stagefright/foundation/ADebug.h> 24#include <media/stagefright/DataSource.h> 25#include <media/stagefright/MediaBufferGroup.h> 26#include <media/stagefright/MediaDefs.h> 27#include <media/stagefright/MediaErrors.h> 28#include <media/stagefright/MediaSource.h> 29#include <media/stagefright/MetaData.h> 30#include <utils/String8.h> 31#include <cutils/bitops.h> 32 33#define CHANNEL_MASK_USE_CHANNEL_ORDER 0 34 35namespace android { 36 37enum { 38 WAVE_FORMAT_PCM = 0x0001, 39 WAVE_FORMAT_ALAW = 0x0006, 40 WAVE_FORMAT_MULAW = 0x0007, 41 WAVE_FORMAT_MSGSM = 0x0031, 42 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 43}; 44 45static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 46 47 48static uint32_t U32_LE_AT(const uint8_t *ptr) { 49 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 50} 51 52static uint16_t U16_LE_AT(const uint8_t *ptr) { 53 return ptr[1] << 8 | ptr[0]; 54} 55 56struct WAVSource : public MediaSource { 57 WAVSource( 58 const sp<DataSource> &dataSource, 59 const sp<MetaData> &meta, 60 uint16_t waveFormat, 61 int32_t bitsPerSample, 62 off64_t offset, size_t size); 63 64 virtual status_t start(MetaData *params = NULL); 65 virtual status_t stop(); 66 
virtual sp<MetaData> getFormat(); 67 68 virtual status_t read( 69 MediaBuffer **buffer, const ReadOptions *options = NULL); 70 71protected: 72 virtual ~WAVSource(); 73 74private: 75 static const size_t kMaxFrameSize; 76 77 sp<DataSource> mDataSource; 78 sp<MetaData> mMeta; 79 uint16_t mWaveFormat; 80 int32_t mSampleRate; 81 int32_t mNumChannels; 82 int32_t mBitsPerSample; 83 off64_t mOffset; 84 size_t mSize; 85 bool mStarted; 86 MediaBufferGroup *mGroup; 87 off64_t mCurrentPos; 88 89 WAVSource(const WAVSource &); 90 WAVSource &operator=(const WAVSource &); 91}; 92 93WAVExtractor::WAVExtractor(const sp<DataSource> &source) 94 : mDataSource(source), 95 mValidFormat(false), 96 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 97 mInitCheck = init(); 98} 99 100WAVExtractor::~WAVExtractor() { 101} 102 103sp<MetaData> WAVExtractor::getMetaData() { 104 sp<MetaData> meta = new MetaData; 105 106 if (mInitCheck != OK) { 107 return meta; 108 } 109 110 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV); 111 112 return meta; 113} 114 115size_t WAVExtractor::countTracks() { 116 return mInitCheck == OK ? 
1 : 0; 117} 118 119sp<MediaSource> WAVExtractor::getTrack(size_t index) { 120 if (mInitCheck != OK || index > 0) { 121 return NULL; 122 } 123 124 return new WAVSource( 125 mDataSource, mTrackMeta, 126 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 127} 128 129sp<MetaData> WAVExtractor::getTrackMetaData( 130 size_t index, uint32_t /* flags */) { 131 if (mInitCheck != OK || index > 0) { 132 return NULL; 133 } 134 135 return mTrackMeta; 136} 137 138status_t WAVExtractor::init() { 139 uint8_t header[12]; 140 if (mDataSource->readAt( 141 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 142 return NO_INIT; 143 } 144 145 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 146 return NO_INIT; 147 } 148 149 size_t totalSize = U32_LE_AT(&header[4]); 150 151 off64_t offset = 12; 152 size_t remainingSize = totalSize; 153 while (remainingSize >= 8) { 154 uint8_t chunkHeader[8]; 155 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 156 return NO_INIT; 157 } 158 159 remainingSize -= 8; 160 offset += 8; 161 162 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 163 164 if (chunkSize > remainingSize) { 165 return NO_INIT; 166 } 167 168 if (!memcmp(chunkHeader, "fmt ", 4)) { 169 if (chunkSize < 16) { 170 return NO_INIT; 171 } 172 173 uint8_t formatSpec[40]; 174 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 175 return NO_INIT; 176 } 177 178 mWaveFormat = U16_LE_AT(formatSpec); 179 if (mWaveFormat != WAVE_FORMAT_PCM 180 && mWaveFormat != WAVE_FORMAT_ALAW 181 && mWaveFormat != WAVE_FORMAT_MULAW 182 && mWaveFormat != WAVE_FORMAT_MSGSM 183 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 184 return ERROR_UNSUPPORTED; 185 } 186 187 uint8_t fmtSize = 16; 188 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 189 fmtSize = 40; 190 } 191 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 192 return NO_INIT; 193 } 194 195 mNumChannels = U16_LE_AT(&formatSpec[2]); 196 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 197 if (mNumChannels != 1 && 
mNumChannels != 2) { 198 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 199 mNumChannels); 200 } 201 } else { 202 if (mNumChannels < 1 && mNumChannels > 8) { 203 return ERROR_UNSUPPORTED; 204 } 205 } 206 207 mSampleRate = U32_LE_AT(&formatSpec[4]); 208 209 if (mSampleRate == 0) { 210 return ERROR_MALFORMED; 211 } 212 213 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 214 215 if (mWaveFormat == WAVE_FORMAT_PCM 216 || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 217 if (mBitsPerSample != 8 && mBitsPerSample != 16 218 && mBitsPerSample != 24) { 219 return ERROR_UNSUPPORTED; 220 } 221 } else if (mWaveFormat == WAVE_FORMAT_MSGSM) { 222 if (mBitsPerSample != 0) { 223 return ERROR_UNSUPPORTED; 224 } 225 } else { 226 CHECK(mWaveFormat == WAVE_FORMAT_MULAW 227 || mWaveFormat == WAVE_FORMAT_ALAW); 228 if (mBitsPerSample != 8) { 229 return ERROR_UNSUPPORTED; 230 } 231 } 232 233 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 234 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 235 if (validBitsPerSample != mBitsPerSample) { 236 if (validBitsPerSample != 0) { 237 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 238 validBitsPerSample, mBitsPerSample); 239 return ERROR_UNSUPPORTED; 240 } else { 241 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT 242 // writers don't correctly set the valid bits value, and leave it at 0. 
243 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); 244 } 245 } 246 247 mChannelMask = U32_LE_AT(&formatSpec[20]); 248 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 249 if ((mChannelMask >> 18) != 0) { 250 ALOGE("invalid channel mask 0x%x", mChannelMask); 251 return ERROR_MALFORMED; 252 } 253 254 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 255 && (popcount(mChannelMask) != mNumChannels)) { 256 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 257 popcount(mChannelMask), mChannelMask); 258 return ERROR_MALFORMED; 259 } 260 261 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 262 // the sample format, using the same definitions as a regular WAV header 263 mWaveFormat = U16_LE_AT(&formatSpec[24]); 264 if (mWaveFormat != WAVE_FORMAT_PCM 265 && mWaveFormat != WAVE_FORMAT_ALAW 266 && mWaveFormat != WAVE_FORMAT_MULAW) { 267 return ERROR_UNSUPPORTED; 268 } 269 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) { 270 ALOGE("unsupported GUID"); 271 return ERROR_UNSUPPORTED; 272 } 273 } 274 275 mValidFormat = true; 276 } else if (!memcmp(chunkHeader, "data", 4)) { 277 if (mValidFormat) { 278 mDataOffset = offset; 279 mDataSize = chunkSize; 280 281 mTrackMeta = new MetaData; 282 283 switch (mWaveFormat) { 284 case WAVE_FORMAT_PCM: 285 mTrackMeta->setCString( 286 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 287 break; 288 case WAVE_FORMAT_ALAW: 289 mTrackMeta->setCString( 290 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 291 break; 292 case WAVE_FORMAT_MSGSM: 293 mTrackMeta->setCString( 294 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM); 295 break; 296 default: 297 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 298 mTrackMeta->setCString( 299 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 300 break; 301 } 302 303 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels); 304 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask); 305 mTrackMeta->setInt32(kKeySampleRate, mSampleRate); 306 307 int64_t 
durationUs = 0; 308 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 309 // 65 bytes decode to 320 8kHz samples 310 durationUs = 311 1000000LL * (mDataSize / 65 * 320) / 8000; 312 } else { 313 size_t bytesPerSample = mBitsPerSample >> 3; 314 315 if (!bytesPerSample || !mNumChannels) 316 return ERROR_MALFORMED; 317 318 size_t num_samples = mDataSize / (mNumChannels * bytesPerSample); 319 320 if (!mSampleRate) 321 return ERROR_MALFORMED; 322 323 durationUs = 324 1000000LL * num_samples / mSampleRate; 325 } 326 327 mTrackMeta->setInt64(kKeyDuration, durationUs); 328 329 return OK; 330 } 331 } 332 333 offset += chunkSize; 334 } 335 336 return NO_INIT; 337} 338 339const size_t WAVSource::kMaxFrameSize = 32768; 340 341WAVSource::WAVSource( 342 const sp<DataSource> &dataSource, 343 const sp<MetaData> &meta, 344 uint16_t waveFormat, 345 int32_t bitsPerSample, 346 off64_t offset, size_t size) 347 : mDataSource(dataSource), 348 mMeta(meta), 349 mWaveFormat(waveFormat), 350 mSampleRate(0), 351 mNumChannels(0), 352 mBitsPerSample(bitsPerSample), 353 mOffset(offset), 354 mSize(size), 355 mStarted(false), 356 mGroup(NULL) { 357 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate)); 358 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels)); 359 360 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize); 361} 362 363WAVSource::~WAVSource() { 364 if (mStarted) { 365 stop(); 366 } 367} 368 369status_t WAVSource::start(MetaData * /* params */) { 370 ALOGV("WAVSource::start"); 371 372 CHECK(!mStarted); 373 374 mGroup = new MediaBufferGroup; 375 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 376 377 if (mBitsPerSample == 8) { 378 // As a temporary buffer for 8->16 bit conversion. 
379 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 380 } 381 382 mCurrentPos = mOffset; 383 384 mStarted = true; 385 386 return OK; 387} 388 389status_t WAVSource::stop() { 390 ALOGV("WAVSource::stop"); 391 392 CHECK(mStarted); 393 394 delete mGroup; 395 mGroup = NULL; 396 397 mStarted = false; 398 399 return OK; 400} 401 402sp<MetaData> WAVSource::getFormat() { 403 ALOGV("WAVSource::getFormat"); 404 405 return mMeta; 406} 407 408status_t WAVSource::read( 409 MediaBuffer **out, const ReadOptions *options) { 410 *out = NULL; 411 412 int64_t seekTimeUs; 413 ReadOptions::SeekMode mode; 414 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 415 int64_t pos = 0; 416 417 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 418 // 65 bytes decode to 320 8kHz samples 419 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000; 420 int64_t framenumber = samplenumber / 320; 421 pos = framenumber * 65; 422 } else { 423 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 424 } 425 if (pos > (off64_t)mSize) { 426 pos = mSize; 427 } 428 mCurrentPos = pos + mOffset; 429 } 430 431 MediaBuffer *buffer; 432 status_t err = mGroup->acquire_buffer(&buffer); 433 if (err != OK) { 434 return err; 435 } 436 437 // make sure that maxBytesToRead is multiple of 3, in 24-bit case 438 size_t maxBytesToRead = 439 mBitsPerSample == 8 ? kMaxFrameSize / 2 : 440 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize); 441 442 size_t maxBytesAvailable = 443 (mCurrentPos - mOffset >= (off64_t)mSize) 444 ? 
0 : mSize - (mCurrentPos - mOffset); 445 446 if (maxBytesToRead > maxBytesAvailable) { 447 maxBytesToRead = maxBytesAvailable; 448 } 449 450 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 451 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames, 452 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio 453 if (maxBytesToRead > 1024) { 454 maxBytesToRead = 1024; 455 } 456 maxBytesToRead = (maxBytesToRead / 65) * 65; 457 } else { 458 // read only integral amounts of audio unit frames. 459 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8; 460 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize; 461 } 462 463 ssize_t n = mDataSource->readAt( 464 mCurrentPos, buffer->data(), 465 maxBytesToRead); 466 467 if (n <= 0) { 468 buffer->release(); 469 buffer = NULL; 470 471 return ERROR_END_OF_STREAM; 472 } 473 474 buffer->set_range(0, n); 475 476 if (mWaveFormat == WAVE_FORMAT_PCM || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 477 if (mBitsPerSample == 8) { 478 // Convert 8-bit unsigned samples to 16-bit signed. 479 480 MediaBuffer *tmp; 481 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 482 483 // The new buffer holds the sample number of samples, but each 484 // one is 2 bytes wide. 485 tmp->set_range(0, 2 * n); 486 487 int16_t *dst = (int16_t *)tmp->data(); 488 const uint8_t *src = (const uint8_t *)buffer->data(); 489 ssize_t numBytes = n; 490 491 while (numBytes-- > 0) { 492 *dst++ = ((int16_t)(*src) - 128) * 256; 493 ++src; 494 } 495 496 buffer->release(); 497 buffer = tmp; 498 } else if (mBitsPerSample == 24) { 499 // Convert 24-bit signed samples to 16-bit signed. 
500 501 const uint8_t *src = 502 (const uint8_t *)buffer->data() + buffer->range_offset(); 503 int16_t *dst = (int16_t *)src; 504 505 size_t numSamples = buffer->range_length() / 3; 506 for (size_t i = 0; i < numSamples; ++i) { 507 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16); 508 x = (x << 8) >> 8; // sign extension 509 510 x = x >> 8; 511 *dst++ = (int16_t)x; 512 src += 3; 513 } 514 515 buffer->set_range(buffer->range_offset(), 2 * numSamples); 516 } 517 } 518 519 int64_t timeStampUs = 0; 520 521 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 522 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate; 523 } else { 524 size_t bytesPerSample = mBitsPerSample >> 3; 525 timeStampUs = 1000000LL * (mCurrentPos - mOffset) 526 / (mNumChannels * bytesPerSample) / mSampleRate; 527 } 528 529 buffer->meta_data()->setInt64(kKeyTime, timeStampUs); 530 531 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 532 mCurrentPos += n; 533 534 *out = buffer; 535 536 return OK; 537} 538 539//////////////////////////////////////////////////////////////////////////////// 540 541bool SniffWAV( 542 const sp<DataSource> &source, String8 *mimeType, float *confidence, 543 sp<AMessage> *) { 544 char header[12]; 545 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 546 return false; 547 } 548 549 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 550 return false; 551 } 552 553 sp<MediaExtractor> extractor = new WAVExtractor(source); 554 if (extractor->countTracks() == 0) { 555 return false; 556 } 557 558 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV; 559 *confidence = 0.3f; 560 561 return true; 562} 563 564} // namespace android 565