WAVExtractor.cpp revision 78bd91b15ee8ea5aa2ab5a8cad7e892cb2d01c1b
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "WAVExtractor" 19#include <utils/Log.h> 20 21#include "include/WAVExtractor.h" 22 23#include <audio_utils/primitives.h> 24#include <media/stagefright/foundation/ADebug.h> 25#include <media/stagefright/DataSource.h> 26#include <media/stagefright/MediaBufferGroup.h> 27#include <media/stagefright/MediaDefs.h> 28#include <media/stagefright/MediaErrors.h> 29#include <media/stagefright/MediaSource.h> 30#include <media/stagefright/MetaData.h> 31#include <utils/String8.h> 32#include <cutils/bitops.h> 33 34#define CHANNEL_MASK_USE_CHANNEL_ORDER 0 35 36namespace android { 37 38enum { 39 WAVE_FORMAT_PCM = 0x0001, 40 WAVE_FORMAT_IEEE_FLOAT = 0x0003, 41 WAVE_FORMAT_ALAW = 0x0006, 42 WAVE_FORMAT_MULAW = 0x0007, 43 WAVE_FORMAT_MSGSM = 0x0031, 44 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 45}; 46 47static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 48 49 50static uint32_t U32_LE_AT(const uint8_t *ptr) { 51 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 52} 53 54static uint16_t U16_LE_AT(const uint8_t *ptr) { 55 return ptr[1] << 8 | ptr[0]; 56} 57 58struct WAVSource : public MediaSource { 59 WAVSource( 60 const sp<DataSource> &dataSource, 61 const sp<MetaData> &meta, 62 uint16_t waveFormat, 63 int32_t bitsPerSample, 64 off64_t offset, size_t size); 65 66 virtual status_t start(MetaData *params = NULL); 67 virtual status_t stop(); 68 virtual sp<MetaData> getFormat(); 69 70 virtual status_t read( 71 MediaBuffer **buffer, const ReadOptions *options = NULL); 72 73protected: 74 virtual ~WAVSource(); 75 76private: 77 static const size_t kMaxFrameSize; 78 79 sp<DataSource> mDataSource; 80 sp<MetaData> mMeta; 81 uint16_t mWaveFormat; 82 int32_t mSampleRate; 83 int32_t mNumChannels; 84 int32_t mBitsPerSample; 85 off64_t mOffset; 86 size_t mSize; 87 bool mStarted; 88 MediaBufferGroup *mGroup; 89 off64_t mCurrentPos; 90 91 WAVSource(const WAVSource &); 92 WAVSource &operator=(const WAVSource &); 93}; 94 95WAVExtractor::WAVExtractor(const sp<DataSource> &source) 96 : mDataSource(source), 97 mValidFormat(false), 98 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 99 mInitCheck = init(); 100} 101 102WAVExtractor::~WAVExtractor() { 103} 104 105sp<MetaData> WAVExtractor::getMetaData() { 106 sp<MetaData> meta = new MetaData; 107 108 if (mInitCheck != OK) { 109 return meta; 110 } 111 112 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV); 113 114 return meta; 115} 116 117size_t WAVExtractor::countTracks() { 118 return mInitCheck == OK ? 1 : 0; 119} 120 121sp<IMediaSource> WAVExtractor::getTrack(size_t index) { 122 if (mInitCheck != OK || index > 0) { 123 return NULL; 124 } 125 126 return new WAVSource( 127 mDataSource, mTrackMeta, 128 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 129} 130 131sp<MetaData> WAVExtractor::getTrackMetaData( 132 size_t index, uint32_t /* flags */) { 133 if (mInitCheck != OK || index > 0) { 134 return NULL; 135 } 136 137 return mTrackMeta; 138} 139 140status_t WAVExtractor::init() { 141 uint8_t header[12]; 142 if (mDataSource->readAt( 143 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 144 return NO_INIT; 145 } 146 147 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 148 return NO_INIT; 149 } 150 151 size_t totalSize = U32_LE_AT(&header[4]); 152 153 off64_t offset = 12; 154 size_t remainingSize = totalSize; 155 while (remainingSize >= 8) { 156 uint8_t chunkHeader[8]; 157 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 158 return NO_INIT; 159 } 160 161 remainingSize -= 8; 162 offset += 8; 163 164 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 165 166 if (chunkSize > remainingSize) { 167 return NO_INIT; 168 } 169 170 if (!memcmp(chunkHeader, "fmt ", 4)) { 171 if (chunkSize < 16) { 172 return NO_INIT; 173 } 174 175 uint8_t formatSpec[40]; 176 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 177 return NO_INIT; 178 } 179 180 mWaveFormat = U16_LE_AT(formatSpec); 181 if (mWaveFormat != WAVE_FORMAT_PCM 182 && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT 183 && mWaveFormat != WAVE_FORMAT_ALAW 184 && mWaveFormat != WAVE_FORMAT_MULAW 185 && mWaveFormat != WAVE_FORMAT_MSGSM 186 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 187 return ERROR_UNSUPPORTED; 188 } 189 190 uint8_t fmtSize = 16; 191 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 192 fmtSize = 40; 193 } 194 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 195 return NO_INIT; 196 } 197 198 mNumChannels = U16_LE_AT(&formatSpec[2]); 199 200 if (mNumChannels < 1 || mNumChannels > 8) { 201 ALOGE("Unsupported number of channels (%d)", mNumChannels); 202 return ERROR_UNSUPPORTED; 203 } 204 205 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 206 if (mNumChannels != 1 && mNumChannels != 2) { 207 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 208 mNumChannels); 209 } 210 } 211 212 mSampleRate = U32_LE_AT(&formatSpec[4]); 213 214 if (mSampleRate == 0) { 215 return ERROR_MALFORMED; 216 } 217 218 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 219 220 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 221 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 222 if (validBitsPerSample != mBitsPerSample) { 223 if (validBitsPerSample != 0) { 224 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 225 validBitsPerSample, mBitsPerSample); 226 return ERROR_UNSUPPORTED; 227 } else { 228 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT 229 // writers don't correctly set the valid bits value, and leave it at 0. 230 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); 231 } 232 } 233 234 mChannelMask = U32_LE_AT(&formatSpec[20]); 235 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 236 if ((mChannelMask >> 18) != 0) { 237 ALOGE("invalid channel mask 0x%x", mChannelMask); 238 return ERROR_MALFORMED; 239 } 240 241 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 242 && (popcount(mChannelMask) != mNumChannels)) { 243 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 244 popcount(mChannelMask), mChannelMask); 245 return ERROR_MALFORMED; 246 } 247 248 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 249 // the sample format, using the same definitions as a regular WAV header 250 mWaveFormat = U16_LE_AT(&formatSpec[24]); 251 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) { 252 ALOGE("unsupported GUID"); 253 return ERROR_UNSUPPORTED; 254 } 255 } 256 257 if (mWaveFormat == WAVE_FORMAT_PCM) { 258 if (mBitsPerSample != 8 && mBitsPerSample != 16 259 && mBitsPerSample != 24 && mBitsPerSample != 32) { 260 return ERROR_UNSUPPORTED; 261 } 262 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) { 263 if (mBitsPerSample != 32) { // TODO we don't support double 264 return ERROR_UNSUPPORTED; 265 } 266 } 267 else if (mWaveFormat == WAVE_FORMAT_MSGSM) { 268 if (mBitsPerSample != 0) { 269 return ERROR_UNSUPPORTED; 270 } 271 } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) { 272 if (mBitsPerSample != 8) { 273 return ERROR_UNSUPPORTED; 274 } 275 } else { 276 return ERROR_UNSUPPORTED; 277 } 278 279 mValidFormat = true; 280 } else if (!memcmp(chunkHeader, "data", 4)) { 281 if (mValidFormat) { 282 mDataOffset = offset; 283 mDataSize = chunkSize; 284 285 mTrackMeta = new MetaData; 286 287 switch (mWaveFormat) { 288 case WAVE_FORMAT_PCM: 289 case WAVE_FORMAT_IEEE_FLOAT: 290 mTrackMeta->setCString( 291 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 292 break; 293 case WAVE_FORMAT_ALAW: 294 mTrackMeta->setCString( 295 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 296 break; 297 case WAVE_FORMAT_MSGSM: 298 mTrackMeta->setCString( 299 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM); 300 break; 301 default: 302 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 303 mTrackMeta->setCString( 304 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 305 break; 306 } 307 308 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels); 309 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask); 310 mTrackMeta->setInt32(kKeySampleRate, mSampleRate); 311 mTrackMeta->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit); 312 313 int64_t durationUs = 0; 314 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 315 // 65 bytes decode to 320 8kHz samples 316 durationUs = 317 1000000LL * (mDataSize / 65 * 320) / 8000; 318 } else { 319 size_t bytesPerSample = mBitsPerSample >> 3; 320 321 if (!bytesPerSample || !mNumChannels) 322 return ERROR_MALFORMED; 323 324 size_t num_samples = mDataSize / (mNumChannels * bytesPerSample); 325 326 if (!mSampleRate) 327 return ERROR_MALFORMED; 328 329 durationUs = 330 1000000LL * num_samples / mSampleRate; 331 } 332 333 mTrackMeta->setInt64(kKeyDuration, durationUs); 334 335 return OK; 336 } 337 } 338 339 offset += chunkSize; 340 } 341 342 return NO_INIT; 343} 344 345const size_t WAVSource::kMaxFrameSize = 32768; 346 347WAVSource::WAVSource( 348 const sp<DataSource> &dataSource, 349 const sp<MetaData> &meta, 350 uint16_t waveFormat, 351 int32_t bitsPerSample, 352 off64_t offset, size_t size) 353 : mDataSource(dataSource), 354 mMeta(meta), 355 mWaveFormat(waveFormat), 356 mSampleRate(0), 357 mNumChannels(0), 358 mBitsPerSample(bitsPerSample), 359 mOffset(offset), 360 mSize(size), 361 mStarted(false), 362 mGroup(NULL) { 363 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate)); 364 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels)); 365 366 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize); 367} 368 369WAVSource::~WAVSource() { 370 if (mStarted) { 371 stop(); 372 } 373} 374 375status_t WAVSource::start(MetaData * /* params */) { 376 ALOGV("WAVSource::start"); 377 378 CHECK(!mStarted); 379 380 mGroup = new MediaBufferGroup; 381 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 382 383 if (mBitsPerSample == 8) { 384 // As a temporary buffer for 8->16 bit conversion. 385 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 386 } 387 388 mCurrentPos = mOffset; 389 390 mStarted = true; 391 392 return OK; 393} 394 395status_t WAVSource::stop() { 396 ALOGV("WAVSource::stop"); 397 398 CHECK(mStarted); 399 400 delete mGroup; 401 mGroup = NULL; 402 403 mStarted = false; 404 405 return OK; 406} 407 408sp<MetaData> WAVSource::getFormat() { 409 ALOGV("WAVSource::getFormat"); 410 411 return mMeta; 412} 413 414status_t WAVSource::read( 415 MediaBuffer **out, const ReadOptions *options) { 416 *out = NULL; 417 418 int64_t seekTimeUs; 419 ReadOptions::SeekMode mode; 420 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 421 int64_t pos = 0; 422 423 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 424 // 65 bytes decode to 320 8kHz samples 425 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000; 426 int64_t framenumber = samplenumber / 320; 427 pos = framenumber * 65; 428 } else { 429 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 430 } 431 if (pos > (off64_t)mSize) { 432 pos = mSize; 433 } 434 mCurrentPos = pos + mOffset; 435 } 436 437 MediaBuffer *buffer; 438 status_t err = mGroup->acquire_buffer(&buffer); 439 if (err != OK) { 440 return err; 441 } 442 443 // make sure that maxBytesToRead is multiple of 3, in 24-bit case 444 size_t maxBytesToRead = 445 mBitsPerSample == 8 ? kMaxFrameSize / 2 : 446 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize); 447 448 size_t maxBytesAvailable = 449 (mCurrentPos - mOffset >= (off64_t)mSize) 450 ? 0 : mSize - (mCurrentPos - mOffset); 451 452 if (maxBytesToRead > maxBytesAvailable) { 453 maxBytesToRead = maxBytesAvailable; 454 } 455 456 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 457 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames, 458 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio 459 if (maxBytesToRead > 1024) { 460 maxBytesToRead = 1024; 461 } 462 maxBytesToRead = (maxBytesToRead / 65) * 65; 463 } else { 464 // read only integral amounts of audio unit frames. 465 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8; 466 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize; 467 } 468 469 ssize_t n = mDataSource->readAt( 470 mCurrentPos, buffer->data(), 471 maxBytesToRead); 472 473 if (n <= 0) { 474 buffer->release(); 475 buffer = NULL; 476 477 return ERROR_END_OF_STREAM; 478 } 479 480 buffer->set_range(0, n); 481 482 // TODO: add capability to return data as float PCM instead of 16 bit PCM. 483 if (mWaveFormat == WAVE_FORMAT_PCM) { 484 if (mBitsPerSample == 8) { 485 // Convert 8-bit unsigned samples to 16-bit signed. 486 487 // Create new buffer with 2 byte wide samples 488 MediaBuffer *tmp; 489 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 490 tmp->set_range(0, 2 * n); 491 492 memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n); 493 buffer->release(); 494 buffer = tmp; 495 } else if (mBitsPerSample == 24) { 496 // Convert 24-bit signed samples to 16-bit signed in place 497 const size_t numSamples = n / 3; 498 499 memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples); 500 buffer->set_range(0, 2 * numSamples); 501 } else if (mBitsPerSample == 32) { 502 // Convert 32-bit signed samples to 16-bit signed in place 503 const size_t numSamples = n / 4; 504 505 memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples); 506 buffer->set_range(0, 2 * numSamples); 507 } 508 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) { 509 if (mBitsPerSample == 32) { 510 // Convert 32-bit float samples to 16-bit signed in place 511 const size_t numSamples = n / 4; 512 513 memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples); 514 buffer->set_range(0, 2 * numSamples); 515 } 516 } 517 518 int64_t timeStampUs = 0; 519 520 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 521 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate; 522 } else { 523 size_t bytesPerSample = mBitsPerSample >> 3; 524 timeStampUs = 1000000LL * (mCurrentPos - mOffset) 525 / (mNumChannels * bytesPerSample) / mSampleRate; 526 } 527 528 buffer->meta_data()->setInt64(kKeyTime, timeStampUs); 529 530 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 531 mCurrentPos += n; 532 533 *out = buffer; 534 535 return OK; 536} 537 538//////////////////////////////////////////////////////////////////////////////// 539 540bool SniffWAV( 541 const sp<DataSource> &source, String8 *mimeType, float *confidence, 542 sp<AMessage> *) { 543 char header[12]; 544 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 545 return false; 546 } 547 548 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 549 return false; 550 } 551 552 sp<MediaExtractor> extractor = new WAVExtractor(source); 553 if (extractor->countTracks() == 0) { 554 return false; 555 } 556 557 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV; 558 *confidence = 0.3f; 559 560 return true; 561} 562 563} // namespace android 564