WAVExtractor.cpp revision ba933df89521d63f75ca66af12ce9d7ae9496b9e
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "WAVExtractor" 19#include <utils/Log.h> 20 21#include "include/WAVExtractor.h" 22 23#include <media/stagefright/foundation/ADebug.h> 24#include <media/stagefright/DataSource.h> 25#include <media/stagefright/MediaBufferGroup.h> 26#include <media/stagefright/MediaDefs.h> 27#include <media/stagefright/MediaErrors.h> 28#include <media/stagefright/MediaSource.h> 29#include <media/stagefright/MetaData.h> 30#include <utils/String8.h> 31#include <cutils/bitops.h> 32 33#define CHANNEL_MASK_USE_CHANNEL_ORDER 0 34 35namespace android { 36 37enum { 38 WAVE_FORMAT_PCM = 0x0001, 39 WAVE_FORMAT_ALAW = 0x0006, 40 WAVE_FORMAT_MULAW = 0x0007, 41 WAVE_FORMAT_MSGSM = 0x0031, 42 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 43}; 44 45static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 46 47 48static uint32_t U32_LE_AT(const uint8_t *ptr) { 49 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 50} 51 52static uint16_t U16_LE_AT(const uint8_t *ptr) { 53 return ptr[1] << 8 | ptr[0]; 54} 55 56struct WAVSource : public MediaSource { 57 WAVSource( 58 const sp<DataSource> &dataSource, 59 const sp<MetaData> &meta, 60 uint16_t waveFormat, 61 int32_t bitsPerSample, 62 off64_t offset, size_t size); 63 64 virtual status_t start(MetaData *params = NULL); 65 virtual status_t stop(); 66 virtual sp<MetaData> getFormat(); 67 68 virtual status_t read( 69 MediaBuffer **buffer, const ReadOptions *options = NULL); 70 71protected: 72 virtual ~WAVSource(); 73 74private: 75 static const size_t kMaxFrameSize; 76 77 sp<DataSource> mDataSource; 78 sp<MetaData> mMeta; 79 uint16_t mWaveFormat; 80 int32_t mSampleRate; 81 int32_t mNumChannels; 82 int32_t mBitsPerSample; 83 off64_t mOffset; 84 size_t mSize; 85 bool mStarted; 86 MediaBufferGroup *mGroup; 87 off64_t mCurrentPos; 88 89 WAVSource(const WAVSource &); 90 WAVSource &operator=(const WAVSource &); 91}; 92 93WAVExtractor::WAVExtractor(const sp<DataSource> &source) 94 : mDataSource(source), 95 mValidFormat(false), 96 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 97 mInitCheck = init(); 98} 99 100WAVExtractor::~WAVExtractor() { 101} 102 103sp<MetaData> WAVExtractor::getMetaData() { 104 sp<MetaData> meta = new MetaData; 105 106 if (mInitCheck != OK) { 107 return meta; 108 } 109 110 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV); 111 112 return meta; 113} 114 115size_t WAVExtractor::countTracks() { 116 return mInitCheck == OK ? 1 : 0; 117} 118 119sp<MediaSource> WAVExtractor::getTrack(size_t index) { 120 if (mInitCheck != OK || index > 0) { 121 return NULL; 122 } 123 124 return new WAVSource( 125 mDataSource, mTrackMeta, 126 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 127} 128 129sp<MetaData> WAVExtractor::getTrackMetaData( 130 size_t index, uint32_t flags) { 131 if (mInitCheck != OK || index > 0) { 132 return NULL; 133 } 134 135 return mTrackMeta; 136} 137 138status_t WAVExtractor::init() { 139 uint8_t header[12]; 140 if (mDataSource->readAt( 141 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 142 return NO_INIT; 143 } 144 145 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 146 return NO_INIT; 147 } 148 149 size_t totalSize = U32_LE_AT(&header[4]); 150 151 off64_t offset = 12; 152 size_t remainingSize = totalSize; 153 while (remainingSize >= 8) { 154 uint8_t chunkHeader[8]; 155 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 156 return NO_INIT; 157 } 158 159 remainingSize -= 8; 160 offset += 8; 161 162 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 163 164 if (chunkSize > remainingSize) { 165 return NO_INIT; 166 } 167 168 if (!memcmp(chunkHeader, "fmt ", 4)) { 169 if (chunkSize < 16) { 170 return NO_INIT; 171 } 172 173 uint8_t formatSpec[40]; 174 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 175 return NO_INIT; 176 } 177 178 mWaveFormat = U16_LE_AT(formatSpec); 179 if (mWaveFormat != WAVE_FORMAT_PCM 180 && mWaveFormat != WAVE_FORMAT_ALAW 181 && mWaveFormat != WAVE_FORMAT_MULAW 182 && mWaveFormat != WAVE_FORMAT_MSGSM 183 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 184 return ERROR_UNSUPPORTED; 185 } 186 187 uint8_t fmtSize = 16; 188 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 189 fmtSize = 40; 190 } 191 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 192 return NO_INIT; 193 } 194 195 mNumChannels = U16_LE_AT(&formatSpec[2]); 196 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 197 if (mNumChannels != 1 && mNumChannels != 2) { 198 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 199 mNumChannels); 200 } 201 } else { 202 if (mNumChannels < 1 && mNumChannels > 8) { 203 return ERROR_UNSUPPORTED; 204 } 205 } 206 207 mSampleRate = U32_LE_AT(&formatSpec[4]); 208 209 if (mSampleRate == 0) { 210 return ERROR_MALFORMED; 211 } 212 213 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 214 215 if (mWaveFormat == WAVE_FORMAT_PCM 216 || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 217 if (mBitsPerSample != 8 && mBitsPerSample != 16 218 && mBitsPerSample != 24) { 219 return ERROR_UNSUPPORTED; 220 } 221 } else if (mWaveFormat == WAVE_FORMAT_MSGSM) { 222 if (mBitsPerSample != 0) { 223 return ERROR_UNSUPPORTED; 224 } 225 } else { 226 CHECK(mWaveFormat == WAVE_FORMAT_MULAW 227 || mWaveFormat == WAVE_FORMAT_ALAW); 228 if (mBitsPerSample != 8) { 229 return ERROR_UNSUPPORTED; 230 } 231 } 232 233 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 234 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 235 if (validBitsPerSample != mBitsPerSample) { 236 if (validBitsPerSample != 0) { 237 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 238 validBitsPerSample, mBitsPerSample); 239 return ERROR_UNSUPPORTED; 240 } else { 241 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT 242 // writers don't correctly set the valid bits value, and leave it at 0. 243 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); 244 } 245 } 246 247 mChannelMask = U32_LE_AT(&formatSpec[20]); 248 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 249 if ((mChannelMask >> 18) != 0) { 250 ALOGE("invalid channel mask 0x%x", mChannelMask); 251 return ERROR_MALFORMED; 252 } 253 254 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 255 && (popcount(mChannelMask) != mNumChannels)) { 256 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 257 popcount(mChannelMask), mChannelMask); 258 return ERROR_MALFORMED; 259 } 260 261 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 262 // the sample format, using the same definitions as a regular WAV header 263 mWaveFormat = U16_LE_AT(&formatSpec[24]); 264 if (mWaveFormat != WAVE_FORMAT_PCM 265 && mWaveFormat != WAVE_FORMAT_ALAW 266 && mWaveFormat != WAVE_FORMAT_MULAW) { 267 return ERROR_UNSUPPORTED; 268 } 269 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) { 270 ALOGE("unsupported GUID"); 271 return ERROR_UNSUPPORTED; 272 } 273 } 274 275 mValidFormat = true; 276 } else if (!memcmp(chunkHeader, "data", 4)) { 277 if (mValidFormat) { 278 mDataOffset = offset; 279 mDataSize = chunkSize; 280 281 mTrackMeta = new MetaData; 282 283 switch (mWaveFormat) { 284 case WAVE_FORMAT_PCM: 285 mTrackMeta->setCString( 286 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 287 break; 288 case WAVE_FORMAT_ALAW: 289 mTrackMeta->setCString( 290 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 291 break; 292 case WAVE_FORMAT_MSGSM: 293 mTrackMeta->setCString( 294 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM); 295 break; 296 default: 297 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 298 mTrackMeta->setCString( 299 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 300 break; 301 } 302 303 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels); 304 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask); 305 mTrackMeta->setInt32(kKeySampleRate, mSampleRate); 306 307 int64_t durationUs = 0; 308 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 309 // 65 bytes decode to 320 8kHz samples 310 durationUs = 311 1000000LL * (mDataSize / 65 * 320) / 8000; 312 } else { 313 size_t bytesPerSample = mBitsPerSample >> 3; 314 durationUs = 315 1000000LL * (mDataSize / (mNumChannels * bytesPerSample)) 316 / mSampleRate; 317 } 318 319 mTrackMeta->setInt64(kKeyDuration, durationUs); 320 321 return OK; 322 } 323 } 324 325 offset += chunkSize; 326 } 327 328 return NO_INIT; 329} 330 331const size_t WAVSource::kMaxFrameSize = 32768; 332 333WAVSource::WAVSource( 334 const sp<DataSource> &dataSource, 335 const sp<MetaData> &meta, 336 uint16_t waveFormat, 337 int32_t bitsPerSample, 338 off64_t offset, size_t size) 339 : mDataSource(dataSource), 340 mMeta(meta), 341 mWaveFormat(waveFormat), 342 mSampleRate(0), 343 mNumChannels(0), 344 mBitsPerSample(bitsPerSample), 345 mOffset(offset), 346 mSize(size), 347 mStarted(false), 348 mGroup(NULL) { 349 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate)); 350 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels)); 351 352 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize); 353} 354 355WAVSource::~WAVSource() { 356 if (mStarted) { 357 stop(); 358 } 359} 360 361status_t WAVSource::start(MetaData *params) { 362 ALOGV("WAVSource::start"); 363 364 CHECK(!mStarted); 365 366 mGroup = new MediaBufferGroup; 367 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 368 369 if (mBitsPerSample == 8) { 370 // As a temporary buffer for 8->16 bit conversion. 371 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 372 } 373 374 mCurrentPos = mOffset; 375 376 mStarted = true; 377 378 return OK; 379} 380 381status_t WAVSource::stop() { 382 ALOGV("WAVSource::stop"); 383 384 CHECK(mStarted); 385 386 delete mGroup; 387 mGroup = NULL; 388 389 mStarted = false; 390 391 return OK; 392} 393 394sp<MetaData> WAVSource::getFormat() { 395 ALOGV("WAVSource::getFormat"); 396 397 return mMeta; 398} 399 400status_t WAVSource::read( 401 MediaBuffer **out, const ReadOptions *options) { 402 *out = NULL; 403 404 int64_t seekTimeUs; 405 ReadOptions::SeekMode mode; 406 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 407 int64_t pos = 0; 408 409 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 410 // 65 bytes decode to 320 8kHz samples 411 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000; 412 int64_t framenumber = samplenumber / 320; 413 pos = framenumber * 65; 414 } else { 415 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 416 } 417 if (pos > mSize) { 418 pos = mSize; 419 } 420 mCurrentPos = pos + mOffset; 421 } 422 423 MediaBuffer *buffer; 424 status_t err = mGroup->acquire_buffer(&buffer); 425 if (err != OK) { 426 return err; 427 } 428 429 size_t maxBytesToRead = 430 mBitsPerSample == 8 ? kMaxFrameSize / 2 : kMaxFrameSize; 431 432 size_t maxBytesAvailable = 433 (mCurrentPos - mOffset >= (off64_t)mSize) 434 ? 0 : mSize - (mCurrentPos - mOffset); 435 436 if (maxBytesToRead > maxBytesAvailable) { 437 maxBytesToRead = maxBytesAvailable; 438 } 439 440 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 441 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames, 442 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio 443 if (maxBytesToRead > 1024) { 444 maxBytesToRead = 1024; 445 } 446 maxBytesToRead = (maxBytesToRead / 65) * 65; 447 } 448 449 ssize_t n = mDataSource->readAt( 450 mCurrentPos, buffer->data(), 451 maxBytesToRead); 452 453 if (n <= 0) { 454 buffer->release(); 455 buffer = NULL; 456 457 return ERROR_END_OF_STREAM; 458 } 459 460 buffer->set_range(0, n); 461 462 if (mWaveFormat == WAVE_FORMAT_PCM) { 463 if (mBitsPerSample == 8) { 464 // Convert 8-bit unsigned samples to 16-bit signed. 465 466 MediaBuffer *tmp; 467 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 468 469 // The new buffer holds the sample number of samples, but each 470 // one is 2 bytes wide. 471 tmp->set_range(0, 2 * n); 472 473 int16_t *dst = (int16_t *)tmp->data(); 474 const uint8_t *src = (const uint8_t *)buffer->data(); 475 ssize_t numBytes = n; 476 477 while (numBytes-- > 0) { 478 *dst++ = ((int16_t)(*src) - 128) * 256; 479 ++src; 480 } 481 482 buffer->release(); 483 buffer = tmp; 484 } else if (mBitsPerSample == 24) { 485 // Convert 24-bit signed samples to 16-bit signed. 486 487 const uint8_t *src = 488 (const uint8_t *)buffer->data() + buffer->range_offset(); 489 int16_t *dst = (int16_t *)src; 490 491 size_t numSamples = buffer->range_length() / 3; 492 for (size_t i = 0; i < numSamples; ++i) { 493 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16); 494 x = (x << 8) >> 8; // sign extension 495 496 x = x >> 8; 497 *dst++ = (int16_t)x; 498 src += 3; 499 } 500 501 buffer->set_range(buffer->range_offset(), 2 * numSamples); 502 } 503 } 504 505 int64_t timeStampUs = 0; 506 507 if (mWaveFormat == WAVE_FORMAT_MSGSM) { 508 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate; 509 } else { 510 size_t bytesPerSample = mBitsPerSample >> 3; 511 timeStampUs = 1000000LL * (mCurrentPos - mOffset) 512 / (mNumChannels * bytesPerSample) / mSampleRate; 513 } 514 515 buffer->meta_data()->setInt64(kKeyTime, timeStampUs); 516 517 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 518 mCurrentPos += n; 519 520 *out = buffer; 521 522 return OK; 523} 524 525//////////////////////////////////////////////////////////////////////////////// 526 527bool SniffWAV( 528 const sp<DataSource> &source, String8 *mimeType, float *confidence, 529 sp<AMessage> *) { 530 char header[12]; 531 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 532 return false; 533 } 534 535 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 536 return false; 537 } 538 539 sp<MediaExtractor> extractor = new WAVExtractor(source); 540 if (extractor->countTracks() == 0) { 541 return false; 542 } 543 544 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV; 545 *confidence = 0.3f; 546 547 return true; 548} 549 550} // namespace android 551