WAVExtractor.cpp revision 3bed68cb3d43af40475f56211869c40470c1fb4d
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "WAVExtractor" 19#include <utils/Log.h> 20 21#include "include/WAVExtractor.h" 22 23#include <media/stagefright/foundation/ADebug.h> 24#include <media/stagefright/DataSource.h> 25#include <media/stagefright/MediaBufferGroup.h> 26#include <media/stagefright/MediaDefs.h> 27#include <media/stagefright/MediaErrors.h> 28#include <media/stagefright/MediaSource.h> 29#include <media/stagefright/MetaData.h> 30#include <utils/String8.h> 31#include <cutils/bitops.h> 32 33#define CHANNEL_MASK_USE_CHANNEL_ORDER 0 34 35namespace android { 36 37enum { 38 WAVE_FORMAT_PCM = 0x0001, 39 WAVE_FORMAT_ALAW = 0x0006, 40 WAVE_FORMAT_MULAW = 0x0007, 41 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 42}; 43 44static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 45 46 47static uint32_t U32_LE_AT(const uint8_t *ptr) { 48 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 49} 50 51static uint16_t U16_LE_AT(const uint8_t *ptr) { 52 return ptr[1] << 8 | ptr[0]; 53} 54 55struct WAVSource : public MediaSource { 56 WAVSource( 57 const sp<DataSource> &dataSource, 58 const sp<MetaData> &meta, 59 uint16_t waveFormat, 60 int32_t bitsPerSample, 61 off64_t offset, size_t size); 62 63 virtual status_t start(MetaData *params = NULL); 64 virtual status_t stop(); 65 virtual sp<MetaData> getFormat(); 66 67 virtual status_t read( 68 MediaBuffer **buffer, const ReadOptions *options = NULL); 69 70protected: 71 virtual ~WAVSource(); 72 73private: 74 static const size_t kMaxFrameSize; 75 76 sp<DataSource> mDataSource; 77 sp<MetaData> mMeta; 78 uint16_t mWaveFormat; 79 int32_t mSampleRate; 80 int32_t mNumChannels; 81 int32_t mBitsPerSample; 82 off64_t mOffset; 83 size_t mSize; 84 bool mStarted; 85 MediaBufferGroup *mGroup; 86 off64_t mCurrentPos; 87 88 WAVSource(const WAVSource &); 89 WAVSource &operator=(const WAVSource &); 90}; 91 92WAVExtractor::WAVExtractor(const sp<DataSource> &source) 93 : mDataSource(source), 94 mValidFormat(false), 95 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 96 mInitCheck = init(); 97} 98 99WAVExtractor::~WAVExtractor() { 100} 101 102sp<MetaData> WAVExtractor::getMetaData() { 103 sp<MetaData> meta = new MetaData; 104 105 if (mInitCheck != OK) { 106 return meta; 107 } 108 109 meta->setCString(kKeyMIMEType, "audio/x-wav"); 110 111 return meta; 112} 113 114size_t WAVExtractor::countTracks() { 115 return mInitCheck == OK ? 1 : 0; 116} 117 118sp<MediaSource> WAVExtractor::getTrack(size_t index) { 119 if (mInitCheck != OK || index > 0) { 120 return NULL; 121 } 122 123 return new WAVSource( 124 mDataSource, mTrackMeta, 125 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 126} 127 128sp<MetaData> WAVExtractor::getTrackMetaData( 129 size_t index, uint32_t flags) { 130 if (mInitCheck != OK || index > 0) { 131 return NULL; 132 } 133 134 return mTrackMeta; 135} 136 137status_t WAVExtractor::init() { 138 uint8_t header[12]; 139 if (mDataSource->readAt( 140 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 141 return NO_INIT; 142 } 143 144 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 145 return NO_INIT; 146 } 147 148 size_t totalSize = U32_LE_AT(&header[4]); 149 150 off64_t offset = 12; 151 size_t remainingSize = totalSize; 152 while (remainingSize >= 8) { 153 uint8_t chunkHeader[8]; 154 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 155 return NO_INIT; 156 } 157 158 remainingSize -= 8; 159 offset += 8; 160 161 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 162 163 if (chunkSize > remainingSize) { 164 return NO_INIT; 165 } 166 167 if (!memcmp(chunkHeader, "fmt ", 4)) { 168 if (chunkSize < 16) { 169 return NO_INIT; 170 } 171 172 uint8_t formatSpec[40]; 173 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 174 return NO_INIT; 175 } 176 177 mWaveFormat = U16_LE_AT(formatSpec); 178 if (mWaveFormat != WAVE_FORMAT_PCM 179 && mWaveFormat != WAVE_FORMAT_ALAW 180 && mWaveFormat != WAVE_FORMAT_MULAW 181 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 182 return ERROR_UNSUPPORTED; 183 } 184 185 uint8_t fmtSize = 16; 186 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 187 fmtSize = 40; 188 } 189 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 190 return NO_INIT; 191 } 192 193 mNumChannels = U16_LE_AT(&formatSpec[2]); 194 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 195 if (mNumChannels != 1 && mNumChannels != 2) { 196 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 197 mNumChannels); 198 } 199 } else { 200 if (mNumChannels < 1 && mNumChannels > 8) { 201 return ERROR_UNSUPPORTED; 202 } 203 } 204 205 mSampleRate = U32_LE_AT(&formatSpec[4]); 206 207 if (mSampleRate == 0) { 208 return ERROR_MALFORMED; 209 } 210 211 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 212 213 if (mWaveFormat == WAVE_FORMAT_PCM 214 || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 215 if (mBitsPerSample != 8 && mBitsPerSample != 16 216 && mBitsPerSample != 24) { 217 return ERROR_UNSUPPORTED; 218 } 219 } else { 220 CHECK(mWaveFormat == WAVE_FORMAT_MULAW 221 || mWaveFormat == WAVE_FORMAT_ALAW); 222 if (mBitsPerSample != 8) { 223 return ERROR_UNSUPPORTED; 224 } 225 } 226 227 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 228 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 229 if (validBitsPerSample != mBitsPerSample) { 230 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 231 validBitsPerSample, mBitsPerSample); 232 return ERROR_UNSUPPORTED; 233 } 234 235 mChannelMask = U32_LE_AT(&formatSpec[20]); 236 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 237 if ((mChannelMask >> 18) != 0) { 238 ALOGE("invalid channel mask 0x%x", mChannelMask); 239 return ERROR_MALFORMED; 240 } 241 242 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 243 && (popcount(mChannelMask) != mNumChannels)) { 244 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 245 popcount(mChannelMask), mChannelMask); 246 return ERROR_MALFORMED; 247 } 248 249 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 250 // the sample format, using the same definitions as a regular WAV header 251 mWaveFormat = U16_LE_AT(&formatSpec[24]); 252 if (mWaveFormat != WAVE_FORMAT_PCM 253 && mWaveFormat != WAVE_FORMAT_ALAW 254 && mWaveFormat != WAVE_FORMAT_MULAW) { 255 return ERROR_UNSUPPORTED; 256 } 257 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) { 258 ALOGE("unsupported GUID"); 259 return ERROR_UNSUPPORTED; 260 } 261 } 262 263 mValidFormat = true; 264 } else if (!memcmp(chunkHeader, "data", 4)) { 265 if (mValidFormat) { 266 mDataOffset = offset; 267 mDataSize = chunkSize; 268 269 mTrackMeta = new MetaData; 270 271 switch (mWaveFormat) { 272 case WAVE_FORMAT_PCM: 273 mTrackMeta->setCString( 274 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 275 break; 276 case WAVE_FORMAT_ALAW: 277 mTrackMeta->setCString( 278 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 279 break; 280 default: 281 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 282 mTrackMeta->setCString( 283 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 284 break; 285 } 286 287 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels); 288 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask); 289 mTrackMeta->setInt32(kKeySampleRate, mSampleRate); 290 291 size_t bytesPerSample = mBitsPerSample >> 3; 292 293 int64_t durationUs = 294 1000000LL * (mDataSize / (mNumChannels * bytesPerSample)) 295 / mSampleRate; 296 297 mTrackMeta->setInt64(kKeyDuration, durationUs); 298 299 return OK; 300 } 301 } 302 303 offset += chunkSize; 304 } 305 306 return NO_INIT; 307} 308 309const size_t WAVSource::kMaxFrameSize = 32768; 310 311WAVSource::WAVSource( 312 const sp<DataSource> &dataSource, 313 const sp<MetaData> &meta, 314 uint16_t waveFormat, 315 int32_t bitsPerSample, 316 off64_t offset, size_t size) 317 : mDataSource(dataSource), 318 mMeta(meta), 319 mWaveFormat(waveFormat), 320 mSampleRate(0), 321 mNumChannels(0), 322 mBitsPerSample(bitsPerSample), 323 mOffset(offset), 324 mSize(size), 325 mStarted(false), 326 mGroup(NULL) { 327 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate)); 328 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels)); 329 330 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize); 331} 332 333WAVSource::~WAVSource() { 334 if (mStarted) { 335 stop(); 336 } 337} 338 339status_t WAVSource::start(MetaData *params) { 340 ALOGV("WAVSource::start"); 341 342 CHECK(!mStarted); 343 344 mGroup = new MediaBufferGroup; 345 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 346 347 if (mBitsPerSample == 8) { 348 // As a temporary buffer for 8->16 bit conversion. 349 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 350 } 351 352 mCurrentPos = mOffset; 353 354 mStarted = true; 355 356 return OK; 357} 358 359status_t WAVSource::stop() { 360 ALOGV("WAVSource::stop"); 361 362 CHECK(mStarted); 363 364 delete mGroup; 365 mGroup = NULL; 366 367 mStarted = false; 368 369 return OK; 370} 371 372sp<MetaData> WAVSource::getFormat() { 373 ALOGV("WAVSource::getFormat"); 374 375 return mMeta; 376} 377 378status_t WAVSource::read( 379 MediaBuffer **out, const ReadOptions *options) { 380 *out = NULL; 381 382 int64_t seekTimeUs; 383 ReadOptions::SeekMode mode; 384 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 385 int64_t pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 386 if (pos > mSize) { 387 pos = mSize; 388 } 389 mCurrentPos = pos + mOffset; 390 } 391 392 MediaBuffer *buffer; 393 status_t err = mGroup->acquire_buffer(&buffer); 394 if (err != OK) { 395 return err; 396 } 397 398 size_t maxBytesToRead = 399 mBitsPerSample == 8 ? kMaxFrameSize / 2 : kMaxFrameSize; 400 401 size_t maxBytesAvailable = 402 (mCurrentPos - mOffset >= (off64_t)mSize) 403 ? 0 : mSize - (mCurrentPos - mOffset); 404 405 if (maxBytesToRead > maxBytesAvailable) { 406 maxBytesToRead = maxBytesAvailable; 407 } 408 409 ssize_t n = mDataSource->readAt( 410 mCurrentPos, buffer->data(), 411 maxBytesToRead); 412 413 if (n <= 0) { 414 buffer->release(); 415 buffer = NULL; 416 417 return ERROR_END_OF_STREAM; 418 } 419 420 buffer->set_range(0, n); 421 422 if (mWaveFormat == WAVE_FORMAT_PCM) { 423 if (mBitsPerSample == 8) { 424 // Convert 8-bit unsigned samples to 16-bit signed. 425 426 MediaBuffer *tmp; 427 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 428 429 // The new buffer holds the sample number of samples, but each 430 // one is 2 bytes wide. 431 tmp->set_range(0, 2 * n); 432 433 int16_t *dst = (int16_t *)tmp->data(); 434 const uint8_t *src = (const uint8_t *)buffer->data(); 435 ssize_t numBytes = n; 436 437 while (numBytes-- > 0) { 438 *dst++ = ((int16_t)(*src) - 128) * 256; 439 ++src; 440 } 441 442 buffer->release(); 443 buffer = tmp; 444 } else if (mBitsPerSample == 24) { 445 // Convert 24-bit signed samples to 16-bit signed. 446 447 const uint8_t *src = 448 (const uint8_t *)buffer->data() + buffer->range_offset(); 449 int16_t *dst = (int16_t *)src; 450 451 size_t numSamples = buffer->range_length() / 3; 452 for (size_t i = 0; i < numSamples; ++i) { 453 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16); 454 x = (x << 8) >> 8; // sign extension 455 456 x = x >> 8; 457 *dst++ = (int16_t)x; 458 src += 3; 459 } 460 461 buffer->set_range(buffer->range_offset(), 2 * numSamples); 462 } 463 } 464 465 size_t bytesPerSample = mBitsPerSample >> 3; 466 467 buffer->meta_data()->setInt64( 468 kKeyTime, 469 1000000LL * (mCurrentPos - mOffset) 470 / (mNumChannels * bytesPerSample) / mSampleRate); 471 472 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 473 mCurrentPos += n; 474 475 *out = buffer; 476 477 return OK; 478} 479 480//////////////////////////////////////////////////////////////////////////////// 481 482bool SniffWAV( 483 const sp<DataSource> &source, String8 *mimeType, float *confidence, 484 sp<AMessage> *) { 485 char header[12]; 486 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 487 return false; 488 } 489 490 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 491 return false; 492 } 493 494 sp<MediaExtractor> extractor = new WAVExtractor(source); 495 if (extractor->countTracks() == 0) { 496 return false; 497 } 498 499 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV; 500 *confidence = 0.3f; 501 502 return true; 503} 504 505} // namespace android 506 507