1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "WAVExtractor" 19#include <utils/Log.h> 20 21#include "include/WAVExtractor.h" 22 23#include <media/stagefright/foundation/ADebug.h> 24#include <media/stagefright/DataSource.h> 25#include <media/stagefright/MediaBufferGroup.h> 26#include <media/stagefright/MediaDefs.h> 27#include <media/stagefright/MediaErrors.h> 28#include <media/stagefright/MediaSource.h> 29#include <media/stagefright/MetaData.h> 30#include <utils/String8.h> 31#include <cutils/bitops.h> 32 33#define CHANNEL_MASK_USE_CHANNEL_ORDER 0 34 35namespace android { 36 37enum { 38 WAVE_FORMAT_PCM = 0x0001, 39 WAVE_FORMAT_ALAW = 0x0006, 40 WAVE_FORMAT_MULAW = 0x0007, 41 WAVE_FORMAT_EXTENSIBLE = 0xFFFE 42}; 43 44static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"; 45 46 47static uint32_t U32_LE_AT(const uint8_t *ptr) { 48 return ptr[3] << 24 | ptr[2] << 16 | ptr[1] << 8 | ptr[0]; 49} 50 51static uint16_t U16_LE_AT(const uint8_t *ptr) { 52 return ptr[1] << 8 | ptr[0]; 53} 54 55struct WAVSource : public MediaSource { 56 WAVSource( 57 const sp<DataSource> &dataSource, 58 const sp<MetaData> &meta, 59 uint16_t waveFormat, 60 int32_t bitsPerSample, 61 off64_t offset, size_t size); 62 63 virtual status_t start(MetaData *params = NULL); 64 virtual status_t stop(); 65 virtual sp<MetaData> getFormat(); 66 67 virtual status_t read( 68 MediaBuffer **buffer, const ReadOptions *options = NULL); 69 70protected: 71 virtual ~WAVSource(); 72 73private: 74 static const size_t kMaxFrameSize; 75 76 sp<DataSource> mDataSource; 77 sp<MetaData> mMeta; 78 uint16_t mWaveFormat; 79 int32_t mSampleRate; 80 int32_t mNumChannels; 81 int32_t mBitsPerSample; 82 off64_t mOffset; 83 size_t mSize; 84 bool mStarted; 85 MediaBufferGroup *mGroup; 86 off64_t mCurrentPos; 87 88 WAVSource(const WAVSource &); 89 WAVSource &operator=(const WAVSource &); 90}; 91 92WAVExtractor::WAVExtractor(const sp<DataSource> &source) 93 : mDataSource(source), 94 mValidFormat(false), 95 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) { 96 mInitCheck = init(); 97} 98 99WAVExtractor::~WAVExtractor() { 100} 101 102sp<MetaData> WAVExtractor::getMetaData() { 103 sp<MetaData> meta = new MetaData; 104 105 if (mInitCheck != OK) { 106 return meta; 107 } 108 109 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV); 110 111 return meta; 112} 113 114size_t WAVExtractor::countTracks() { 115 return mInitCheck == OK ? 1 : 0; 116} 117 118sp<MediaSource> WAVExtractor::getTrack(size_t index) { 119 if (mInitCheck != OK || index > 0) { 120 return NULL; 121 } 122 123 return new WAVSource( 124 mDataSource, mTrackMeta, 125 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize); 126} 127 128sp<MetaData> WAVExtractor::getTrackMetaData( 129 size_t index, uint32_t flags) { 130 if (mInitCheck != OK || index > 0) { 131 return NULL; 132 } 133 134 return mTrackMeta; 135} 136 137status_t WAVExtractor::init() { 138 uint8_t header[12]; 139 if (mDataSource->readAt( 140 0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 141 return NO_INIT; 142 } 143 144 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 145 return NO_INIT; 146 } 147 148 size_t totalSize = U32_LE_AT(&header[4]); 149 150 off64_t offset = 12; 151 size_t remainingSize = totalSize; 152 while (remainingSize >= 8) { 153 uint8_t chunkHeader[8]; 154 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) { 155 return NO_INIT; 156 } 157 158 remainingSize -= 8; 159 offset += 8; 160 161 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]); 162 163 if (chunkSize > remainingSize) { 164 return NO_INIT; 165 } 166 167 if (!memcmp(chunkHeader, "fmt ", 4)) { 168 if (chunkSize < 16) { 169 return NO_INIT; 170 } 171 172 uint8_t formatSpec[40]; 173 if (mDataSource->readAt(offset, formatSpec, 2) < 2) { 174 return NO_INIT; 175 } 176 177 mWaveFormat = U16_LE_AT(formatSpec); 178 if (mWaveFormat != WAVE_FORMAT_PCM 179 && mWaveFormat != WAVE_FORMAT_ALAW 180 && mWaveFormat != WAVE_FORMAT_MULAW 181 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 182 return ERROR_UNSUPPORTED; 183 } 184 185 uint8_t fmtSize = 16; 186 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 187 fmtSize = 40; 188 } 189 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) { 190 return NO_INIT; 191 } 192 193 mNumChannels = U16_LE_AT(&formatSpec[2]); 194 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) { 195 if (mNumChannels != 1 && mNumChannels != 2) { 196 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask", 197 mNumChannels); 198 } 199 } else { 200 if (mNumChannels < 1 && mNumChannels > 8) { 201 return ERROR_UNSUPPORTED; 202 } 203 } 204 205 mSampleRate = U32_LE_AT(&formatSpec[4]); 206 207 if (mSampleRate == 0) { 208 return ERROR_MALFORMED; 209 } 210 211 mBitsPerSample = U16_LE_AT(&formatSpec[14]); 212 213 if (mWaveFormat == WAVE_FORMAT_PCM 214 || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 215 if (mBitsPerSample != 8 && mBitsPerSample != 16 216 && mBitsPerSample != 24) { 217 return ERROR_UNSUPPORTED; 218 } 219 } else { 220 CHECK(mWaveFormat == WAVE_FORMAT_MULAW 221 || mWaveFormat == WAVE_FORMAT_ALAW); 222 if (mBitsPerSample != 8) { 223 return ERROR_UNSUPPORTED; 224 } 225 } 226 227 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) { 228 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]); 229 if (validBitsPerSample != mBitsPerSample) { 230 if (validBitsPerSample != 0) { 231 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported", 232 validBitsPerSample, mBitsPerSample); 233 return ERROR_UNSUPPORTED; 234 } else { 235 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT 236 // writers don't correctly set the valid bits value, and leave it at 0. 237 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring"); 238 } 239 } 240 241 mChannelMask = U32_LE_AT(&formatSpec[20]); 242 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask); 243 if ((mChannelMask >> 18) != 0) { 244 ALOGE("invalid channel mask 0x%x", mChannelMask); 245 return ERROR_MALFORMED; 246 } 247 248 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER) 249 && (popcount(mChannelMask) != mNumChannels)) { 250 ALOGE("invalid number of channels (%d) in channel mask (0x%x)", 251 popcount(mChannelMask), mChannelMask); 252 return ERROR_MALFORMED; 253 } 254 255 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain 256 // the sample format, using the same definitions as a regular WAV header 257 mWaveFormat = U16_LE_AT(&formatSpec[24]); 258 if (mWaveFormat != WAVE_FORMAT_PCM 259 && mWaveFormat != WAVE_FORMAT_ALAW 260 && mWaveFormat != WAVE_FORMAT_MULAW) { 261 return ERROR_UNSUPPORTED; 262 } 263 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) { 264 ALOGE("unsupported GUID"); 265 return ERROR_UNSUPPORTED; 266 } 267 } 268 269 mValidFormat = true; 270 } else if (!memcmp(chunkHeader, "data", 4)) { 271 if (mValidFormat) { 272 mDataOffset = offset; 273 mDataSize = chunkSize; 274 275 mTrackMeta = new MetaData; 276 277 switch (mWaveFormat) { 278 case WAVE_FORMAT_PCM: 279 mTrackMeta->setCString( 280 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW); 281 break; 282 case WAVE_FORMAT_ALAW: 283 mTrackMeta->setCString( 284 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW); 285 break; 286 default: 287 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW); 288 mTrackMeta->setCString( 289 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW); 290 break; 291 } 292 293 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels); 294 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask); 295 mTrackMeta->setInt32(kKeySampleRate, mSampleRate); 296 297 size_t bytesPerSample = mBitsPerSample >> 3; 298 299 int64_t durationUs = 300 1000000LL * (mDataSize / (mNumChannels * bytesPerSample)) 301 / mSampleRate; 302 303 mTrackMeta->setInt64(kKeyDuration, durationUs); 304 305 return OK; 306 } 307 } 308 309 offset += chunkSize; 310 } 311 312 return NO_INIT; 313} 314 315const size_t WAVSource::kMaxFrameSize = 32768; 316 317WAVSource::WAVSource( 318 const sp<DataSource> &dataSource, 319 const sp<MetaData> &meta, 320 uint16_t waveFormat, 321 int32_t bitsPerSample, 322 off64_t offset, size_t size) 323 : mDataSource(dataSource), 324 mMeta(meta), 325 mWaveFormat(waveFormat), 326 mSampleRate(0), 327 mNumChannels(0), 328 mBitsPerSample(bitsPerSample), 329 mOffset(offset), 330 mSize(size), 331 mStarted(false), 332 mGroup(NULL) { 333 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate)); 334 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels)); 335 336 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize); 337} 338 339WAVSource::~WAVSource() { 340 if (mStarted) { 341 stop(); 342 } 343} 344 345status_t WAVSource::start(MetaData *params) { 346 ALOGV("WAVSource::start"); 347 348 CHECK(!mStarted); 349 350 mGroup = new MediaBufferGroup; 351 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 352 353 if (mBitsPerSample == 8) { 354 // As a temporary buffer for 8->16 bit conversion. 355 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize)); 356 } 357 358 mCurrentPos = mOffset; 359 360 mStarted = true; 361 362 return OK; 363} 364 365status_t WAVSource::stop() { 366 ALOGV("WAVSource::stop"); 367 368 CHECK(mStarted); 369 370 delete mGroup; 371 mGroup = NULL; 372 373 mStarted = false; 374 375 return OK; 376} 377 378sp<MetaData> WAVSource::getFormat() { 379 ALOGV("WAVSource::getFormat"); 380 381 return mMeta; 382} 383 384status_t WAVSource::read( 385 MediaBuffer **out, const ReadOptions *options) { 386 *out = NULL; 387 388 int64_t seekTimeUs; 389 ReadOptions::SeekMode mode; 390 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) { 391 int64_t pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3); 392 if (pos > mSize) { 393 pos = mSize; 394 } 395 mCurrentPos = pos + mOffset; 396 } 397 398 MediaBuffer *buffer; 399 status_t err = mGroup->acquire_buffer(&buffer); 400 if (err != OK) { 401 return err; 402 } 403 404 size_t maxBytesToRead = 405 mBitsPerSample == 8 ? kMaxFrameSize / 2 : kMaxFrameSize; 406 407 size_t maxBytesAvailable = 408 (mCurrentPos - mOffset >= (off64_t)mSize) 409 ? 0 : mSize - (mCurrentPos - mOffset); 410 411 if (maxBytesToRead > maxBytesAvailable) { 412 maxBytesToRead = maxBytesAvailable; 413 } 414 415 ssize_t n = mDataSource->readAt( 416 mCurrentPos, buffer->data(), 417 maxBytesToRead); 418 419 if (n <= 0) { 420 buffer->release(); 421 buffer = NULL; 422 423 return ERROR_END_OF_STREAM; 424 } 425 426 buffer->set_range(0, n); 427 428 if (mWaveFormat == WAVE_FORMAT_PCM) { 429 if (mBitsPerSample == 8) { 430 // Convert 8-bit unsigned samples to 16-bit signed. 431 432 MediaBuffer *tmp; 433 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK); 434 435 // The new buffer holds the sample number of samples, but each 436 // one is 2 bytes wide. 437 tmp->set_range(0, 2 * n); 438 439 int16_t *dst = (int16_t *)tmp->data(); 440 const uint8_t *src = (const uint8_t *)buffer->data(); 441 ssize_t numBytes = n; 442 443 while (numBytes-- > 0) { 444 *dst++ = ((int16_t)(*src) - 128) * 256; 445 ++src; 446 } 447 448 buffer->release(); 449 buffer = tmp; 450 } else if (mBitsPerSample == 24) { 451 // Convert 24-bit signed samples to 16-bit signed. 452 453 const uint8_t *src = 454 (const uint8_t *)buffer->data() + buffer->range_offset(); 455 int16_t *dst = (int16_t *)src; 456 457 size_t numSamples = buffer->range_length() / 3; 458 for (size_t i = 0; i < numSamples; ++i) { 459 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16); 460 x = (x << 8) >> 8; // sign extension 461 462 x = x >> 8; 463 *dst++ = (int16_t)x; 464 src += 3; 465 } 466 467 buffer->set_range(buffer->range_offset(), 2 * numSamples); 468 } 469 } 470 471 size_t bytesPerSample = mBitsPerSample >> 3; 472 473 buffer->meta_data()->setInt64( 474 kKeyTime, 475 1000000LL * (mCurrentPos - mOffset) 476 / (mNumChannels * bytesPerSample) / mSampleRate); 477 478 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 479 mCurrentPos += n; 480 481 *out = buffer; 482 483 return OK; 484} 485 486//////////////////////////////////////////////////////////////////////////////// 487 488bool SniffWAV( 489 const sp<DataSource> &source, String8 *mimeType, float *confidence, 490 sp<AMessage> *) { 491 char header[12]; 492 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) { 493 return false; 494 } 495 496 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) { 497 return false; 498 } 499 500 sp<MediaExtractor> extractor = new WAVExtractor(source); 501 if (extractor->countTracks() == 0) { 502 return false; 503 } 504 505 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV; 506 *confidence = 0.3f; 507 508 return true; 509} 510 511} // namespace android 512