MPEG4Extractor.cpp revision e5f0966c76bd0a7e81e4205c8d8b55e6b34c833e
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"
#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

namespace android {

// MediaSource implementation private to this file. Only the declaration is
// given here; the member functions are defined further down in the file.
// NOTE(review): presumably one instance delivers the samples of a single
// track of the container -- confirm against the member definitions.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances cannot be destroyed directly by
    // outside code.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference member, not a copy.
    // NOTE(review): presumably bound to the "sidx" constructor argument, in
    // which case that vector has to outlive this object -- confirm.
    Vector<SidxEntry> &mSegments;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values collected by parseTrackFragmentHeader().
    // NOTE(review): the field and flag names look like the 'tfhd' box of
    // ISO/IEC 14496-12 -- confirm against the parser definition.
    struct TrackFragmentHeaderInfo {
        // Bit masks tested against mFlags.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // One entry of mCurrentSamples: location, size and duration of a sample
    // plus its per-sample encryption data (IV and clear/encrypted sizes).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Private copy constructor and assignment operator: the class cannot be
    // copied by outside code.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
157 158struct MPEG4DataSource : public DataSource { 159 MPEG4DataSource(const sp<DataSource> &source); 160 161 virtual status_t initCheck() const; 162 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 163 virtual status_t getSize(off64_t *size); 164 virtual uint32_t flags(); 165 166 status_t setCachedRange(off64_t offset, size_t size); 167 168protected: 169 virtual ~MPEG4DataSource(); 170 171private: 172 Mutex mLock; 173 174 sp<DataSource> mSource; 175 off64_t mCachedOffset; 176 size_t mCachedSize; 177 uint8_t *mCache; 178 179 void clearCache(); 180 181 MPEG4DataSource(const MPEG4DataSource &); 182 MPEG4DataSource &operator=(const MPEG4DataSource &); 183}; 184 185MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 186 : mSource(source), 187 mCachedOffset(0), 188 mCachedSize(0), 189 mCache(NULL) { 190} 191 192MPEG4DataSource::~MPEG4DataSource() { 193 clearCache(); 194} 195 196void MPEG4DataSource::clearCache() { 197 if (mCache) { 198 free(mCache); 199 mCache = NULL; 200 } 201 202 mCachedOffset = 0; 203 mCachedSize = 0; 204} 205 206status_t MPEG4DataSource::initCheck() const { 207 return mSource->initCheck(); 208} 209 210ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 211 Mutex::Autolock autoLock(mLock); 212 213 if (isInRange(mCachedOffset, mCachedSize, offset, size)) { 214 memcpy(data, &mCache[offset - mCachedOffset], size); 215 return size; 216 } 217 218 return mSource->readAt(offset, data, size); 219} 220 221status_t MPEG4DataSource::getSize(off64_t *size) { 222 return mSource->getSize(size); 223} 224 225uint32_t MPEG4DataSource::flags() { 226 return mSource->flags(); 227} 228 229status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 230 Mutex::Autolock autoLock(mLock); 231 232 clearCache(); 233 234 mCache = (uint8_t *)malloc(size); 235 236 if (mCache == NULL) { 237 return -ENOMEM; 238 } 239 240 mCachedOffset = offset; 241 mCachedSize = size; 242 243 ssize_t err = mSource->readAt(mCachedOffset, 
mCache, mCachedSize); 244 245 if (err < (ssize_t)size) { 246 clearCache(); 247 248 return ERROR_IO; 249 } 250 251 return OK; 252} 253 254//////////////////////////////////////////////////////////////////////////////// 255 256static void hexdump(const void *_data, size_t size) { 257 const uint8_t *data = (const uint8_t *)_data; 258 size_t offset = 0; 259 while (offset < size) { 260 printf("0x%04x ", offset); 261 262 size_t n = size - offset; 263 if (n > 16) { 264 n = 16; 265 } 266 267 for (size_t i = 0; i < 16; ++i) { 268 if (i == 8) { 269 printf(" "); 270 } 271 272 if (offset + i < size) { 273 printf("%02x ", data[offset + i]); 274 } else { 275 printf(" "); 276 } 277 } 278 279 printf(" "); 280 281 for (size_t i = 0; i < n; ++i) { 282 if (isprint(data[offset + i])) { 283 printf("%c", data[offset + i]); 284 } else { 285 printf("."); 286 } 287 } 288 289 printf("\n"); 290 291 offset += 16; 292 } 293} 294 295static const char *FourCC2MIME(uint32_t fourcc) { 296 switch (fourcc) { 297 case FOURCC('m', 'p', '4', 'a'): 298 return MEDIA_MIMETYPE_AUDIO_AAC; 299 300 case FOURCC('s', 'a', 'm', 'r'): 301 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 302 303 case FOURCC('s', 'a', 'w', 'b'): 304 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 305 306 case FOURCC('m', 'p', '4', 'v'): 307 return MEDIA_MIMETYPE_VIDEO_MPEG4; 308 309 case FOURCC('s', '2', '6', '3'): 310 case FOURCC('h', '2', '6', '3'): 311 case FOURCC('H', '2', '6', '3'): 312 return MEDIA_MIMETYPE_VIDEO_H263; 313 314 case FOURCC('a', 'v', 'c', '1'): 315 return MEDIA_MIMETYPE_VIDEO_AVC; 316 317 default: 318 CHECK(!"should not be here."); 319 return NULL; 320 } 321} 322 323static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 324 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 325 // AMR NB audio is always mono, 8kHz 326 *channels = 1; 327 *rate = 8000; 328 return true; 329 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 330 // AMR WB audio is always mono, 
16kHz 331 *channels = 1; 332 *rate = 16000; 333 return true; 334 } 335 return false; 336} 337 338MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 339 : mSidxDuration(0), 340 mMoofOffset(0), 341 mDataSource(source), 342 mInitCheck(NO_INIT), 343 mHasVideo(false), 344 mHeaderTimescale(0), 345 mFirstTrack(NULL), 346 mLastTrack(NULL), 347 mFileMetaData(new MetaData), 348 mFirstSINF(NULL), 349 mIsDrm(false) { 350} 351 352MPEG4Extractor::~MPEG4Extractor() { 353 Track *track = mFirstTrack; 354 while (track) { 355 Track *next = track->next; 356 357 delete track; 358 track = next; 359 } 360 mFirstTrack = mLastTrack = NULL; 361 362 SINF *sinf = mFirstSINF; 363 while (sinf) { 364 SINF *next = sinf->next; 365 delete sinf->IPMPData; 366 delete sinf; 367 sinf = next; 368 } 369 mFirstSINF = NULL; 370 371 for (size_t i = 0; i < mPssh.size(); i++) { 372 delete [] mPssh[i].data; 373 } 374} 375 376uint32_t MPEG4Extractor::flags() const { 377 return CAN_PAUSE | 378 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
379 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 380} 381 382sp<MetaData> MPEG4Extractor::getMetaData() { 383 status_t err; 384 if ((err = readMetaData()) != OK) { 385 return new MetaData; 386 } 387 388 return mFileMetaData; 389} 390 391size_t MPEG4Extractor::countTracks() { 392 status_t err; 393 if ((err = readMetaData()) != OK) { 394 ALOGV("MPEG4Extractor::countTracks: no tracks"); 395 return 0; 396 } 397 398 size_t n = 0; 399 Track *track = mFirstTrack; 400 while (track) { 401 ++n; 402 track = track->next; 403 } 404 405 ALOGV("MPEG4Extractor::countTracks: %d tracks", n); 406 return n; 407} 408 409sp<MetaData> MPEG4Extractor::getTrackMetaData( 410 size_t index, uint32_t flags) { 411 status_t err; 412 if ((err = readMetaData()) != OK) { 413 return NULL; 414 } 415 416 Track *track = mFirstTrack; 417 while (index > 0) { 418 if (track == NULL) { 419 return NULL; 420 } 421 422 track = track->next; 423 --index; 424 } 425 426 if (track == NULL) { 427 return NULL; 428 } 429 430 if ((flags & kIncludeExtensiveMetaData) 431 && !track->includes_expensive_metadata) { 432 track->includes_expensive_metadata = true; 433 434 const char *mime; 435 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 436 if (!strncasecmp("video/", mime, 6)) { 437 if (mMoofOffset > 0) { 438 int64_t duration; 439 if (track->meta->findInt64(kKeyDuration, &duration)) { 440 // nothing fancy, just pick a frame near 1/4th of the duration 441 track->meta->setInt64( 442 kKeyThumbnailTime, duration / 4); 443 } 444 } else { 445 uint32_t sampleIndex; 446 uint32_t sampleTime; 447 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 448 && track->sampleTable->getMetaDataForSample( 449 sampleIndex, NULL /* offset */, NULL /* size */, 450 &sampleTime) == OK) { 451 track->meta->setInt64( 452 kKeyThumbnailTime, 453 ((int64_t)sampleTime * 1000000) / track->timescale); 454 } 455 } 456 } 457 } 458 459 return track->meta; 460} 461 462static void MakeFourCCString(uint32_t x, char *s) { 463 s[0] = x 
>> 24; 464 s[1] = (x >> 16) & 0xff; 465 s[2] = (x >> 8) & 0xff; 466 s[3] = x & 0xff; 467 s[4] = '\0'; 468} 469 470status_t MPEG4Extractor::readMetaData() { 471 if (mInitCheck != NO_INIT) { 472 return mInitCheck; 473 } 474 475 off64_t offset = 0; 476 status_t err; 477 while (true) { 478 err = parseChunk(&offset, 0); 479 if (err == OK) { 480 continue; 481 } 482 483 uint32_t hdr[2]; 484 if (mDataSource->readAt(offset, hdr, 8) < 8) { 485 break; 486 } 487 uint32_t chunk_type = ntohl(hdr[1]); 488 if (chunk_type == FOURCC('s', 'i', 'd', 'x')) { 489 // parse the sidx box too 490 continue; 491 } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 492 // store the offset of the first segment 493 mMoofOffset = offset; 494 } 495 break; 496 } 497 498 if (mInitCheck == OK) { 499 if (mHasVideo) { 500 mFileMetaData->setCString( 501 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 502 } else { 503 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 504 } 505 506 mInitCheck = OK; 507 } else { 508 mInitCheck = err; 509 } 510 511 CHECK_NE(err, (status_t)NO_INIT); 512 513 // copy pssh data into file metadata 514 int psshsize = 0; 515 for (size_t i = 0; i < mPssh.size(); i++) { 516 psshsize += 20 + mPssh[i].datalen; 517 } 518 if (psshsize) { 519 char *buf = (char*)malloc(psshsize); 520 char *ptr = buf; 521 for (size_t i = 0; i < mPssh.size(); i++) { 522 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 523 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 524 ptr += (20 + mPssh[i].datalen); 525 } 526 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 527 free(buf); 528 } 529 return mInitCheck; 530} 531 532char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 533 if (mFirstSINF == NULL) { 534 return NULL; 535 } 536 537 SINF *sinf = mFirstSINF; 538 while (sinf && (trackID != sinf->trackID)) { 539 sinf = sinf->next; 540 } 541 542 if (sinf == NULL) { 543 return NULL; 544 } 545 546 *len = sinf->len; 547 return sinf->IPMPData; 548} 549 550// Reads an encoded integer 7 bits 
at a time until it encounters the high bit clear. 551static int32_t readSize(off64_t offset, 552 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 553 uint32_t size = 0; 554 uint8_t data; 555 bool moreData = true; 556 *numOfBytes = 0; 557 558 while (moreData) { 559 if (DataSource->readAt(offset, &data, 1) < 1) { 560 return -1; 561 } 562 offset ++; 563 moreData = (data >= 128) ? true : false; 564 size = (size << 7) | (data & 0x7f); // Take last 7 bits 565 (*numOfBytes) ++; 566 } 567 568 return size; 569} 570 571status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) { 572 uint8_t updateIdTag; 573 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 574 return ERROR_IO; 575 } 576 data_offset ++; 577 578 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 579 return ERROR_MALFORMED; 580 } 581 582 uint8_t numOfBytes; 583 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 584 if (size < 0) { 585 return ERROR_IO; 586 } 587 int32_t classSize = size; 588 data_offset += numOfBytes; 589 590 while(size >= 11 ) { 591 uint8_t descriptorTag; 592 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 593 return ERROR_IO; 594 } 595 data_offset ++; 596 597 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 598 return ERROR_MALFORMED; 599 } 600 601 uint8_t buffer[8]; 602 //ObjectDescriptorID and ObjectDescriptor url flag 603 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 604 return ERROR_IO; 605 } 606 data_offset += 2; 607 608 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 609 return ERROR_MALFORMED; 610 } 611 612 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 613 return ERROR_IO; 614 } 615 data_offset += 8; 616 617 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 618 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 619 return ERROR_MALFORMED; 620 } 621 622 SINF *sinf = new SINF; 623 sinf->trackID = U16_AT(&buffer[3]); 624 sinf->IPMPDescriptorID = buffer[7]; 625 sinf->next = mFirstSINF; 
626 mFirstSINF = sinf; 627 628 size -= (8 + 2 + 1); 629 } 630 631 if (size != 0) { 632 return ERROR_MALFORMED; 633 } 634 635 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 636 return ERROR_IO; 637 } 638 data_offset ++; 639 640 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 641 return ERROR_MALFORMED; 642 } 643 644 size = readSize(data_offset, mDataSource, &numOfBytes); 645 if (size < 0) { 646 return ERROR_IO; 647 } 648 classSize = size; 649 data_offset += numOfBytes; 650 651 while (size > 0) { 652 uint8_t tag; 653 int32_t dataLen; 654 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 655 return ERROR_IO; 656 } 657 data_offset ++; 658 659 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 660 uint8_t id; 661 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 662 if (dataLen < 0) { 663 return ERROR_IO; 664 } else if (dataLen < 4) { 665 return ERROR_MALFORMED; 666 } 667 data_offset += numOfBytes; 668 669 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 670 return ERROR_IO; 671 } 672 data_offset ++; 673 674 SINF *sinf = mFirstSINF; 675 while (sinf && (sinf->IPMPDescriptorID != id)) { 676 sinf = sinf->next; 677 } 678 if (sinf == NULL) { 679 return ERROR_MALFORMED; 680 } 681 sinf->len = dataLen - 3; 682 sinf->IPMPData = new char[sinf->len]; 683 684 if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) { 685 return ERROR_IO; 686 } 687 data_offset += sinf->len; 688 689 size -= (dataLen + numOfBytes + 1); 690 } 691 } 692 693 if (size != 0) { 694 return ERROR_MALFORMED; 695 } 696 697 return UNKNOWN_ERROR; // Return a dummy error. 
698} 699 700struct PathAdder { 701 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 702 : mPath(path) { 703 mPath->push(chunkType); 704 } 705 706 ~PathAdder() { 707 mPath->pop(); 708 } 709 710private: 711 Vector<uint32_t> *mPath; 712 713 PathAdder(const PathAdder &); 714 PathAdder &operator=(const PathAdder &); 715}; 716 717static bool underMetaDataPath(const Vector<uint32_t> &path) { 718 return path.size() >= 5 719 && path[0] == FOURCC('m', 'o', 'o', 'v') 720 && path[1] == FOURCC('u', 'd', 't', 'a') 721 && path[2] == FOURCC('m', 'e', 't', 'a') 722 && path[3] == FOURCC('i', 'l', 's', 't'); 723} 724 725// Given a time in seconds since Jan 1 1904, produce a human-readable string. 726static void convertTimeToDate(int64_t time_1904, String8 *s) { 727 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 728 729 char tmp[32]; 730 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 731 732 s->setTo(tmp); 733} 734 735status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 736 ALOGV("entering parseChunk %lld/%d", *offset, depth); 737 uint32_t hdr[2]; 738 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 739 return ERROR_IO; 740 } 741 uint64_t chunk_size = ntohl(hdr[0]); 742 uint32_t chunk_type = ntohl(hdr[1]); 743 off64_t data_offset = *offset + 8; 744 745 if (chunk_size == 1) { 746 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 747 return ERROR_IO; 748 } 749 chunk_size = ntoh64(chunk_size); 750 data_offset += 8; 751 752 if (chunk_size < 16) { 753 // The smallest valid chunk is 16 bytes long in this case. 754 return ERROR_MALFORMED; 755 } 756 } else if (chunk_size < 8) { 757 // The smallest valid chunk is 8 bytes long. 
758 return ERROR_MALFORMED; 759 } 760 761 char chunk[5]; 762 MakeFourCCString(chunk_type, chunk); 763 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 764 765#if 0 766 static const char kWhitespace[] = " "; 767 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 768 printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); 769 770 char buffer[256]; 771 size_t n = chunk_size; 772 if (n > sizeof(buffer)) { 773 n = sizeof(buffer); 774 } 775 if (mDataSource->readAt(*offset, buffer, n) 776 < (ssize_t)n) { 777 return ERROR_IO; 778 } 779 780 hexdump(buffer, n); 781#endif 782 783 PathAdder autoAdder(&mPath, chunk_type); 784 785 off64_t chunk_data_size = *offset + chunk_size - data_offset; 786 787 if (chunk_type != FOURCC('c', 'p', 'r', 't') 788 && chunk_type != FOURCC('c', 'o', 'v', 'r') 789 && mPath.size() == 5 && underMetaDataPath(mPath)) { 790 off64_t stop_offset = *offset + chunk_size; 791 *offset = data_offset; 792 while (*offset < stop_offset) { 793 status_t err = parseChunk(offset, depth + 1); 794 if (err != OK) { 795 return err; 796 } 797 } 798 799 if (*offset != stop_offset) { 800 return ERROR_MALFORMED; 801 } 802 803 return OK; 804 } 805 806 switch(chunk_type) { 807 case FOURCC('m', 'o', 'o', 'v'): 808 case FOURCC('t', 'r', 'a', 'k'): 809 case FOURCC('m', 'd', 'i', 'a'): 810 case FOURCC('m', 'i', 'n', 'f'): 811 case FOURCC('d', 'i', 'n', 'f'): 812 case FOURCC('s', 't', 'b', 'l'): 813 case FOURCC('m', 'v', 'e', 'x'): 814 case FOURCC('m', 'o', 'o', 'f'): 815 case FOURCC('t', 'r', 'a', 'f'): 816 case FOURCC('m', 'f', 'r', 'a'): 817 case FOURCC('u', 'd', 't', 'a'): 818 case FOURCC('i', 'l', 's', 't'): 819 case FOURCC('s', 'i', 'n', 'f'): 820 case FOURCC('s', 'c', 'h', 'i'): 821 case FOURCC('e', 'd', 't', 's'): 822 { 823 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 824 ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); 825 826 if (mDataSource->flags() 827 & (DataSource::kWantsPrefetching 828 | 
DataSource::kIsCachingDataSource)) { 829 sp<MPEG4DataSource> cachedSource = 830 new MPEG4DataSource(mDataSource); 831 832 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 833 mDataSource = cachedSource; 834 } 835 } 836 837 mLastTrack->sampleTable = new SampleTable(mDataSource); 838 } 839 840 bool isTrack = false; 841 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 842 isTrack = true; 843 844 Track *track = new Track; 845 track->next = NULL; 846 if (mLastTrack) { 847 mLastTrack->next = track; 848 } else { 849 mFirstTrack = track; 850 } 851 mLastTrack = track; 852 853 track->meta = new MetaData; 854 track->includes_expensive_metadata = false; 855 track->skipTrack = false; 856 track->timescale = 0; 857 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 858 } 859 860 off64_t stop_offset = *offset + chunk_size; 861 *offset = data_offset; 862 while (*offset < stop_offset) { 863 status_t err = parseChunk(offset, depth + 1); 864 if (err != OK) { 865 return err; 866 } 867 } 868 869 if (*offset != stop_offset) { 870 return ERROR_MALFORMED; 871 } 872 873 if (isTrack) { 874 if (mLastTrack->skipTrack) { 875 Track *cur = mFirstTrack; 876 877 if (cur == mLastTrack) { 878 delete cur; 879 mFirstTrack = mLastTrack = NULL; 880 } else { 881 while (cur && cur->next != mLastTrack) { 882 cur = cur->next; 883 } 884 cur->next = NULL; 885 delete mLastTrack; 886 mLastTrack = cur; 887 } 888 889 return OK; 890 } 891 892 status_t err = verifyTrack(mLastTrack); 893 894 if (err != OK) { 895 return err; 896 } 897 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 898 mInitCheck = OK; 899 900 if (!mIsDrm) { 901 return UNKNOWN_ERROR; // Return a dummy error. 
902 } else { 903 return OK; 904 } 905 } 906 break; 907 } 908 909 case FOURCC('e', 'l', 's', 't'): 910 { 911 // See 14496-12 8.6.6 912 uint8_t version; 913 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 914 return ERROR_IO; 915 } 916 917 uint32_t entry_count; 918 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 919 return ERROR_IO; 920 } 921 922 if (entry_count != 1) { 923 // we only support a single entry at the moment, for gapless playback 924 ALOGW("ignoring edit list with %d entries", entry_count); 925 } else if (mHeaderTimescale == 0) { 926 ALOGW("ignoring edit list because timescale is 0"); 927 } else { 928 off64_t entriesoffset = data_offset + 8; 929 uint64_t segment_duration; 930 int64_t media_time; 931 932 if (version == 1) { 933 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 934 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 935 return ERROR_IO; 936 } 937 } else if (version == 0) { 938 uint32_t sd; 939 int32_t mt; 940 if (!mDataSource->getUInt32(entriesoffset, &sd) || 941 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 942 return ERROR_IO; 943 } 944 segment_duration = sd; 945 media_time = mt; 946 } else { 947 return ERROR_IO; 948 } 949 950 uint64_t halfscale = mHeaderTimescale / 2; 951 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 952 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 953 954 int64_t duration; 955 int32_t samplerate; 956 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 957 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 958 959 int64_t delay = (media_time * samplerate + 500000) / 1000000; 960 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 961 962 int64_t paddingus = duration - (segment_duration + media_time); 963 if (paddingus < 0) { 964 // track duration from media header (which is what kKeyDuration is) might 965 // be slightly shorter than the segment duration, which would make the 
966 // padding negative. Clamp to zero. 967 paddingus = 0; 968 } 969 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 970 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 971 } 972 } 973 *offset += chunk_size; 974 break; 975 } 976 977 case FOURCC('f', 'r', 'm', 'a'): 978 { 979 uint32_t original_fourcc; 980 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 981 return ERROR_IO; 982 } 983 original_fourcc = ntohl(original_fourcc); 984 ALOGV("read original format: %d", original_fourcc); 985 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 986 uint32_t num_channels = 0; 987 uint32_t sample_rate = 0; 988 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 989 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 990 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 991 } 992 *offset += chunk_size; 993 break; 994 } 995 996 case FOURCC('t', 'e', 'n', 'c'): 997 { 998 if (chunk_size < 32) { 999 return ERROR_MALFORMED; 1000 } 1001 1002 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1003 // default IV size, 16 bytes default KeyID 1004 // (ISO 23001-7) 1005 char buf[4]; 1006 memset(buf, 0, 4); 1007 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1008 return ERROR_IO; 1009 } 1010 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1011 if (defaultAlgorithmId > 1) { 1012 // only 0 (clear) and 1 (AES-128) are valid 1013 return ERROR_MALFORMED; 1014 } 1015 1016 memset(buf, 0, 4); 1017 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1018 return ERROR_IO; 1019 } 1020 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1021 1022 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1023 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1024 // only unencrypted data must have 0 IV size 1025 return ERROR_MALFORMED; 1026 } else if (defaultIVSize != 0 && 1027 defaultIVSize != 8 && 1028 defaultIVSize != 16) { 
1029 // only supported sizes are 0, 8 and 16 1030 return ERROR_MALFORMED; 1031 } 1032 1033 uint8_t defaultKeyId[16]; 1034 1035 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1036 return ERROR_IO; 1037 } 1038 1039 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1040 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1041 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1042 *offset += chunk_size; 1043 break; 1044 } 1045 1046 case FOURCC('t', 'k', 'h', 'd'): 1047 { 1048 status_t err; 1049 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1050 return err; 1051 } 1052 1053 *offset += chunk_size; 1054 break; 1055 } 1056 1057 case FOURCC('p', 's', 's', 'h'): 1058 { 1059 PsshInfo pssh; 1060 1061 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1062 return ERROR_IO; 1063 } 1064 1065 uint32_t psshdatalen = 0; 1066 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1067 return ERROR_IO; 1068 } 1069 pssh.datalen = ntohl(psshdatalen); 1070 ALOGV("pssh data size: %d", pssh.datalen); 1071 if (pssh.datalen + 20 > chunk_size) { 1072 // pssh data length exceeds size of containing box 1073 return ERROR_MALFORMED; 1074 } 1075 1076 pssh.data = new uint8_t[pssh.datalen]; 1077 ALOGV("allocated pssh @ %p", pssh.data); 1078 ssize_t requested = (ssize_t) pssh.datalen; 1079 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1080 return ERROR_IO; 1081 } 1082 mPssh.push_back(pssh); 1083 1084 *offset += chunk_size; 1085 break; 1086 } 1087 1088 case FOURCC('m', 'd', 'h', 'd'): 1089 { 1090 if (chunk_data_size < 4) { 1091 return ERROR_MALFORMED; 1092 } 1093 1094 uint8_t version; 1095 if (mDataSource->readAt( 1096 data_offset, &version, sizeof(version)) 1097 < (ssize_t)sizeof(version)) { 1098 return ERROR_IO; 1099 } 1100 1101 off64_t timescale_offset; 1102 1103 if (version == 1) { 1104 timescale_offset = data_offset + 4 + 16; 1105 } else if (version 
== 0) { 1106 timescale_offset = data_offset + 4 + 8; 1107 } else { 1108 return ERROR_IO; 1109 } 1110 1111 uint32_t timescale; 1112 if (mDataSource->readAt( 1113 timescale_offset, ×cale, sizeof(timescale)) 1114 < (ssize_t)sizeof(timescale)) { 1115 return ERROR_IO; 1116 } 1117 1118 mLastTrack->timescale = ntohl(timescale); 1119 1120 int64_t duration = 0; 1121 if (version == 1) { 1122 if (mDataSource->readAt( 1123 timescale_offset + 4, &duration, sizeof(duration)) 1124 < (ssize_t)sizeof(duration)) { 1125 return ERROR_IO; 1126 } 1127 duration = ntoh64(duration); 1128 } else { 1129 uint32_t duration32; 1130 if (mDataSource->readAt( 1131 timescale_offset + 4, &duration32, sizeof(duration32)) 1132 < (ssize_t)sizeof(duration32)) { 1133 return ERROR_IO; 1134 } 1135 // ffmpeg sets duration to -1, which is incorrect. 1136 if (duration32 != 0xffffffff) { 1137 duration = ntohl(duration32); 1138 } 1139 } 1140 mLastTrack->meta->setInt64( 1141 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1142 1143 uint8_t lang[2]; 1144 off64_t lang_offset; 1145 if (version == 1) { 1146 lang_offset = timescale_offset + 4 + 8; 1147 } else if (version == 0) { 1148 lang_offset = timescale_offset + 4 + 4; 1149 } else { 1150 return ERROR_IO; 1151 } 1152 1153 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1154 < (ssize_t)sizeof(lang)) { 1155 return ERROR_IO; 1156 } 1157 1158 // To get the ISO-639-2/T three character language code 1159 // 1 bit pad followed by 3 5-bits characters. Each character 1160 // is packed as the difference between its ASCII value and 0x60. 
1161 char lang_code[4]; 1162 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1163 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1164 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1165 lang_code[3] = '\0'; 1166 1167 mLastTrack->meta->setCString( 1168 kKeyMediaLanguage, lang_code); 1169 1170 *offset += chunk_size; 1171 break; 1172 } 1173 1174 case FOURCC('s', 't', 's', 'd'): 1175 { 1176 if (chunk_data_size < 8) { 1177 return ERROR_MALFORMED; 1178 } 1179 1180 uint8_t buffer[8]; 1181 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1182 return ERROR_MALFORMED; 1183 } 1184 1185 if (mDataSource->readAt( 1186 data_offset, buffer, 8) < 8) { 1187 return ERROR_IO; 1188 } 1189 1190 if (U32_AT(buffer) != 0) { 1191 // Should be version 0, flags 0. 1192 return ERROR_MALFORMED; 1193 } 1194 1195 uint32_t entry_count = U32_AT(&buffer[4]); 1196 1197 if (entry_count > 1) { 1198 // For 3GPP timed text, there could be multiple tx3g boxes contain 1199 // multiple text display formats. These formats will be used to 1200 // display the timed text. 1201 // For encrypted files, there may also be more than one entry. 1202 const char *mime; 1203 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1204 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1205 strcasecmp(mime, "application/octet-stream")) { 1206 // For now we only support a single type of media per track. 
1207 mLastTrack->skipTrack = true; 1208 *offset += chunk_size; 1209 break; 1210 } 1211 } 1212 off64_t stop_offset = *offset + chunk_size; 1213 *offset = data_offset + 8; 1214 for (uint32_t i = 0; i < entry_count; ++i) { 1215 status_t err = parseChunk(offset, depth + 1); 1216 if (err != OK) { 1217 return err; 1218 } 1219 } 1220 1221 if (*offset != stop_offset) { 1222 return ERROR_MALFORMED; 1223 } 1224 break; 1225 } 1226 1227 case FOURCC('m', 'p', '4', 'a'): 1228 case FOURCC('e', 'n', 'c', 'a'): 1229 case FOURCC('s', 'a', 'm', 'r'): 1230 case FOURCC('s', 'a', 'w', 'b'): 1231 { 1232 uint8_t buffer[8 + 20]; 1233 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1234 // Basic AudioSampleEntry size. 1235 return ERROR_MALFORMED; 1236 } 1237 1238 if (mDataSource->readAt( 1239 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1240 return ERROR_IO; 1241 } 1242 1243 uint16_t data_ref_index = U16_AT(&buffer[6]); 1244 uint32_t num_channels = U16_AT(&buffer[16]); 1245 1246 uint16_t sample_size = U16_AT(&buffer[18]); 1247 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1248 1249 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1250 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1251 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1252 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1253 } 1254 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1255 chunk, num_channels, sample_size, sample_rate); 1256 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1257 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1258 1259 off64_t stop_offset = *offset + chunk_size; 1260 *offset = data_offset + sizeof(buffer); 1261 while (*offset < stop_offset) { 1262 status_t err = parseChunk(offset, depth + 1); 1263 if (err != OK) { 1264 return err; 1265 } 1266 } 1267 1268 if (*offset != stop_offset) { 1269 return ERROR_MALFORMED; 1270 } 1271 break; 1272 } 1273 1274 case FOURCC('m', 'p', '4', 'v'): 
1275 case FOURCC('e', 'n', 'c', 'v'): 1276 case FOURCC('s', '2', '6', '3'): 1277 case FOURCC('H', '2', '6', '3'): 1278 case FOURCC('h', '2', '6', '3'): 1279 case FOURCC('a', 'v', 'c', '1'): 1280 { 1281 mHasVideo = true; 1282 1283 uint8_t buffer[78]; 1284 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1285 // Basic VideoSampleEntry size. 1286 return ERROR_MALFORMED; 1287 } 1288 1289 if (mDataSource->readAt( 1290 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1291 return ERROR_IO; 1292 } 1293 1294 uint16_t data_ref_index = U16_AT(&buffer[6]); 1295 uint16_t width = U16_AT(&buffer[6 + 18]); 1296 uint16_t height = U16_AT(&buffer[6 + 20]); 1297 1298 // The video sample is not standard-compliant if it has invalid dimension. 1299 // Use some default width and height value, and 1300 // let the decoder figure out the actual width and height (and thus 1301 // be prepared for INFO_FOMRAT_CHANGED event). 1302 if (width == 0) width = 352; 1303 if (height == 0) height = 288; 1304 1305 // printf("*** coding='%s' width=%d height=%d\n", 1306 // chunk, width, height); 1307 1308 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1309 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1310 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1311 } 1312 mLastTrack->meta->setInt32(kKeyWidth, width); 1313 mLastTrack->meta->setInt32(kKeyHeight, height); 1314 1315 off64_t stop_offset = *offset + chunk_size; 1316 *offset = data_offset + sizeof(buffer); 1317 while (*offset < stop_offset) { 1318 status_t err = parseChunk(offset, depth + 1); 1319 if (err != OK) { 1320 return err; 1321 } 1322 } 1323 1324 if (*offset != stop_offset) { 1325 return ERROR_MALFORMED; 1326 } 1327 break; 1328 } 1329 1330 case FOURCC('s', 't', 'c', 'o'): 1331 case FOURCC('c', 'o', '6', '4'): 1332 { 1333 status_t err = 1334 mLastTrack->sampleTable->setChunkOffsetParams( 1335 chunk_type, data_offset, chunk_data_size); 1336 1337 if (err != OK) { 1338 return err; 
1339 } 1340 1341 *offset += chunk_size; 1342 break; 1343 } 1344 1345 case FOURCC('s', 't', 's', 'c'): 1346 { 1347 status_t err = 1348 mLastTrack->sampleTable->setSampleToChunkParams( 1349 data_offset, chunk_data_size); 1350 1351 if (err != OK) { 1352 return err; 1353 } 1354 1355 *offset += chunk_size; 1356 break; 1357 } 1358 1359 case FOURCC('s', 't', 's', 'z'): 1360 case FOURCC('s', 't', 'z', '2'): 1361 { 1362 status_t err = 1363 mLastTrack->sampleTable->setSampleSizeParams( 1364 chunk_type, data_offset, chunk_data_size); 1365 1366 if (err != OK) { 1367 return err; 1368 } 1369 1370 size_t max_size; 1371 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1372 1373 if (err != OK) { 1374 return err; 1375 } 1376 1377 if (max_size != 0) { 1378 // Assume that a given buffer only contains at most 10 chunks, 1379 // each chunk originally prefixed with a 2 byte length will 1380 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1381 // and thus will grow by 2 bytes per chunk. 1382 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1383 } else { 1384 // No size was specified. Pick a conservatively large size. 1385 int32_t width, height; 1386 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) || 1387 !mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1388 ALOGE("No width or height, assuming worst case 1080p"); 1389 width = 1920; 1390 height = 1080; 1391 } 1392 1393 const char *mime; 1394 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1395 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1396 // AVC requires compression ratio of at least 2, and uses 1397 // macroblocks 1398 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1399 } else { 1400 // For all other formats there is no minimum compression 1401 // ratio. Use compression ratio of 1. 
1402 max_size = width * height * 3 / 2; 1403 } 1404 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1405 } 1406 *offset += chunk_size; 1407 1408 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1409 // mimetype) previously obtained, so don't cache them. 1410 const char *mime; 1411 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1412 // Calculate average frame rate. 1413 if (!strncasecmp("video/", mime, 6)) { 1414 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1415 int64_t durationUs; 1416 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1417 if (durationUs > 0) { 1418 int32_t frameRate = (nSamples * 1000000LL + 1419 (durationUs >> 1)) / durationUs; 1420 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1421 } 1422 } 1423 } 1424 1425 break; 1426 } 1427 1428 case FOURCC('s', 't', 't', 's'): 1429 { 1430 status_t err = 1431 mLastTrack->sampleTable->setTimeToSampleParams( 1432 data_offset, chunk_data_size); 1433 1434 if (err != OK) { 1435 return err; 1436 } 1437 1438 *offset += chunk_size; 1439 break; 1440 } 1441 1442 case FOURCC('c', 't', 't', 's'): 1443 { 1444 status_t err = 1445 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1446 data_offset, chunk_data_size); 1447 1448 if (err != OK) { 1449 return err; 1450 } 1451 1452 *offset += chunk_size; 1453 break; 1454 } 1455 1456 case FOURCC('s', 't', 's', 's'): 1457 { 1458 status_t err = 1459 mLastTrack->sampleTable->setSyncSampleParams( 1460 data_offset, chunk_data_size); 1461 1462 if (err != OK) { 1463 return err; 1464 } 1465 1466 *offset += chunk_size; 1467 break; 1468 } 1469 1470 // @xyz 1471 case FOURCC('\xA9', 'x', 'y', 'z'): 1472 { 1473 // Best case the total data length inside "@xyz" box 1474 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1475 // where "\x00\x04" is the text string length with value = 4, 1476 // "\0x15\xc7" is the language code = en, and "0+0" is a 1477 // location (string) value with 
longitude = 0 and latitude = 0. 1478 if (chunk_data_size < 8) { 1479 return ERROR_MALFORMED; 1480 } 1481 1482 // Worst case the location string length would be 18, 1483 // for instance +90.0000-180.0000, without the trailing "/" and 1484 // the string length + language code. 1485 char buffer[18]; 1486 1487 // Substracting 5 from the data size is because the text string length + 1488 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1489 off64_t location_length = chunk_data_size - 5; 1490 if (location_length >= (off64_t) sizeof(buffer)) { 1491 return ERROR_MALFORMED; 1492 } 1493 1494 if (mDataSource->readAt( 1495 data_offset + 4, buffer, location_length) < location_length) { 1496 return ERROR_IO; 1497 } 1498 1499 buffer[location_length] = '\0'; 1500 mFileMetaData->setCString(kKeyLocation, buffer); 1501 *offset += chunk_size; 1502 break; 1503 } 1504 1505 case FOURCC('e', 's', 'd', 's'): 1506 { 1507 if (chunk_data_size < 4) { 1508 return ERROR_MALFORMED; 1509 } 1510 1511 uint8_t buffer[256]; 1512 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1513 return ERROR_BUFFER_TOO_SMALL; 1514 } 1515 1516 if (mDataSource->readAt( 1517 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1518 return ERROR_IO; 1519 } 1520 1521 if (U32_AT(buffer) != 0) { 1522 // Should be version 0, flags 0. 1523 return ERROR_MALFORMED; 1524 } 1525 1526 mLastTrack->meta->setData( 1527 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1528 1529 if (mPath.size() >= 2 1530 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1531 // Information from the ESDS must be relied on for proper 1532 // setup of sample rate and channel count for MPEG4 Audio. 1533 // The generic header appears to only contain generic 1534 // information... 
1535 1536 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1537 &buffer[4], chunk_data_size - 4); 1538 1539 if (err != OK) { 1540 return err; 1541 } 1542 } 1543 1544 *offset += chunk_size; 1545 break; 1546 } 1547 1548 case FOURCC('a', 'v', 'c', 'C'): 1549 { 1550 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1551 1552 if (mDataSource->readAt( 1553 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1554 return ERROR_IO; 1555 } 1556 1557 mLastTrack->meta->setData( 1558 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1559 1560 *offset += chunk_size; 1561 break; 1562 } 1563 1564 case FOURCC('d', '2', '6', '3'): 1565 { 1566 /* 1567 * d263 contains a fixed 7 bytes part: 1568 * vendor - 4 bytes 1569 * version - 1 byte 1570 * level - 1 byte 1571 * profile - 1 byte 1572 * optionally, "d263" box itself may contain a 16-byte 1573 * bit rate box (bitr) 1574 * average bit rate - 4 bytes 1575 * max bit rate - 4 bytes 1576 */ 1577 char buffer[23]; 1578 if (chunk_data_size != 7 && 1579 chunk_data_size != 23) { 1580 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1581 return ERROR_MALFORMED; 1582 } 1583 1584 if (mDataSource->readAt( 1585 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1586 return ERROR_IO; 1587 } 1588 1589 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1590 1591 *offset += chunk_size; 1592 break; 1593 } 1594 1595 case FOURCC('m', 'e', 't', 'a'): 1596 { 1597 uint8_t buffer[4]; 1598 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1599 return ERROR_MALFORMED; 1600 } 1601 1602 if (mDataSource->readAt( 1603 data_offset, buffer, 4) < 4) { 1604 return ERROR_IO; 1605 } 1606 1607 if (U32_AT(buffer) != 0) { 1608 // Should be version 0, flags 0. 1609 1610 // If it's not, let's assume this is one of those 1611 // apparently malformed chunks that don't have flags 1612 // and completely different semantics than what's 1613 // in the MPEG4 specs and skip it. 
1614 *offset += chunk_size; 1615 return OK; 1616 } 1617 1618 off64_t stop_offset = *offset + chunk_size; 1619 *offset = data_offset + sizeof(buffer); 1620 while (*offset < stop_offset) { 1621 status_t err = parseChunk(offset, depth + 1); 1622 if (err != OK) { 1623 return err; 1624 } 1625 } 1626 1627 if (*offset != stop_offset) { 1628 return ERROR_MALFORMED; 1629 } 1630 break; 1631 } 1632 1633 case FOURCC('m', 'e', 'a', 'n'): 1634 case FOURCC('n', 'a', 'm', 'e'): 1635 case FOURCC('d', 'a', 't', 'a'): 1636 { 1637 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1638 status_t err = parseMetaData(data_offset, chunk_data_size); 1639 1640 if (err != OK) { 1641 return err; 1642 } 1643 } 1644 1645 *offset += chunk_size; 1646 break; 1647 } 1648 1649 case FOURCC('m', 'v', 'h', 'd'): 1650 { 1651 if (chunk_data_size < 24) { 1652 return ERROR_MALFORMED; 1653 } 1654 1655 uint8_t header[24]; 1656 if (mDataSource->readAt( 1657 data_offset, header, sizeof(header)) 1658 < (ssize_t)sizeof(header)) { 1659 return ERROR_IO; 1660 } 1661 1662 uint64_t creationTime; 1663 if (header[0] == 1) { 1664 creationTime = U64_AT(&header[4]); 1665 mHeaderTimescale = U32_AT(&header[20]); 1666 } else if (header[0] != 0) { 1667 return ERROR_MALFORMED; 1668 } else { 1669 creationTime = U32_AT(&header[4]); 1670 mHeaderTimescale = U32_AT(&header[12]); 1671 } 1672 1673 String8 s; 1674 convertTimeToDate(creationTime, &s); 1675 1676 mFileMetaData->setCString(kKeyDate, s.string()); 1677 1678 *offset += chunk_size; 1679 break; 1680 } 1681 1682 case FOURCC('m', 'd', 'a', 't'): 1683 { 1684 ALOGV("mdat chunk, drm: %d", mIsDrm); 1685 if (!mIsDrm) { 1686 *offset += chunk_size; 1687 break; 1688 } 1689 1690 if (chunk_size < 8) { 1691 return ERROR_MALFORMED; 1692 } 1693 1694 return parseDrmSINF(offset, data_offset); 1695 } 1696 1697 case FOURCC('h', 'd', 'l', 'r'): 1698 { 1699 uint32_t buffer; 1700 if (mDataSource->readAt( 1701 data_offset + 8, &buffer, 4) < 4) { 1702 return ERROR_IO; 1703 } 1704 1705 uint32_t type 
= ntohl(buffer); 1706 // For the 3GPP file format, the handler-type within the 'hdlr' box 1707 // shall be 'text'. We also want to support 'sbtl' handler type 1708 // for a practical reason as various MPEG4 containers use it. 1709 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1710 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1711 } 1712 1713 *offset += chunk_size; 1714 break; 1715 } 1716 1717 case FOURCC('t', 'x', '3', 'g'): 1718 { 1719 uint32_t type; 1720 const void *data; 1721 size_t size = 0; 1722 if (!mLastTrack->meta->findData( 1723 kKeyTextFormatData, &type, &data, &size)) { 1724 size = 0; 1725 } 1726 1727 if (SIZE_MAX - chunk_size <= size) { 1728 return ERROR_MALFORMED; 1729 } 1730 1731 uint8_t *buffer = new uint8_t[size + chunk_size]; 1732 if (buffer == NULL) { 1733 return ERROR_MALFORMED; 1734 } 1735 1736 if (size > 0) { 1737 memcpy(buffer, data, size); 1738 } 1739 1740 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1741 < chunk_size) { 1742 delete[] buffer; 1743 buffer = NULL; 1744 1745 return ERROR_IO; 1746 } 1747 1748 mLastTrack->meta->setData( 1749 kKeyTextFormatData, 0, buffer, size + chunk_size); 1750 1751 delete[] buffer; 1752 1753 *offset += chunk_size; 1754 break; 1755 } 1756 1757 case FOURCC('c', 'o', 'v', 'r'): 1758 { 1759 if (mFileMetaData != NULL) { 1760 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1761 chunk_data_size, data_offset); 1762 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1763 if (mDataSource->readAt( 1764 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1765 return ERROR_IO; 1766 } 1767 const int kSkipBytesOfDataBox = 16; 1768 mFileMetaData->setData( 1769 kKeyAlbumArt, MetaData::TYPE_NONE, 1770 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1771 } 1772 1773 *offset += chunk_size; 1774 break; 1775 } 1776 1777 case FOURCC('-', '-', '-', '-'): 1778 { 1779 
mLastCommentMean.clear(); 1780 mLastCommentName.clear(); 1781 mLastCommentData.clear(); 1782 *offset += chunk_size; 1783 break; 1784 } 1785 1786 case FOURCC('s', 'i', 'd', 'x'): 1787 { 1788 parseSegmentIndex(data_offset, chunk_data_size); 1789 *offset += chunk_size; 1790 return UNKNOWN_ERROR; // stop parsing after sidx 1791 } 1792 1793 default: 1794 { 1795 *offset += chunk_size; 1796 break; 1797 } 1798 } 1799 1800 return OK; 1801} 1802 1803status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 1804 ALOGV("MPEG4Extractor::parseSegmentIndex"); 1805 1806 if (size < 12) { 1807 return -EINVAL; 1808 } 1809 1810 uint32_t flags; 1811 if (!mDataSource->getUInt32(offset, &flags)) { 1812 return ERROR_MALFORMED; 1813 } 1814 1815 uint32_t version = flags >> 24; 1816 flags &= 0xffffff; 1817 1818 ALOGV("sidx version %d", version); 1819 1820 uint32_t referenceId; 1821 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 1822 return ERROR_MALFORMED; 1823 } 1824 1825 uint32_t timeScale; 1826 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 1827 return ERROR_MALFORMED; 1828 } 1829 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 1830 1831 uint64_t earliestPresentationTime; 1832 uint64_t firstOffset; 1833 1834 offset += 12; 1835 size -= 12; 1836 1837 if (version == 0) { 1838 if (size < 8) { 1839 return -EINVAL; 1840 } 1841 uint32_t tmp; 1842 if (!mDataSource->getUInt32(offset, &tmp)) { 1843 return ERROR_MALFORMED; 1844 } 1845 earliestPresentationTime = tmp; 1846 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 1847 return ERROR_MALFORMED; 1848 } 1849 firstOffset = tmp; 1850 offset += 8; 1851 size -= 8; 1852 } else { 1853 if (size < 16) { 1854 return -EINVAL; 1855 } 1856 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 1857 return ERROR_MALFORMED; 1858 } 1859 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 1860 return ERROR_MALFORMED; 1861 } 1862 offset += 16; 1863 size -= 16; 1864 } 1865 ALOGV("sidx pres/off: 
%Ld/%Ld", earliestPresentationTime, firstOffset); 1866 1867 if (size < 4) { 1868 return -EINVAL; 1869 } 1870 1871 uint16_t referenceCount; 1872 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 1873 return ERROR_MALFORMED; 1874 } 1875 offset += 4; 1876 size -= 4; 1877 ALOGV("refcount: %d", referenceCount); 1878 1879 if (size < referenceCount * 12) { 1880 return -EINVAL; 1881 } 1882 1883 uint64_t total_duration = 0; 1884 for (unsigned int i = 0; i < referenceCount; i++) { 1885 uint32_t d1, d2, d3; 1886 1887 if (!mDataSource->getUInt32(offset, &d1) || // size 1888 !mDataSource->getUInt32(offset + 4, &d2) || // duration 1889 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 1890 return ERROR_MALFORMED; 1891 } 1892 1893 if (d1 & 0x80000000) { 1894 ALOGW("sub-sidx boxes not supported yet"); 1895 } 1896 bool sap = d3 & 0x80000000; 1897 bool saptype = d3 >> 28; 1898 if (!sap || saptype > 2) { 1899 ALOGW("not a stream access point, or unsupported type"); 1900 } 1901 total_duration += d2; 1902 offset += 12; 1903 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 1904 SidxEntry se; 1905 se.mSize = d1 & 0x7fffffff; 1906 se.mDurationUs = 1000000LL * d2 / timeScale; 1907 mSidxEntries.add(se); 1908 } 1909 1910 mSidxDuration = total_duration * 1000000 / timeScale; 1911 ALOGV("duration: %lld", mSidxDuration); 1912 1913 int64_t metaDuration; 1914 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 1915 mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration); 1916 } 1917 return OK; 1918} 1919 1920 1921 1922status_t MPEG4Extractor::parseTrackHeader( 1923 off64_t data_offset, off64_t data_size) { 1924 if (data_size < 4) { 1925 return ERROR_MALFORMED; 1926 } 1927 1928 uint8_t version; 1929 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1930 return ERROR_IO; 1931 } 1932 1933 size_t dynSize = (version == 1) ? 
36 : 24; 1934 1935 uint8_t buffer[36 + 60]; 1936 1937 if (data_size != (off64_t)dynSize + 60) { 1938 return ERROR_MALFORMED; 1939 } 1940 1941 if (mDataSource->readAt( 1942 data_offset, buffer, data_size) < (ssize_t)data_size) { 1943 return ERROR_IO; 1944 } 1945 1946 uint64_t ctime, mtime, duration; 1947 int32_t id; 1948 1949 if (version == 1) { 1950 ctime = U64_AT(&buffer[4]); 1951 mtime = U64_AT(&buffer[12]); 1952 id = U32_AT(&buffer[20]); 1953 duration = U64_AT(&buffer[28]); 1954 } else if (version == 0) { 1955 ctime = U32_AT(&buffer[4]); 1956 mtime = U32_AT(&buffer[8]); 1957 id = U32_AT(&buffer[12]); 1958 duration = U32_AT(&buffer[20]); 1959 } else { 1960 return ERROR_UNSUPPORTED; 1961 } 1962 1963 mLastTrack->meta->setInt32(kKeyTrackID, id); 1964 1965 size_t matrixOffset = dynSize + 16; 1966 int32_t a00 = U32_AT(&buffer[matrixOffset]); 1967 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 1968 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 1969 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 1970 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 1971 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 1972 1973#if 0 1974 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 1975 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 1976 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 1977 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 1978#endif 1979 1980 uint32_t rotationDegrees; 1981 1982 static const int32_t kFixedOne = 0x10000; 1983 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 1984 // Identity, no rotation 1985 rotationDegrees = 0; 1986 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 1987 rotationDegrees = 90; 1988 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 1989 rotationDegrees = 270; 1990 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 1991 rotationDegrees = 180; 1992 } else { 1993 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 1994 rotationDegrees = 0; 
1995 } 1996 1997 if (rotationDegrees != 0) { 1998 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 1999 } 2000 2001 // Handle presentation display size, which could be different 2002 // from the image size indicated by kKeyWidth and kKeyHeight. 2003 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2004 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2005 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2006 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2007 2008 return OK; 2009} 2010 2011status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) { 2012 if (size < 4) { 2013 return ERROR_MALFORMED; 2014 } 2015 2016 uint8_t *buffer = new uint8_t[size + 1]; 2017 if (mDataSource->readAt( 2018 offset, buffer, size) != (ssize_t)size) { 2019 delete[] buffer; 2020 buffer = NULL; 2021 2022 return ERROR_IO; 2023 } 2024 2025 uint32_t flags = U32_AT(buffer); 2026 2027 uint32_t metadataKey = 0; 2028 char chunk[5]; 2029 MakeFourCCString(mPath[4], chunk); 2030 ALOGV("meta: %s @ %lld", chunk, offset); 2031 switch (mPath[4]) { 2032 case FOURCC(0xa9, 'a', 'l', 'b'): 2033 { 2034 metadataKey = kKeyAlbum; 2035 break; 2036 } 2037 case FOURCC(0xa9, 'A', 'R', 'T'): 2038 { 2039 metadataKey = kKeyArtist; 2040 break; 2041 } 2042 case FOURCC('a', 'A', 'R', 'T'): 2043 { 2044 metadataKey = kKeyAlbumArtist; 2045 break; 2046 } 2047 case FOURCC(0xa9, 'd', 'a', 'y'): 2048 { 2049 metadataKey = kKeyYear; 2050 break; 2051 } 2052 case FOURCC(0xa9, 'n', 'a', 'm'): 2053 { 2054 metadataKey = kKeyTitle; 2055 break; 2056 } 2057 case FOURCC(0xa9, 'w', 'r', 't'): 2058 { 2059 metadataKey = kKeyWriter; 2060 break; 2061 } 2062 case FOURCC('c', 'o', 'v', 'r'): 2063 { 2064 metadataKey = kKeyAlbumArt; 2065 break; 2066 } 2067 case FOURCC('g', 'n', 'r', 'e'): 2068 { 2069 metadataKey = kKeyGenre; 2070 break; 2071 } 2072 case FOURCC(0xa9, 'g', 'e', 'n'): 2073 { 2074 metadataKey = kKeyGenre; 2075 break; 2076 } 2077 case FOURCC('c', 'p', 'i', 'l'): 2078 { 2079 if (size == 9 && 
flags == 21) { 2080 char tmp[16]; 2081 sprintf(tmp, "%d", 2082 (int)buffer[size - 1]); 2083 2084 mFileMetaData->setCString(kKeyCompilation, tmp); 2085 } 2086 break; 2087 } 2088 case FOURCC('t', 'r', 'k', 'n'): 2089 { 2090 if (size == 16 && flags == 0) { 2091 char tmp[16]; 2092 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2093 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2094 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2095 2096 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2097 } 2098 break; 2099 } 2100 case FOURCC('d', 'i', 's', 'k'): 2101 { 2102 if ((size == 14 || size == 16) && flags == 0) { 2103 char tmp[16]; 2104 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2105 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2106 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2107 2108 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2109 } 2110 break; 2111 } 2112 case FOURCC('-', '-', '-', '-'): 2113 { 2114 buffer[size] = '\0'; 2115 switch (mPath[5]) { 2116 case FOURCC('m', 'e', 'a', 'n'): 2117 mLastCommentMean.setTo((const char *)buffer + 4); 2118 break; 2119 case FOURCC('n', 'a', 'm', 'e'): 2120 mLastCommentName.setTo((const char *)buffer + 4); 2121 break; 2122 case FOURCC('d', 'a', 't', 'a'): 2123 mLastCommentData.setTo((const char *)buffer + 8); 2124 break; 2125 } 2126 2127 // Once we have a set of mean/name/data info, go ahead and process 2128 // it to see if its something we are interested in. Whether or not 2129 // were are interested in the specific tag, make sure to clear out 2130 // the set so we can be ready to process another tuple should one 2131 // show up later in the file. 
2132 if ((mLastCommentMean.length() != 0) && 2133 (mLastCommentName.length() != 0) && 2134 (mLastCommentData.length() != 0)) { 2135 2136 if (mLastCommentMean == "com.apple.iTunes" 2137 && mLastCommentName == "iTunSMPB") { 2138 int32_t delay, padding; 2139 if (sscanf(mLastCommentData, 2140 " %*x %x %x %*x", &delay, &padding) == 2) { 2141 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2142 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2143 } 2144 } 2145 2146 mLastCommentMean.clear(); 2147 mLastCommentName.clear(); 2148 mLastCommentData.clear(); 2149 } 2150 break; 2151 } 2152 2153 default: 2154 break; 2155 } 2156 2157 if (size >= 8 && metadataKey) { 2158 if (metadataKey == kKeyAlbumArt) { 2159 mFileMetaData->setData( 2160 kKeyAlbumArt, MetaData::TYPE_NONE, 2161 buffer + 8, size - 8); 2162 } else if (metadataKey == kKeyGenre) { 2163 if (flags == 0) { 2164 // uint8_t genre code, iTunes genre codes are 2165 // the standard id3 codes, except they start 2166 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2167 // We use standard id3 numbering, so subtract 1. 
2168 int genrecode = (int)buffer[size - 1]; 2169 genrecode--; 2170 if (genrecode < 0) { 2171 genrecode = 255; // reserved for 'unknown genre' 2172 } 2173 char genre[10]; 2174 sprintf(genre, "%d", genrecode); 2175 2176 mFileMetaData->setCString(metadataKey, genre); 2177 } else if (flags == 1) { 2178 // custom genre string 2179 buffer[size] = '\0'; 2180 2181 mFileMetaData->setCString( 2182 metadataKey, (const char *)buffer + 8); 2183 } 2184 } else { 2185 buffer[size] = '\0'; 2186 2187 mFileMetaData->setCString( 2188 metadataKey, (const char *)buffer + 8); 2189 } 2190 } 2191 2192 delete[] buffer; 2193 buffer = NULL; 2194 2195 return OK; 2196} 2197 2198sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2199 status_t err; 2200 if ((err = readMetaData()) != OK) { 2201 return NULL; 2202 } 2203 2204 Track *track = mFirstTrack; 2205 while (index > 0) { 2206 if (track == NULL) { 2207 return NULL; 2208 } 2209 2210 track = track->next; 2211 --index; 2212 } 2213 2214 if (track == NULL) { 2215 return NULL; 2216 } 2217 2218 ALOGV("getTrack called, pssh: %d", mPssh.size()); 2219 2220 return new MPEG4Source( 2221 track->meta, mDataSource, track->timescale, track->sampleTable, 2222 mSidxEntries, mMoofOffset); 2223} 2224 2225// static 2226status_t MPEG4Extractor::verifyTrack(Track *track) { 2227 const char *mime; 2228 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2229 2230 uint32_t type; 2231 const void *data; 2232 size_t size; 2233 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2234 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2235 || type != kTypeAVCC) { 2236 return ERROR_MALFORMED; 2237 } 2238 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2239 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2240 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2241 || type != kTypeESDS) { 2242 return ERROR_MALFORMED; 2243 } 2244 } 2245 2246 if (!track->sampleTable->isValid()) { 2247 // Make sure we have all the metadata we need. 
2248 return ERROR_MALFORMED; 2249 } 2250 2251 return OK; 2252} 2253 2254status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2255 const void *esds_data, size_t esds_size) { 2256 ESDS esds(esds_data, esds_size); 2257 2258 uint8_t objectTypeIndication; 2259 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2260 return ERROR_MALFORMED; 2261 } 2262 2263 if (objectTypeIndication == 0xe1) { 2264 // This isn't MPEG4 audio at all, it's QCELP 14k... 2265 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2266 return OK; 2267 } 2268 2269 if (objectTypeIndication == 0x6b) { 2270 // The media subtype is MP3 audio 2271 // Our software MP3 audio decoder may not be able to handle 2272 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2273 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2274 return ERROR_UNSUPPORTED; 2275 } 2276 2277 const uint8_t *csd; 2278 size_t csd_size; 2279 if (esds.getCodecSpecificInfo( 2280 (const void **)&csd, &csd_size) != OK) { 2281 return ERROR_MALFORMED; 2282 } 2283 2284#if 0 2285 printf("ESD of size %d\n", csd_size); 2286 hexdump(csd, csd_size); 2287#endif 2288 2289 if (csd_size == 0) { 2290 // There's no further information, i.e. no codec specific data 2291 // Let's assume that the information provided in the mpeg4 headers 2292 // is accurate and hope for the best. 
2293 2294 return OK; 2295 } 2296 2297 if (csd_size < 2) { 2298 return ERROR_MALFORMED; 2299 } 2300 2301 static uint32_t kSamplingRate[] = { 2302 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2303 16000, 12000, 11025, 8000, 7350 2304 }; 2305 2306 ABitReader br(csd, csd_size); 2307 uint32_t objectType = br.getBits(5); 2308 2309 if (objectType == 31) { // AAC-ELD => additional 6 bits 2310 objectType = 32 + br.getBits(6); 2311 } 2312 2313 //keep AOT type 2314 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2315 2316 uint32_t freqIndex = br.getBits(4); 2317 2318 int32_t sampleRate = 0; 2319 int32_t numChannels = 0; 2320 if (freqIndex == 15) { 2321 if (csd_size < 5) { 2322 return ERROR_MALFORMED; 2323 } 2324 sampleRate = br.getBits(24); 2325 numChannels = br.getBits(4); 2326 } else { 2327 numChannels = br.getBits(4); 2328 2329 if (freqIndex == 13 || freqIndex == 14) { 2330 return ERROR_MALFORMED; 2331 } 2332 2333 sampleRate = kSamplingRate[freqIndex]; 2334 } 2335 2336 if (objectType == 5 || objectType == 29) { // SBR specific config per 14496-3 table 1.13 2337 uint32_t extFreqIndex = br.getBits(4); 2338 int32_t extSampleRate; 2339 if (extFreqIndex == 15) { 2340 if (csd_size < 8) { 2341 return ERROR_MALFORMED; 2342 } 2343 extSampleRate = br.getBits(24); 2344 } else { 2345 if (extFreqIndex == 13 || extFreqIndex == 14) { 2346 return ERROR_MALFORMED; 2347 } 2348 extSampleRate = kSamplingRate[extFreqIndex]; 2349 } 2350 //TODO: save the extension sampling rate value in meta data => 2351 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2352 } 2353 2354 if (numChannels == 0) { 2355 return ERROR_UNSUPPORTED; 2356 } 2357 2358 int32_t prevSampleRate; 2359 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2360 2361 if (prevSampleRate != sampleRate) { 2362 ALOGV("mpeg4 audio sample rate different from previous setting. 
" 2363 "was: %d, now: %d", prevSampleRate, sampleRate); 2364 } 2365 2366 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2367 2368 int32_t prevChannelCount; 2369 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2370 2371 if (prevChannelCount != numChannels) { 2372 ALOGV("mpeg4 audio channel count different from previous setting. " 2373 "was: %d, now: %d", prevChannelCount, numChannels); 2374 } 2375 2376 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2377 2378 return OK; 2379} 2380 2381//////////////////////////////////////////////////////////////////////////////// 2382 2383MPEG4Source::MPEG4Source( 2384 const sp<MetaData> &format, 2385 const sp<DataSource> &dataSource, 2386 int32_t timeScale, 2387 const sp<SampleTable> &sampleTable, 2388 Vector<SidxEntry> &sidx, 2389 off64_t firstMoofOffset) 2390 : mFormat(format), 2391 mDataSource(dataSource), 2392 mTimescale(timeScale), 2393 mSampleTable(sampleTable), 2394 mCurrentSampleIndex(0), 2395 mCurrentFragmentIndex(0), 2396 mSegments(sidx), 2397 mFirstMoofOffset(firstMoofOffset), 2398 mCurrentMoofOffset(firstMoofOffset), 2399 mCurrentTime(0), 2400 mCurrentSampleInfoAllocSize(0), 2401 mCurrentSampleInfoSizes(NULL), 2402 mCurrentSampleInfoOffsetsAllocSize(0), 2403 mCurrentSampleInfoOffsets(NULL), 2404 mIsAVC(false), 2405 mNALLengthSize(0), 2406 mStarted(false), 2407 mGroup(NULL), 2408 mBuffer(NULL), 2409 mWantsNALFragments(false), 2410 mSrcBuffer(NULL) { 2411 2412 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 2413 mDefaultIVSize = 0; 2414 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 2415 uint32_t keytype; 2416 const void *key; 2417 size_t keysize; 2418 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 2419 CHECK(keysize <= 16); 2420 memset(mCryptoKey, 0, 16); 2421 memcpy(mCryptoKey, key, keysize); 2422 } 2423 2424 const char *mime; 2425 bool success = mFormat->findCString(kKeyMIMEType, &mime); 2426 CHECK(success); 2427 2428 mIsAVC = 
!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 2429 2430 if (mIsAVC) { 2431 uint32_t type; 2432 const void *data; 2433 size_t size; 2434 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 2435 2436 const uint8_t *ptr = (const uint8_t *)data; 2437 2438 CHECK(size >= 7); 2439 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2440 2441 // The number of bytes used to encode the length of a NAL unit. 2442 mNALLengthSize = 1 + (ptr[4] & 3); 2443 } 2444 2445 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 2446 2447 if (mFirstMoofOffset != 0) { 2448 off64_t offset = mFirstMoofOffset; 2449 parseChunk(&offset); 2450 } 2451} 2452 2453MPEG4Source::~MPEG4Source() { 2454 if (mStarted) { 2455 stop(); 2456 } 2457 free(mCurrentSampleInfoSizes); 2458 free(mCurrentSampleInfoOffsets); 2459} 2460 2461status_t MPEG4Source::start(MetaData *params) { 2462 Mutex::Autolock autoLock(mLock); 2463 2464 CHECK(!mStarted); 2465 2466 int32_t val; 2467 if (params && params->findInt32(kKeyWantsNALFragments, &val) 2468 && val != 0) { 2469 mWantsNALFragments = true; 2470 } else { 2471 mWantsNALFragments = false; 2472 } 2473 2474 mGroup = new MediaBufferGroup; 2475 2476 int32_t max_size; 2477 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 2478 2479 mGroup->add_buffer(new MediaBuffer(max_size)); 2480 2481 mSrcBuffer = new uint8_t[max_size]; 2482 2483 mStarted = true; 2484 2485 return OK; 2486} 2487 2488status_t MPEG4Source::stop() { 2489 Mutex::Autolock autoLock(mLock); 2490 2491 CHECK(mStarted); 2492 2493 if (mBuffer != NULL) { 2494 mBuffer->release(); 2495 mBuffer = NULL; 2496 } 2497 2498 delete[] mSrcBuffer; 2499 mSrcBuffer = NULL; 2500 2501 delete mGroup; 2502 mGroup = NULL; 2503 2504 mStarted = false; 2505 mCurrentSampleIndex = 0; 2506 2507 return OK; 2508} 2509 2510status_t MPEG4Source::parseChunk(off64_t *offset) { 2511 uint32_t hdr[2]; 2512 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2513 return ERROR_IO; 2514 } 2515 uint64_t chunk_size = ntohl(hdr[0]); 2516 uint32_t 
chunk_type = ntohl(hdr[1]); 2517 off64_t data_offset = *offset + 8; 2518 2519 if (chunk_size == 1) { 2520 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 2521 return ERROR_IO; 2522 } 2523 chunk_size = ntoh64(chunk_size); 2524 data_offset += 8; 2525 2526 if (chunk_size < 16) { 2527 // The smallest valid chunk is 16 bytes long in this case. 2528 return ERROR_MALFORMED; 2529 } 2530 } else if (chunk_size < 8) { 2531 // The smallest valid chunk is 8 bytes long. 2532 return ERROR_MALFORMED; 2533 } 2534 2535 char chunk[5]; 2536 MakeFourCCString(chunk_type, chunk); 2537 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 2538 2539 off64_t chunk_data_size = *offset + chunk_size - data_offset; 2540 2541 switch(chunk_type) { 2542 2543 case FOURCC('t', 'r', 'a', 'f'): 2544 case FOURCC('m', 'o', 'o', 'f'): { 2545 off64_t stop_offset = *offset + chunk_size; 2546 *offset = data_offset; 2547 while (*offset < stop_offset) { 2548 status_t err = parseChunk(offset); 2549 if (err != OK) { 2550 return err; 2551 } 2552 } 2553 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 2554 // *offset points to the mdat box following this moof 2555 parseChunk(offset); // doesn't actually parse it, just updates offset 2556 mNextMoofOffset = *offset; 2557 } 2558 break; 2559 } 2560 2561 case FOURCC('t', 'f', 'h', 'd'): { 2562 status_t err; 2563 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 2564 return err; 2565 } 2566 *offset += chunk_size; 2567 break; 2568 } 2569 2570 case FOURCC('t', 'r', 'u', 'n'): { 2571 status_t err; 2572 if (mLastParsedTrackId == mTrackId) { 2573 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 2574 return err; 2575 } 2576 } 2577 2578 *offset += chunk_size; 2579 break; 2580 } 2581 2582 case FOURCC('s', 'a', 'i', 'z'): { 2583 status_t err; 2584 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 2585 return err; 2586 } 2587 *offset += chunk_size; 2588 break; 2589 } 2590 case 
FOURCC('s', 'a', 'i', 'o'): { 2591 status_t err; 2592 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 2593 return err; 2594 } 2595 *offset += chunk_size; 2596 break; 2597 } 2598 2599 case FOURCC('m', 'd', 'a', 't'): { 2600 // parse DRM info if present 2601 ALOGV("MPEG4Source::parseChunk mdat"); 2602 // if saiz/saoi was previously observed, do something with the sampleinfos 2603 *offset += chunk_size; 2604 break; 2605 } 2606 2607 default: { 2608 *offset += chunk_size; 2609 break; 2610 } 2611 } 2612 return OK; 2613} 2614 2615status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) { 2616 ALOGV("parseSampleAuxiliaryInformationSizes"); 2617 // 14496-12 8.7.12 2618 uint8_t version; 2619 if (mDataSource->readAt( 2620 offset, &version, sizeof(version)) 2621 < (ssize_t)sizeof(version)) { 2622 return ERROR_IO; 2623 } 2624 2625 if (version != 0) { 2626 return ERROR_UNSUPPORTED; 2627 } 2628 offset++; 2629 2630 uint32_t flags; 2631 if (!mDataSource->getUInt24(offset, &flags)) { 2632 return ERROR_IO; 2633 } 2634 offset += 3; 2635 2636 if (flags & 1) { 2637 uint32_t tmp; 2638 if (!mDataSource->getUInt32(offset, &tmp)) { 2639 return ERROR_MALFORMED; 2640 } 2641 mCurrentAuxInfoType = tmp; 2642 offset += 4; 2643 if (!mDataSource->getUInt32(offset, &tmp)) { 2644 return ERROR_MALFORMED; 2645 } 2646 mCurrentAuxInfoTypeParameter = tmp; 2647 offset += 4; 2648 } 2649 2650 uint8_t defsize; 2651 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 2652 return ERROR_MALFORMED; 2653 } 2654 mCurrentDefaultSampleInfoSize = defsize; 2655 offset++; 2656 2657 uint32_t smplcnt; 2658 if (!mDataSource->getUInt32(offset, &smplcnt)) { 2659 return ERROR_MALFORMED; 2660 } 2661 mCurrentSampleInfoCount = smplcnt; 2662 offset += 4; 2663 2664 if (mCurrentDefaultSampleInfoSize != 0) { 2665 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 2666 return OK; 2667 } 2668 if (smplcnt > 
mCurrentSampleInfoAllocSize) { 2669 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 2670 mCurrentSampleInfoAllocSize = smplcnt; 2671 } 2672 2673 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 2674 return OK; 2675} 2676 2677status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) { 2678 ALOGV("parseSampleAuxiliaryInformationOffsets"); 2679 // 14496-12 8.7.13 2680 uint8_t version; 2681 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 2682 return ERROR_IO; 2683 } 2684 offset++; 2685 2686 uint32_t flags; 2687 if (!mDataSource->getUInt24(offset, &flags)) { 2688 return ERROR_IO; 2689 } 2690 offset += 3; 2691 2692 uint32_t entrycount; 2693 if (!mDataSource->getUInt32(offset, &entrycount)) { 2694 return ERROR_IO; 2695 } 2696 offset += 4; 2697 2698 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 2699 mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 2700 mCurrentSampleInfoOffsetsAllocSize = entrycount; 2701 } 2702 mCurrentSampleInfoOffsetCount = entrycount; 2703 2704 for (size_t i = 0; i < entrycount; i++) { 2705 if (version == 0) { 2706 uint32_t tmp; 2707 if (!mDataSource->getUInt32(offset, &tmp)) { 2708 return ERROR_IO; 2709 } 2710 mCurrentSampleInfoOffsets[i] = tmp; 2711 offset += 4; 2712 } else { 2713 uint64_t tmp; 2714 if (!mDataSource->getUInt64(offset, &tmp)) { 2715 return ERROR_IO; 2716 } 2717 mCurrentSampleInfoOffsets[i] = tmp; 2718 offset += 8; 2719 } 2720 } 2721 2722 // parse clear/encrypted data 2723 2724 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 2725 2726 drmoffset += mCurrentMoofOffset; 2727 int ivlength; 2728 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 2729 2730 // read CencSampleAuxiliaryDataFormats 2731 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 2732 Sample *smpl = &mCurrentSamples.editItemAt(i); 2733 2734 memset(smpl->iv, 0, 16); 2735 if 
(mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 2736 return ERROR_IO; 2737 } 2738 2739 drmoffset += ivlength; 2740 2741 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 2742 if (smplinfosize == 0) { 2743 smplinfosize = mCurrentSampleInfoSizes[i]; 2744 } 2745 if (smplinfosize > ivlength) { 2746 uint16_t numsubsamples; 2747 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 2748 return ERROR_IO; 2749 } 2750 drmoffset += 2; 2751 for (size_t j = 0; j < numsubsamples; j++) { 2752 uint16_t numclear; 2753 uint32_t numencrypted; 2754 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 2755 return ERROR_IO; 2756 } 2757 drmoffset += 2; 2758 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 2759 return ERROR_IO; 2760 } 2761 drmoffset += 4; 2762 smpl->clearsizes.add(numclear); 2763 smpl->encryptedsizes.add(numencrypted); 2764 } 2765 } else { 2766 smpl->clearsizes.add(0); 2767 smpl->encryptedsizes.add(smpl->size); 2768 } 2769 } 2770 2771 2772 return OK; 2773} 2774 2775status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 2776 2777 if (size < 8) { 2778 return -EINVAL; 2779 } 2780 2781 uint32_t flags; 2782 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 2783 return ERROR_MALFORMED; 2784 } 2785 2786 if (flags & 0xff000000) { 2787 return -EINVAL; 2788 } 2789 2790 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 2791 return ERROR_MALFORMED; 2792 } 2793 2794 if (mLastParsedTrackId != mTrackId) { 2795 // this is not the right track, skip it 2796 return OK; 2797 } 2798 2799 mTrackFragmentHeaderInfo.mFlags = flags; 2800 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 2801 offset += 8; 2802 size -= 8; 2803 2804 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 2805 2806 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 2807 if (size < 8) { 2808 return -EINVAL; 2809 } 2810 2811 if (!mDataSource->getUInt64(offset, 
&mTrackFragmentHeaderInfo.mBaseDataOffset)) { 2812 return ERROR_MALFORMED; 2813 } 2814 offset += 8; 2815 size -= 8; 2816 } 2817 2818 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 2819 if (size < 4) { 2820 return -EINVAL; 2821 } 2822 2823 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 2824 return ERROR_MALFORMED; 2825 } 2826 offset += 4; 2827 size -= 4; 2828 } 2829 2830 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 2831 if (size < 4) { 2832 return -EINVAL; 2833 } 2834 2835 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 2836 return ERROR_MALFORMED; 2837 } 2838 offset += 4; 2839 size -= 4; 2840 } 2841 2842 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 2843 if (size < 4) { 2844 return -EINVAL; 2845 } 2846 2847 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 2848 return ERROR_MALFORMED; 2849 } 2850 offset += 4; 2851 size -= 4; 2852 } 2853 2854 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 2855 if (size < 4) { 2856 return -EINVAL; 2857 } 2858 2859 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 2860 return ERROR_MALFORMED; 2861 } 2862 offset += 4; 2863 size -= 4; 2864 } 2865 2866 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 2867 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 2868 } 2869 2870 mTrackFragmentHeaderInfo.mDataOffset = 0; 2871 return OK; 2872} 2873 2874status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 2875 2876 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 2877 if (size < 8) { 2878 return -EINVAL; 2879 } 2880 2881 enum { 2882 kDataOffsetPresent = 0x01, 2883 kFirstSampleFlagsPresent = 0x04, 2884 kSampleDurationPresent = 0x100, 2885 kSampleSizePresent = 0x200, 2886 kSampleFlagsPresent = 0x400, 2887 kSampleCompositionTimeOffsetPresent = 0x800, 2888 }; 
2889 2890 uint32_t flags; 2891 if (!mDataSource->getUInt32(offset, &flags)) { 2892 return ERROR_MALFORMED; 2893 } 2894 ALOGV("fragment run flags: %08x", flags); 2895 2896 if (flags & 0xff000000) { 2897 return -EINVAL; 2898 } 2899 2900 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 2901 // These two shall not be used together. 2902 return -EINVAL; 2903 } 2904 2905 uint32_t sampleCount; 2906 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 2907 return ERROR_MALFORMED; 2908 } 2909 offset += 8; 2910 size -= 8; 2911 2912 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 2913 2914 uint32_t firstSampleFlags = 0; 2915 2916 if (flags & kDataOffsetPresent) { 2917 if (size < 4) { 2918 return -EINVAL; 2919 } 2920 2921 int32_t dataOffsetDelta; 2922 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 2923 return ERROR_MALFORMED; 2924 } 2925 2926 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 2927 2928 offset += 4; 2929 size -= 4; 2930 } 2931 2932 if (flags & kFirstSampleFlagsPresent) { 2933 if (size < 4) { 2934 return -EINVAL; 2935 } 2936 2937 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 2938 return ERROR_MALFORMED; 2939 } 2940 offset += 4; 2941 size -= 4; 2942 } 2943 2944 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 2945 sampleCtsOffset = 0; 2946 2947 size_t bytesPerSample = 0; 2948 if (flags & kSampleDurationPresent) { 2949 bytesPerSample += 4; 2950 } else if (mTrackFragmentHeaderInfo.mFlags 2951 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 2952 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 2953 } else { 2954 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 2955 } 2956 2957 if (flags & kSampleSizePresent) { 2958 bytesPerSample += 4; 2959 } else if (mTrackFragmentHeaderInfo.mFlags 2960 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 2961 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 2962 } 
else { 2963 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 2964 } 2965 2966 if (flags & kSampleFlagsPresent) { 2967 bytesPerSample += 4; 2968 } else if (mTrackFragmentHeaderInfo.mFlags 2969 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 2970 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 2971 } else { 2972 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 2973 } 2974 2975 if (flags & kSampleCompositionTimeOffsetPresent) { 2976 bytesPerSample += 4; 2977 } else { 2978 sampleCtsOffset = 0; 2979 } 2980 2981 if (size < sampleCount * bytesPerSample) { 2982 return -EINVAL; 2983 } 2984 2985 Sample tmp; 2986 for (uint32_t i = 0; i < sampleCount; ++i) { 2987 if (flags & kSampleDurationPresent) { 2988 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 2989 return ERROR_MALFORMED; 2990 } 2991 offset += 4; 2992 } 2993 2994 if (flags & kSampleSizePresent) { 2995 if (!mDataSource->getUInt32(offset, &sampleSize)) { 2996 return ERROR_MALFORMED; 2997 } 2998 offset += 4; 2999 } 3000 3001 if (flags & kSampleFlagsPresent) { 3002 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3003 return ERROR_MALFORMED; 3004 } 3005 offset += 4; 3006 } 3007 3008 if (flags & kSampleCompositionTimeOffsetPresent) { 3009 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3010 return ERROR_MALFORMED; 3011 } 3012 offset += 4; 3013 } 3014 3015 ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, " 3016 " flags 0x%08x", i + 1, 3017 dataOffset, sampleSize, sampleDuration, 3018 (flags & kFirstSampleFlagsPresent) && i == 0 3019 ? 
firstSampleFlags : sampleFlags); 3020 tmp.offset = dataOffset; 3021 tmp.size = sampleSize; 3022 tmp.duration = sampleDuration; 3023 mCurrentSamples.add(tmp); 3024 3025 dataOffset += sampleSize; 3026 } 3027 3028 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3029 3030 return OK; 3031} 3032 3033sp<MetaData> MPEG4Source::getFormat() { 3034 Mutex::Autolock autoLock(mLock); 3035 3036 return mFormat; 3037} 3038 3039size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3040 switch (mNALLengthSize) { 3041 case 1: 3042 return *data; 3043 case 2: 3044 return U16_AT(data); 3045 case 3: 3046 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3047 case 4: 3048 return U32_AT(data); 3049 } 3050 3051 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3052 // a 2-bit integer. 3053 CHECK(!"Should not be here."); 3054 3055 return 0; 3056} 3057 3058status_t MPEG4Source::read( 3059 MediaBuffer **out, const ReadOptions *options) { 3060 Mutex::Autolock autoLock(mLock); 3061 3062 CHECK(mStarted); 3063 3064 if (mFirstMoofOffset > 0) { 3065 return fragmentedRead(out, options); 3066 } 3067 3068 *out = NULL; 3069 3070 int64_t targetSampleTimeUs = -1; 3071 3072 int64_t seekTimeUs; 3073 ReadOptions::SeekMode mode; 3074 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3075 uint32_t findFlags = 0; 3076 switch (mode) { 3077 case ReadOptions::SEEK_PREVIOUS_SYNC: 3078 findFlags = SampleTable::kFlagBefore; 3079 break; 3080 case ReadOptions::SEEK_NEXT_SYNC: 3081 findFlags = SampleTable::kFlagAfter; 3082 break; 3083 case ReadOptions::SEEK_CLOSEST_SYNC: 3084 case ReadOptions::SEEK_CLOSEST: 3085 findFlags = SampleTable::kFlagClosest; 3086 break; 3087 default: 3088 CHECK(!"Should not be here."); 3089 break; 3090 } 3091 3092 uint32_t sampleIndex; 3093 status_t err = mSampleTable->findSampleAtTime( 3094 seekTimeUs * mTimescale / 1000000, 3095 &sampleIndex, findFlags); 3096 3097 if (mode == ReadOptions::SEEK_CLOSEST) { 3098 // We found the closest sample already, now we 
want the sync 3099 // sample preceding it (or the sample itself of course), even 3100 // if the subsequent sync sample is closer. 3101 findFlags = SampleTable::kFlagBefore; 3102 } 3103 3104 uint32_t syncSampleIndex; 3105 if (err == OK) { 3106 err = mSampleTable->findSyncSampleNear( 3107 sampleIndex, &syncSampleIndex, findFlags); 3108 } 3109 3110 uint32_t sampleTime; 3111 if (err == OK) { 3112 err = mSampleTable->getMetaDataForSample( 3113 sampleIndex, NULL, NULL, &sampleTime); 3114 } 3115 3116 if (err != OK) { 3117 if (err == ERROR_OUT_OF_RANGE) { 3118 // An attempt to seek past the end of the stream would 3119 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3120 // this all the way to the MediaPlayer would cause abnormal 3121 // termination. Legacy behaviour appears to be to behave as if 3122 // we had seeked to the end of stream, ending normally. 3123 err = ERROR_END_OF_STREAM; 3124 } 3125 ALOGV("end of stream"); 3126 return err; 3127 } 3128 3129 if (mode == ReadOptions::SEEK_CLOSEST) { 3130 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3131 } 3132 3133#if 0 3134 uint32_t syncSampleTime; 3135 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3136 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3137 3138 ALOGI("seek to time %lld us => sample at time %lld us, " 3139 "sync sample at time %lld us", 3140 seekTimeUs, 3141 sampleTime * 1000000ll / mTimescale, 3142 syncSampleTime * 1000000ll / mTimescale); 3143#endif 3144 3145 mCurrentSampleIndex = syncSampleIndex; 3146 if (mBuffer != NULL) { 3147 mBuffer->release(); 3148 mBuffer = NULL; 3149 } 3150 3151 // fall through 3152 } 3153 3154 off64_t offset; 3155 size_t size; 3156 uint32_t cts; 3157 bool isSyncSample; 3158 bool newBuffer = false; 3159 if (mBuffer == NULL) { 3160 newBuffer = true; 3161 3162 status_t err = 3163 mSampleTable->getMetaDataForSample( 3164 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample); 3165 3166 if (err != OK) { 3167 return err; 3168 } 3169 3170 err = 
mGroup->acquire_buffer(&mBuffer); 3171 3172 if (err != OK) { 3173 CHECK(mBuffer == NULL); 3174 return err; 3175 } 3176 } 3177 3178 if (!mIsAVC || mWantsNALFragments) { 3179 if (newBuffer) { 3180 ssize_t num_bytes_read = 3181 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3182 3183 if (num_bytes_read < (ssize_t)size) { 3184 mBuffer->release(); 3185 mBuffer = NULL; 3186 3187 return ERROR_IO; 3188 } 3189 3190 CHECK(mBuffer != NULL); 3191 mBuffer->set_range(0, size); 3192 mBuffer->meta_data()->clear(); 3193 mBuffer->meta_data()->setInt64( 3194 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3195 3196 if (targetSampleTimeUs >= 0) { 3197 mBuffer->meta_data()->setInt64( 3198 kKeyTargetTime, targetSampleTimeUs); 3199 } 3200 3201 if (isSyncSample) { 3202 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3203 } 3204 3205 ++mCurrentSampleIndex; 3206 } 3207 3208 if (!mIsAVC) { 3209 *out = mBuffer; 3210 mBuffer = NULL; 3211 3212 return OK; 3213 } 3214 3215 // Each NAL unit is split up into its constituent fragments and 3216 // each one of them returned in its own buffer. 
3217 3218 CHECK(mBuffer->range_length() >= mNALLengthSize); 3219 3220 const uint8_t *src = 3221 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3222 3223 size_t nal_size = parseNALSize(src); 3224 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3225 ALOGE("incomplete NAL unit."); 3226 3227 mBuffer->release(); 3228 mBuffer = NULL; 3229 3230 return ERROR_MALFORMED; 3231 } 3232 3233 MediaBuffer *clone = mBuffer->clone(); 3234 CHECK(clone != NULL); 3235 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3236 3237 CHECK(mBuffer != NULL); 3238 mBuffer->set_range( 3239 mBuffer->range_offset() + mNALLengthSize + nal_size, 3240 mBuffer->range_length() - mNALLengthSize - nal_size); 3241 3242 if (mBuffer->range_length() == 0) { 3243 mBuffer->release(); 3244 mBuffer = NULL; 3245 } 3246 3247 *out = clone; 3248 3249 return OK; 3250 } else { 3251 // Whole NAL units are returned but each fragment is prefixed by 3252 // the start code (0x00 00 00 01). 3253 ssize_t num_bytes_read = 0; 3254 int32_t drm = 0; 3255 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3256 if (usesDRM) { 3257 num_bytes_read = 3258 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3259 } else { 3260 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3261 } 3262 3263 if (num_bytes_read < (ssize_t)size) { 3264 mBuffer->release(); 3265 mBuffer = NULL; 3266 3267 return ERROR_IO; 3268 } 3269 3270 if (usesDRM) { 3271 CHECK(mBuffer != NULL); 3272 mBuffer->set_range(0, size); 3273 3274 } else { 3275 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3276 size_t srcOffset = 0; 3277 size_t dstOffset = 0; 3278 3279 while (srcOffset < size) { 3280 bool isMalFormed = !isInRange(0u, size, srcOffset, mNALLengthSize); 3281 size_t nalLength = 0; 3282 if (!isMalFormed) { 3283 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3284 srcOffset += mNALLengthSize; 3285 isMalFormed = !isInRange(0u, size, srcOffset, nalLength); 3286 } 3287 3288 if 
(isMalFormed) { 3289 ALOGE("Video is malformed"); 3290 mBuffer->release(); 3291 mBuffer = NULL; 3292 return ERROR_MALFORMED; 3293 } 3294 3295 if (nalLength == 0) { 3296 continue; 3297 } 3298 3299 CHECK(dstOffset + 4 <= mBuffer->size()); 3300 3301 dstData[dstOffset++] = 0; 3302 dstData[dstOffset++] = 0; 3303 dstData[dstOffset++] = 0; 3304 dstData[dstOffset++] = 1; 3305 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3306 srcOffset += nalLength; 3307 dstOffset += nalLength; 3308 } 3309 CHECK_EQ(srcOffset, size); 3310 CHECK(mBuffer != NULL); 3311 mBuffer->set_range(0, dstOffset); 3312 } 3313 3314 mBuffer->meta_data()->clear(); 3315 mBuffer->meta_data()->setInt64( 3316 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3317 3318 if (targetSampleTimeUs >= 0) { 3319 mBuffer->meta_data()->setInt64( 3320 kKeyTargetTime, targetSampleTimeUs); 3321 } 3322 3323 if (isSyncSample) { 3324 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3325 } 3326 3327 ++mCurrentSampleIndex; 3328 3329 *out = mBuffer; 3330 mBuffer = NULL; 3331 3332 return OK; 3333 } 3334} 3335 3336status_t MPEG4Source::fragmentedRead( 3337 MediaBuffer **out, const ReadOptions *options) { 3338 3339 ALOGV("MPEG4Source::fragmentedRead"); 3340 3341 CHECK(mStarted); 3342 3343 *out = NULL; 3344 3345 int64_t targetSampleTimeUs = -1; 3346 3347 int64_t seekTimeUs; 3348 ReadOptions::SeekMode mode; 3349 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3350 3351 int numSidxEntries = mSegments.size(); 3352 if (numSidxEntries != 0) { 3353 int64_t totalTime = 0; 3354 off64_t totalOffset = mFirstMoofOffset; 3355 for (int i = 0; i < numSidxEntries; i++) { 3356 const SidxEntry *se = &mSegments[i]; 3357 if (totalTime + se->mDurationUs > seekTimeUs) { 3358 // The requested time is somewhere in this segment 3359 if ((mode == ReadOptions::SEEK_NEXT_SYNC) || 3360 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 3361 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3362 // requested next 
sync, or closest sync and it was closer to the end of 3363 // this segment 3364 totalTime += se->mDurationUs; 3365 totalOffset += se->mSize; 3366 } 3367 break; 3368 } 3369 totalTime += se->mDurationUs; 3370 totalOffset += se->mSize; 3371 } 3372 mCurrentMoofOffset = totalOffset; 3373 mCurrentSamples.clear(); 3374 mCurrentSampleIndex = 0; 3375 parseChunk(&totalOffset); 3376 mCurrentTime = totalTime * mTimescale / 1000000ll; 3377 } 3378 3379 if (mBuffer != NULL) { 3380 mBuffer->release(); 3381 mBuffer = NULL; 3382 } 3383 3384 // fall through 3385 } 3386 3387 off64_t offset = 0; 3388 size_t size; 3389 uint32_t cts = 0; 3390 bool isSyncSample = false; 3391 bool newBuffer = false; 3392 if (mBuffer == NULL) { 3393 newBuffer = true; 3394 3395 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3396 // move to next fragment 3397 Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1]; 3398 off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size; 3399 mCurrentMoofOffset = nextMoof; 3400 mCurrentSamples.clear(); 3401 mCurrentSampleIndex = 0; 3402 parseChunk(&nextMoof); 3403 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3404 return ERROR_END_OF_STREAM; 3405 } 3406 } 3407 3408 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3409 offset = smpl->offset; 3410 size = smpl->size; 3411 cts = mCurrentTime; 3412 mCurrentTime += smpl->duration; 3413 isSyncSample = (mCurrentSampleIndex == 0); // XXX 3414 3415 status_t err = mGroup->acquire_buffer(&mBuffer); 3416 3417 if (err != OK) { 3418 CHECK(mBuffer == NULL); 3419 ALOGV("acquire_buffer returned %d", err); 3420 return err; 3421 } 3422 } 3423 3424 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3425 const sp<MetaData> bufmeta = mBuffer->meta_data(); 3426 bufmeta->clear(); 3427 if (smpl->encryptedsizes.size()) { 3428 // store clear/encrypted lengths in metadata 3429 bufmeta->setData(kKeyPlainSizes, 0, 3430 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 3431 
bufmeta->setData(kKeyEncryptedSizes, 0, 3432 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 3433 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 3434 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 3435 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 3436 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 3437 } 3438 3439 if (!mIsAVC || mWantsNALFragments) { 3440 if (newBuffer) { 3441 ssize_t num_bytes_read = 3442 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3443 3444 if (num_bytes_read < (ssize_t)size) { 3445 mBuffer->release(); 3446 mBuffer = NULL; 3447 3448 ALOGV("i/o error"); 3449 return ERROR_IO; 3450 } 3451 3452 CHECK(mBuffer != NULL); 3453 mBuffer->set_range(0, size); 3454 mBuffer->meta_data()->setInt64( 3455 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3456 3457 if (targetSampleTimeUs >= 0) { 3458 mBuffer->meta_data()->setInt64( 3459 kKeyTargetTime, targetSampleTimeUs); 3460 } 3461 3462 if (isSyncSample) { 3463 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3464 } 3465 3466 ++mCurrentSampleIndex; 3467 } 3468 3469 if (!mIsAVC) { 3470 *out = mBuffer; 3471 mBuffer = NULL; 3472 3473 return OK; 3474 } 3475 3476 // Each NAL unit is split up into its constituent fragments and 3477 // each one of them returned in its own buffer. 
3478 3479 CHECK(mBuffer->range_length() >= mNALLengthSize); 3480 3481 const uint8_t *src = 3482 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3483 3484 size_t nal_size = parseNALSize(src); 3485 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3486 ALOGE("incomplete NAL unit."); 3487 3488 mBuffer->release(); 3489 mBuffer = NULL; 3490 3491 return ERROR_MALFORMED; 3492 } 3493 3494 MediaBuffer *clone = mBuffer->clone(); 3495 CHECK(clone != NULL); 3496 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3497 3498 CHECK(mBuffer != NULL); 3499 mBuffer->set_range( 3500 mBuffer->range_offset() + mNALLengthSize + nal_size, 3501 mBuffer->range_length() - mNALLengthSize - nal_size); 3502 3503 if (mBuffer->range_length() == 0) { 3504 mBuffer->release(); 3505 mBuffer = NULL; 3506 } 3507 3508 *out = clone; 3509 3510 return OK; 3511 } else { 3512 ALOGV("whole NAL"); 3513 // Whole NAL units are returned but each fragment is prefixed by 3514 // the start code (0x00 00 00 01). 
3515 ssize_t num_bytes_read = 0; 3516 int32_t drm = 0; 3517 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3518 if (usesDRM) { 3519 num_bytes_read = 3520 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3521 } else { 3522 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3523 } 3524 3525 if (num_bytes_read < (ssize_t)size) { 3526 mBuffer->release(); 3527 mBuffer = NULL; 3528 3529 ALOGV("i/o error"); 3530 return ERROR_IO; 3531 } 3532 3533 if (usesDRM) { 3534 CHECK(mBuffer != NULL); 3535 mBuffer->set_range(0, size); 3536 3537 } else { 3538 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3539 size_t srcOffset = 0; 3540 size_t dstOffset = 0; 3541 3542 while (srcOffset < size) { 3543 bool isMalFormed = !isInRange(0u, size, srcOffset, mNALLengthSize); 3544 size_t nalLength = 0; 3545 if (!isMalFormed) { 3546 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3547 srcOffset += mNALLengthSize; 3548 isMalFormed = !isInRange(0u, size, srcOffset, nalLength); 3549 } 3550 3551 if (isMalFormed) { 3552 ALOGE("Video is malformed"); 3553 mBuffer->release(); 3554 mBuffer = NULL; 3555 return ERROR_MALFORMED; 3556 } 3557 3558 if (nalLength == 0) { 3559 continue; 3560 } 3561 3562 CHECK(dstOffset + 4 <= mBuffer->size()); 3563 3564 dstData[dstOffset++] = 0; 3565 dstData[dstOffset++] = 0; 3566 dstData[dstOffset++] = 0; 3567 dstData[dstOffset++] = 1; 3568 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3569 srcOffset += nalLength; 3570 dstOffset += nalLength; 3571 } 3572 CHECK_EQ(srcOffset, size); 3573 CHECK(mBuffer != NULL); 3574 mBuffer->set_range(0, dstOffset); 3575 } 3576 3577 mBuffer->meta_data()->setInt64( 3578 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3579 3580 if (targetSampleTimeUs >= 0) { 3581 mBuffer->meta_data()->setInt64( 3582 kKeyTargetTime, targetSampleTimeUs); 3583 } 3584 3585 if (isSyncSample) { 3586 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3587 } 3588 3589 ++mCurrentSampleIndex; 3590 3591 *out = 
mBuffer; 3592 mBuffer = NULL; 3593 3594 return OK; 3595 } 3596} 3597 3598MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 3599 const char *mimePrefix) { 3600 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 3601 const char *mime; 3602 if (track->meta != NULL 3603 && track->meta->findCString(kKeyMIMEType, &mime) 3604 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 3605 return track; 3606 } 3607 } 3608 3609 return NULL; 3610} 3611 3612static bool LegacySniffMPEG4( 3613 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 3614 uint8_t header[8]; 3615 3616 ssize_t n = source->readAt(4, header, sizeof(header)); 3617 if (n < (ssize_t)sizeof(header)) { 3618 return false; 3619 } 3620 3621 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 3622 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 3623 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 3624 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 3625 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 3626 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 3627 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 3628 *confidence = 0.4; 3629 3630 return true; 3631 } 3632 3633 return false; 3634} 3635 3636static bool isCompatibleBrand(uint32_t fourcc) { 3637 static const uint32_t kCompatibleBrands[] = { 3638 FOURCC('i', 's', 'o', 'm'), 3639 FOURCC('i', 's', 'o', '2'), 3640 FOURCC('a', 'v', 'c', '1'), 3641 FOURCC('3', 'g', 'p', '4'), 3642 FOURCC('m', 'p', '4', '1'), 3643 FOURCC('m', 'p', '4', '2'), 3644 3645 // Won't promise that the following file types can be played. 3646 // Just give these file types a chance. 
3647 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 3648 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 3649 3650 FOURCC('3', 'g', '2', 'a'), // 3GPP2 3651 FOURCC('3', 'g', '2', 'b'), 3652 }; 3653 3654 for (size_t i = 0; 3655 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 3656 ++i) { 3657 if (kCompatibleBrands[i] == fourcc) { 3658 return true; 3659 } 3660 } 3661 3662 return false; 3663} 3664 3665// Attempt to actually parse the 'ftyp' atom and determine if a suitable 3666// compatible brand is present. 3667// Also try to identify where this file's metadata ends 3668// (end of the 'moov' atom) and report it to the caller as part of 3669// the metadata. 3670static bool BetterSniffMPEG4( 3671 const sp<DataSource> &source, String8 *mimeType, float *confidence, 3672 sp<AMessage> *meta) { 3673 // We scan up to 128 bytes to identify this file as an MP4. 3674 static const off64_t kMaxScanOffset = 128ll; 3675 3676 off64_t offset = 0ll; 3677 bool foundGoodFileType = false; 3678 off64_t moovAtomEndOffset = -1ll; 3679 bool done = false; 3680 3681 while (!done && offset < kMaxScanOffset) { 3682 uint32_t hdr[2]; 3683 if (source->readAt(offset, hdr, 8) < 8) { 3684 return false; 3685 } 3686 3687 uint64_t chunkSize = ntohl(hdr[0]); 3688 uint32_t chunkType = ntohl(hdr[1]); 3689 off64_t chunkDataOffset = offset + 8; 3690 3691 if (chunkSize == 1) { 3692 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 3693 return false; 3694 } 3695 3696 chunkSize = ntoh64(chunkSize); 3697 chunkDataOffset += 8; 3698 3699 if (chunkSize < 16) { 3700 // The smallest valid chunk is 16 bytes long in this case. 3701 return false; 3702 } 3703 } else if (chunkSize < 8) { 3704 // The smallest valid chunk is 8 bytes long. 
3705 return false; 3706 } 3707 3708 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 3709 3710 char chunkstring[5]; 3711 MakeFourCCString(chunkType, chunkstring); 3712 ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset); 3713 switch (chunkType) { 3714 case FOURCC('f', 't', 'y', 'p'): 3715 { 3716 if (chunkDataSize < 8) { 3717 return false; 3718 } 3719 3720 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 3721 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 3722 if (i == 1) { 3723 // Skip this index, it refers to the minorVersion, 3724 // not a brand. 3725 continue; 3726 } 3727 3728 uint32_t brand; 3729 if (source->readAt( 3730 chunkDataOffset + 4 * i, &brand, 4) < 4) { 3731 return false; 3732 } 3733 3734 brand = ntohl(brand); 3735 3736 if (isCompatibleBrand(brand)) { 3737 foundGoodFileType = true; 3738 break; 3739 } 3740 } 3741 3742 if (!foundGoodFileType) { 3743 return false; 3744 } 3745 3746 break; 3747 } 3748 3749 case FOURCC('m', 'o', 'o', 'v'): 3750 { 3751 moovAtomEndOffset = offset + chunkSize; 3752 3753 done = true; 3754 break; 3755 } 3756 3757 default: 3758 break; 3759 } 3760 3761 offset += chunkSize; 3762 } 3763 3764 if (!foundGoodFileType) { 3765 return false; 3766 } 3767 3768 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 3769 *confidence = 0.4f; 3770 3771 if (moovAtomEndOffset >= 0) { 3772 *meta = new AMessage; 3773 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 3774 3775 ALOGV("found metadata size: %lld", moovAtomEndOffset); 3776 } 3777 3778 return true; 3779} 3780 3781bool SniffMPEG4( 3782 const sp<DataSource> &source, String8 *mimeType, float *confidence, 3783 sp<AMessage> *meta) { 3784 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 3785 return true; 3786 } 3787 3788 if (LegacySniffMPEG4(source, mimeType, confidence)) { 3789 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 3790 return true; 3791 } 3792 3793 return false; 3794} 3795 3796} // namespace android 3797