MPEG4Extractor.cpp revision 62df539321b3079f5ff11bb6aeaaab75ef307d40
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17//#define LOG_NDEBUG 0 18#define LOG_TAG "MPEG4Extractor" 19#include <utils/Log.h> 20 21#include "include/MPEG4Extractor.h" 22#include "include/SampleTable.h" 23#include "include/ESDS.h" 24 25#include <ctype.h> 26#include <stdint.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include <media/stagefright/foundation/ABitReader.h> 31#include <media/stagefright/foundation/ABuffer.h> 32#include <media/stagefright/foundation/ADebug.h> 33#include <media/stagefright/foundation/AMessage.h> 34#include <media/stagefright/MediaBuffer.h> 35#include <media/stagefright/MediaBufferGroup.h> 36#include <media/stagefright/MediaDefs.h> 37#include <media/stagefright/MediaSource.h> 38#include <media/stagefright/MetaData.h> 39#include <utils/String8.h> 40 41#include <byteswap.h> 42#include "include/ID3.h" 43 44namespace android { 45 46class MPEG4Source : public MediaSource { 47public: 48 // Caller retains ownership of both "dataSource" and "sampleTable". 49 MPEG4Source(const sp<MetaData> &format, 50 const sp<DataSource> &dataSource, 51 int32_t timeScale, 52 const sp<SampleTable> &sampleTable, 53 Vector<SidxEntry> &sidx, 54 off64_t firstMoofOffset); 55 56 virtual status_t start(MetaData *params = NULL); 57 virtual status_t stop(); 58 59 virtual sp<MetaData> getFormat(); 60 61 virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL); 62 virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL); 63 64protected: 65 virtual ~MPEG4Source(); 66 67private: 68 Mutex mLock; 69 70 sp<MetaData> mFormat; 71 sp<DataSource> mDataSource; 72 int32_t mTimescale; 73 sp<SampleTable> mSampleTable; 74 uint32_t mCurrentSampleIndex; 75 uint32_t mCurrentFragmentIndex; 76 Vector<SidxEntry> &mSegments; 77 off64_t mFirstMoofOffset; 78 off64_t mCurrentMoofOffset; 79 off64_t mNextMoofOffset; 80 uint32_t mCurrentTime; 81 int32_t mLastParsedTrackId; 82 int32_t mTrackId; 83 84 int32_t mCryptoMode; // passed in from extractor 85 int32_t mDefaultIVSize; // passed in from extractor 86 uint8_t mCryptoKey[16]; // passed in from extractor 87 uint32_t mCurrentAuxInfoType; 88 uint32_t mCurrentAuxInfoTypeParameter; 89 int32_t mCurrentDefaultSampleInfoSize; 90 uint32_t mCurrentSampleInfoCount; 91 uint32_t mCurrentSampleInfoAllocSize; 92 uint8_t* mCurrentSampleInfoSizes; 93 uint32_t mCurrentSampleInfoOffsetCount; 94 uint32_t mCurrentSampleInfoOffsetsAllocSize; 95 uint64_t* mCurrentSampleInfoOffsets; 96 97 bool mIsAVC; 98 bool mIsHEVC; 99 size_t mNALLengthSize; 100 101 bool mStarted; 102 103 MediaBufferGroup *mGroup; 104 105 MediaBuffer *mBuffer; 106 107 bool mWantsNALFragments; 108 109 uint8_t *mSrcBuffer; 110 111 size_t parseNALSize(const uint8_t *data) const; 112 status_t parseChunk(off64_t *offset); 113 status_t parseTrackFragmentHeader(off64_t offset, off64_t size); 114 status_t parseTrackFragmentRun(off64_t offset, off64_t size); 115 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size); 116 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size); 117 118 struct TrackFragmentHeaderInfo { 119 enum Flags { 120 kBaseDataOffsetPresent = 0x01, 121 kSampleDescriptionIndexPresent = 0x02, 122 kDefaultSampleDurationPresent = 0x08, 123 kDefaultSampleSizePresent = 0x10, 124 kDefaultSampleFlagsPresent = 0x20, 125 kDurationIsEmpty = 0x10000, 126 }; 127 128 uint32_t mTrackID; 129 uint32_t mFlags; 130 uint64_t mBaseDataOffset; 131 uint32_t mSampleDescriptionIndex; 132 uint32_t mDefaultSampleDuration; 133 uint32_t mDefaultSampleSize; 134 uint32_t mDefaultSampleFlags; 135 136 uint64_t mDataOffset; 137 }; 138 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo; 139 140 struct Sample { 141 off64_t offset; 142 size_t size; 143 uint32_t duration; 144 uint8_t iv[16]; 145 Vector<size_t> clearsizes; 146 Vector<size_t> encryptedsizes; 147 }; 148 Vector<Sample> mCurrentSamples; 149 150 MPEG4Source(const MPEG4Source &); 151 MPEG4Source &operator=(const MPEG4Source &); 152}; 153 154// This custom data source wraps an existing one and satisfies requests 155// falling entirely within a cached range from the cache while forwarding 156// all remaining requests to the wrapped datasource. 157// This is used to cache the full sampletable metadata for a single track, 158// possibly wrapping multiple times to cover all tracks, i.e. 159// Each MPEG4DataSource caches the sampletable metadata for a single track. 160 161struct MPEG4DataSource : public DataSource { 162 MPEG4DataSource(const sp<DataSource> &source); 163 164 virtual status_t initCheck() const; 165 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 166 virtual status_t getSize(off64_t *size); 167 virtual uint32_t flags(); 168 169 status_t setCachedRange(off64_t offset, size_t size); 170 171protected: 172 virtual ~MPEG4DataSource(); 173 174private: 175 Mutex mLock; 176 177 sp<DataSource> mSource; 178 off64_t mCachedOffset; 179 size_t mCachedSize; 180 uint8_t *mCache; 181 182 void clearCache(); 183 184 MPEG4DataSource(const MPEG4DataSource &); 185 MPEG4DataSource &operator=(const MPEG4DataSource &); 186}; 187 188MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 189 : mSource(source), 190 mCachedOffset(0), 191 mCachedSize(0), 192 mCache(NULL) { 193} 194 195MPEG4DataSource::~MPEG4DataSource() { 196 clearCache(); 197} 198 199void MPEG4DataSource::clearCache() { 200 if (mCache) { 201 free(mCache); 202 mCache = NULL; 203 } 204 205 mCachedOffset = 0; 206 mCachedSize = 0; 207} 208 209status_t MPEG4DataSource::initCheck() const { 210 return mSource->initCheck(); 211} 212 213ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 214 Mutex::Autolock autoLock(mLock); 215 216 if (offset >= mCachedOffset 217 && offset + size <= mCachedOffset + mCachedSize) { 218 memcpy(data, &mCache[offset - mCachedOffset], size); 219 return size; 220 } 221 222 return mSource->readAt(offset, data, size); 223} 224 225status_t MPEG4DataSource::getSize(off64_t *size) { 226 return mSource->getSize(size); 227} 228 229uint32_t MPEG4DataSource::flags() { 230 return mSource->flags(); 231} 232 233status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 234 Mutex::Autolock autoLock(mLock); 235 236 clearCache(); 237 238 mCache = (uint8_t *)malloc(size); 239 240 if (mCache == NULL) { 241 return -ENOMEM; 242 } 243 244 mCachedOffset = offset; 245 mCachedSize = size; 246 247 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize); 248 249 if (err < (ssize_t)size) { 250 clearCache(); 251 252 return ERROR_IO; 253 } 254 255 return OK; 256} 257 258//////////////////////////////////////////////////////////////////////////////// 259 260static void hexdump(const void *_data, size_t size) { 261 const uint8_t *data = (const uint8_t *)_data; 262 size_t offset = 0; 263 while (offset < size) { 264 printf("0x%04zx ", offset); 265 266 size_t n = size - offset; 267 if (n > 16) { 268 n = 16; 269 } 270 271 for (size_t i = 0; i < 16; ++i) { 272 if (i == 8) { 273 printf(" "); 274 } 275 276 if (offset + i < size) { 277 printf("%02x ", data[offset + i]); 278 } else { 279 printf(" "); 280 } 281 } 282 283 printf(" "); 284 285 for (size_t i = 0; i < n; ++i) { 286 if (isprint(data[offset + i])) { 287 printf("%c", data[offset + i]); 288 } else { 289 printf("."); 290 } 291 } 292 293 printf("\n"); 294 295 offset += 16; 296 } 297} 298 299static const char *FourCC2MIME(uint32_t fourcc) { 300 switch (fourcc) { 301 case FOURCC('m', 'p', '4', 'a'): 302 return MEDIA_MIMETYPE_AUDIO_AAC; 303 304 case FOURCC('s', 'a', 'm', 'r'): 305 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 306 307 case FOURCC('s', 'a', 'w', 'b'): 308 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 309 310 case FOURCC('m', 'p', '4', 'v'): 311 return MEDIA_MIMETYPE_VIDEO_MPEG4; 312 313 case FOURCC('s', '2', '6', '3'): 314 case FOURCC('h', '2', '6', '3'): 315 case FOURCC('H', '2', '6', '3'): 316 return MEDIA_MIMETYPE_VIDEO_H263; 317 318 case FOURCC('a', 'v', 'c', '1'): 319 return MEDIA_MIMETYPE_VIDEO_AVC; 320 321 case FOURCC('h', 'v', 'c', '1'): 322 case FOURCC('h', 'e', 'v', '1'): 323 return MEDIA_MIMETYPE_VIDEO_HEVC; 324 default: 325 CHECK(!"should not be here."); 326 return NULL; 327 } 328} 329 330static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 331 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 332 // AMR NB audio is always mono, 8kHz 333 *channels = 1; 334 *rate = 8000; 335 return true; 336 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 337 // AMR WB audio is always mono, 16kHz 338 *channels = 1; 339 *rate = 16000; 340 return true; 341 } 342 return false; 343} 344 345MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 346 : mSidxDuration(0), 347 mMoofOffset(0), 348 mDataSource(source), 349 mInitCheck(NO_INIT), 350 mHasVideo(false), 351 mHeaderTimescale(0), 352 mFirstTrack(NULL), 353 mLastTrack(NULL), 354 mFileMetaData(new MetaData), 355 mFirstSINF(NULL), 356 mIsDrm(false) { 357} 358 359MPEG4Extractor::~MPEG4Extractor() { 360 Track *track = mFirstTrack; 361 while (track) { 362 Track *next = track->next; 363 364 delete track; 365 track = next; 366 } 367 mFirstTrack = mLastTrack = NULL; 368 369 SINF *sinf = mFirstSINF; 370 while (sinf) { 371 SINF *next = sinf->next; 372 delete sinf->IPMPData; 373 delete sinf; 374 sinf = next; 375 } 376 mFirstSINF = NULL; 377 378 for (size_t i = 0; i < mPssh.size(); i++) { 379 delete [] mPssh[i].data; 380 } 381} 382 383uint32_t MPEG4Extractor::flags() const { 384 return CAN_PAUSE | 385 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 386 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 387} 388 389sp<MetaData> MPEG4Extractor::getMetaData() { 390 status_t err; 391 if ((err = readMetaData()) != OK) { 392 return new MetaData; 393 } 394 395 return mFileMetaData; 396} 397 398size_t MPEG4Extractor::countTracks() { 399 status_t err; 400 if ((err = readMetaData()) != OK) { 401 ALOGV("MPEG4Extractor::countTracks: no tracks"); 402 return 0; 403 } 404 405 size_t n = 0; 406 Track *track = mFirstTrack; 407 while (track) { 408 ++n; 409 track = track->next; 410 } 411 412 ALOGV("MPEG4Extractor::countTracks: %d tracks", n); 413 return n; 414} 415 416sp<MetaData> MPEG4Extractor::getTrackMetaData( 417 size_t index, uint32_t flags) { 418 status_t err; 419 if ((err = readMetaData()) != OK) { 420 return NULL; 421 } 422 423 Track *track = mFirstTrack; 424 while (index > 0) { 425 if (track == NULL) { 426 return NULL; 427 } 428 429 track = track->next; 430 --index; 431 } 432 433 if (track == NULL) { 434 return NULL; 435 } 436 437 if ((flags & kIncludeExtensiveMetaData) 438 && !track->includes_expensive_metadata) { 439 track->includes_expensive_metadata = true; 440 441 const char *mime; 442 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 443 if (!strncasecmp("video/", mime, 6)) { 444 if (mMoofOffset > 0) { 445 int64_t duration; 446 if (track->meta->findInt64(kKeyDuration, &duration)) { 447 // nothing fancy, just pick a frame near 1/4th of the duration 448 track->meta->setInt64( 449 kKeyThumbnailTime, duration / 4); 450 } 451 } else { 452 uint32_t sampleIndex; 453 uint32_t sampleTime; 454 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 455 && track->sampleTable->getMetaDataForSample( 456 sampleIndex, NULL /* offset */, NULL /* size */, 457 &sampleTime) == OK) { 458 track->meta->setInt64( 459 kKeyThumbnailTime, 460 ((int64_t)sampleTime * 1000000) / track->timescale); 461 } 462 } 463 } 464 } 465 466 return track->meta; 467} 468 469static void MakeFourCCString(uint32_t x, char *s) { 470 s[0] = x >> 24; 471 s[1] = (x >> 16) & 0xff; 472 s[2] = (x >> 8) & 0xff; 473 s[3] = x & 0xff; 474 s[4] = '\0'; 475} 476 477status_t MPEG4Extractor::readMetaData() { 478 if (mInitCheck != NO_INIT) { 479 return mInitCheck; 480 } 481 482 off64_t offset = 0; 483 status_t err; 484 while (true) { 485 off64_t orig_offset = offset; 486 err = parseChunk(&offset, 0); 487 488 if (offset <= orig_offset) { 489 // only continue parsing if the offset was advanced, 490 // otherwise we might end up in an infinite loop 491 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 492 err = ERROR_MALFORMED; 493 break; 494 } else if (err == OK) { 495 continue; 496 } else if (err != UNKNOWN_ERROR) { 497 break; 498 } 499 uint32_t hdr[2]; 500 if (mDataSource->readAt(offset, hdr, 8) < 8) { 501 break; 502 } 503 uint32_t chunk_type = ntohl(hdr[1]); 504 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 505 // store the offset of the first segment 506 mMoofOffset = offset; 507 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 508 // keep parsing until we get to the data 509 continue; 510 } 511 break; 512 } 513 514 if (mInitCheck == OK) { 515 if (mHasVideo) { 516 mFileMetaData->setCString( 517 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 518 } else { 519 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 520 } 521 } else { 522 mInitCheck = err; 523 } 524 525 CHECK_NE(err, (status_t)NO_INIT); 526 527 // copy pssh data into file metadata 528 int psshsize = 0; 529 for (size_t i = 0; i < mPssh.size(); i++) { 530 psshsize += 20 + mPssh[i].datalen; 531 } 532 if (psshsize) { 533 char *buf = (char*)malloc(psshsize); 534 char *ptr = buf; 535 for (size_t i = 0; i < mPssh.size(); i++) { 536 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 537 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 538 ptr += (20 + mPssh[i].datalen); 539 } 540 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 541 free(buf); 542 } 543 return mInitCheck; 544} 545 546char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 547 if (mFirstSINF == NULL) { 548 return NULL; 549 } 550 551 SINF *sinf = mFirstSINF; 552 while (sinf && (trackID != sinf->trackID)) { 553 sinf = sinf->next; 554 } 555 556 if (sinf == NULL) { 557 return NULL; 558 } 559 560 *len = sinf->len; 561 return sinf->IPMPData; 562} 563 564// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 565static int32_t readSize(off64_t offset, 566 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 567 uint32_t size = 0; 568 uint8_t data; 569 bool moreData = true; 570 *numOfBytes = 0; 571 572 while (moreData) { 573 if (DataSource->readAt(offset, &data, 1) < 1) { 574 return -1; 575 } 576 offset ++; 577 moreData = (data >= 128) ? true : false; 578 size = (size << 7) | (data & 0x7f); // Take last 7 bits 579 (*numOfBytes) ++; 580 } 581 582 return size; 583} 584 585status_t MPEG4Extractor::parseDrmSINF( 586 off64_t * /* offset */, off64_t data_offset) { 587 uint8_t updateIdTag; 588 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 589 return ERROR_IO; 590 } 591 data_offset ++; 592 593 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 594 return ERROR_MALFORMED; 595 } 596 597 uint8_t numOfBytes; 598 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 599 if (size < 0) { 600 return ERROR_IO; 601 } 602 int32_t classSize = size; 603 data_offset += numOfBytes; 604 605 while(size >= 11 ) { 606 uint8_t descriptorTag; 607 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 608 return ERROR_IO; 609 } 610 data_offset ++; 611 612 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 613 return ERROR_MALFORMED; 614 } 615 616 uint8_t buffer[8]; 617 //ObjectDescriptorID and ObjectDescriptor url flag 618 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 619 return ERROR_IO; 620 } 621 data_offset += 2; 622 623 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 624 return ERROR_MALFORMED; 625 } 626 627 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 628 return ERROR_IO; 629 } 630 data_offset += 8; 631 632 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 633 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 634 return ERROR_MALFORMED; 635 } 636 637 SINF *sinf = new SINF; 638 sinf->trackID = U16_AT(&buffer[3]); 639 sinf->IPMPDescriptorID = buffer[7]; 640 sinf->next = mFirstSINF; 641 mFirstSINF = sinf; 642 643 size -= (8 + 2 + 1); 644 } 645 646 if (size != 0) { 647 return ERROR_MALFORMED; 648 } 649 650 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 651 return ERROR_IO; 652 } 653 data_offset ++; 654 655 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 656 return ERROR_MALFORMED; 657 } 658 659 size = readSize(data_offset, mDataSource, &numOfBytes); 660 if (size < 0) { 661 return ERROR_IO; 662 } 663 classSize = size; 664 data_offset += numOfBytes; 665 666 while (size > 0) { 667 uint8_t tag; 668 int32_t dataLen; 669 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 670 return ERROR_IO; 671 } 672 data_offset ++; 673 674 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 675 uint8_t id; 676 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 677 if (dataLen < 0) { 678 return ERROR_IO; 679 } else if (dataLen < 4) { 680 return ERROR_MALFORMED; 681 } 682 data_offset += numOfBytes; 683 684 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 685 return ERROR_IO; 686 } 687 data_offset ++; 688 689 SINF *sinf = mFirstSINF; 690 while (sinf && (sinf->IPMPDescriptorID != id)) { 691 sinf = sinf->next; 692 } 693 if (sinf == NULL) { 694 return ERROR_MALFORMED; 695 } 696 sinf->len = dataLen - 3; 697 sinf->IPMPData = new char[sinf->len]; 698 data_offset += 2; 699 700 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 701 return ERROR_IO; 702 } 703 data_offset += sinf->len; 704 705 size -= (dataLen + numOfBytes + 1); 706 } 707 } 708 709 if (size != 0) { 710 return ERROR_MALFORMED; 711 } 712 713 return UNKNOWN_ERROR; // Return a dummy error. 714} 715 716struct PathAdder { 717 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 718 : mPath(path) { 719 mPath->push(chunkType); 720 } 721 722 ~PathAdder() { 723 mPath->pop(); 724 } 725 726private: 727 Vector<uint32_t> *mPath; 728 729 PathAdder(const PathAdder &); 730 PathAdder &operator=(const PathAdder &); 731}; 732 733static bool underMetaDataPath(const Vector<uint32_t> &path) { 734 return path.size() >= 5 735 && path[0] == FOURCC('m', 'o', 'o', 'v') 736 && path[1] == FOURCC('u', 'd', 't', 'a') 737 && path[2] == FOURCC('m', 'e', 't', 'a') 738 && path[3] == FOURCC('i', 'l', 's', 't'); 739} 740 741// Given a time in seconds since Jan 1 1904, produce a human-readable string. 742static void convertTimeToDate(int64_t time_1904, String8 *s) { 743 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 744 745 char tmp[32]; 746 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 747 748 s->setTo(tmp); 749} 750 751status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 752 ALOGV("entering parseChunk %lld/%d", *offset, depth); 753 uint32_t hdr[2]; 754 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 755 return ERROR_IO; 756 } 757 uint64_t chunk_size = ntohl(hdr[0]); 758 uint32_t chunk_type = ntohl(hdr[1]); 759 off64_t data_offset = *offset + 8; 760 761 if (chunk_size == 1) { 762 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 763 return ERROR_IO; 764 } 765 chunk_size = ntoh64(chunk_size); 766 data_offset += 8; 767 768 if (chunk_size < 16) { 769 // The smallest valid chunk is 16 bytes long in this case. 770 return ERROR_MALFORMED; 771 } 772 } else if (chunk_size == 0) { 773 if (depth == 0) { 774 // atom extends to end of file 775 off64_t sourceSize; 776 if (mDataSource->getSize(&sourceSize) == OK) { 777 chunk_size = (sourceSize - *offset); 778 } else { 779 // XXX could we just pick a "sufficiently large" value here? 780 ALOGE("atom size is 0, and data source has no size"); 781 return ERROR_MALFORMED; 782 } 783 } else { 784 // not allowed for non-toplevel atoms, skip it 785 *offset += 4; 786 return OK; 787 } 788 } else if (chunk_size < 8) { 789 // The smallest valid chunk is 8 bytes long. 790 ALOGE("invalid chunk size: %d", int(chunk_size)); 791 return ERROR_MALFORMED; 792 } 793 794 char chunk[5]; 795 MakeFourCCString(chunk_type, chunk); 796 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 797 798#if 0 799 static const char kWhitespace[] = " "; 800 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 801 printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); 802 803 char buffer[256]; 804 size_t n = chunk_size; 805 if (n > sizeof(buffer)) { 806 n = sizeof(buffer); 807 } 808 if (mDataSource->readAt(*offset, buffer, n) 809 < (ssize_t)n) { 810 return ERROR_IO; 811 } 812 813 hexdump(buffer, n); 814#endif 815 816 PathAdder autoAdder(&mPath, chunk_type); 817 818 off64_t chunk_data_size = *offset + chunk_size - data_offset; 819 820 if (chunk_type != FOURCC('c', 'p', 'r', 't') 821 && chunk_type != FOURCC('c', 'o', 'v', 'r') 822 && mPath.size() == 5 && underMetaDataPath(mPath)) { 823 off64_t stop_offset = *offset + chunk_size; 824 *offset = data_offset; 825 while (*offset < stop_offset) { 826 status_t err = parseChunk(offset, depth + 1); 827 if (err != OK) { 828 return err; 829 } 830 } 831 832 if (*offset != stop_offset) { 833 return ERROR_MALFORMED; 834 } 835 836 return OK; 837 } 838 839 switch(chunk_type) { 840 case FOURCC('m', 'o', 'o', 'v'): 841 case FOURCC('t', 'r', 'a', 'k'): 842 case FOURCC('m', 'd', 'i', 'a'): 843 case FOURCC('m', 'i', 'n', 'f'): 844 case FOURCC('d', 'i', 'n', 'f'): 845 case FOURCC('s', 't', 'b', 'l'): 846 case FOURCC('m', 'v', 'e', 'x'): 847 case FOURCC('m', 'o', 'o', 'f'): 848 case FOURCC('t', 'r', 'a', 'f'): 849 case FOURCC('m', 'f', 'r', 'a'): 850 case FOURCC('u', 'd', 't', 'a'): 851 case FOURCC('i', 'l', 's', 't'): 852 case FOURCC('s', 'i', 'n', 'f'): 853 case FOURCC('s', 'c', 'h', 'i'): 854 case FOURCC('e', 'd', 't', 's'): 855 { 856 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 857 ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); 858 859 if (mDataSource->flags() 860 & (DataSource::kWantsPrefetching 861 | DataSource::kIsCachingDataSource)) { 862 sp<MPEG4DataSource> cachedSource = 863 new MPEG4DataSource(mDataSource); 864 865 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 866 mDataSource = cachedSource; 867 } 868 } 869 870 mLastTrack->sampleTable = new SampleTable(mDataSource); 871 } 872 873 bool isTrack = false; 874 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 875 isTrack = true; 876 877 Track *track = new Track; 878 track->next = NULL; 879 if (mLastTrack) { 880 mLastTrack->next = track; 881 } else { 882 mFirstTrack = track; 883 } 884 mLastTrack = track; 885 886 track->meta = new MetaData; 887 track->includes_expensive_metadata = false; 888 track->skipTrack = false; 889 track->timescale = 0; 890 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 891 } 892 893 off64_t stop_offset = *offset + chunk_size; 894 *offset = data_offset; 895 while (*offset < stop_offset) { 896 status_t err = parseChunk(offset, depth + 1); 897 if (err != OK) { 898 return err; 899 } 900 } 901 902 if (*offset != stop_offset) { 903 return ERROR_MALFORMED; 904 } 905 906 if (isTrack) { 907 if (mLastTrack->skipTrack) { 908 Track *cur = mFirstTrack; 909 910 if (cur == mLastTrack) { 911 delete cur; 912 mFirstTrack = mLastTrack = NULL; 913 } else { 914 while (cur && cur->next != mLastTrack) { 915 cur = cur->next; 916 } 917 cur->next = NULL; 918 delete mLastTrack; 919 mLastTrack = cur; 920 } 921 922 return OK; 923 } 924 925 status_t err = verifyTrack(mLastTrack); 926 927 if (err != OK) { 928 return err; 929 } 930 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 931 mInitCheck = OK; 932 933 if (!mIsDrm) { 934 return UNKNOWN_ERROR; // Return a dummy error. 935 } else { 936 return OK; 937 } 938 } 939 break; 940 } 941 942 case FOURCC('e', 'l', 's', 't'): 943 { 944 *offset += chunk_size; 945 946 // See 14496-12 8.6.6 947 uint8_t version; 948 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 949 return ERROR_IO; 950 } 951 952 uint32_t entry_count; 953 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 954 return ERROR_IO; 955 } 956 957 if (entry_count != 1) { 958 // we only support a single entry at the moment, for gapless playback 959 ALOGW("ignoring edit list with %d entries", entry_count); 960 } else if (mHeaderTimescale == 0) { 961 ALOGW("ignoring edit list because timescale is 0"); 962 } else { 963 off64_t entriesoffset = data_offset + 8; 964 uint64_t segment_duration; 965 int64_t media_time; 966 967 if (version == 1) { 968 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 969 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 970 return ERROR_IO; 971 } 972 } else if (version == 0) { 973 uint32_t sd; 974 int32_t mt; 975 if (!mDataSource->getUInt32(entriesoffset, &sd) || 976 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 977 return ERROR_IO; 978 } 979 segment_duration = sd; 980 media_time = mt; 981 } else { 982 return ERROR_IO; 983 } 984 985 uint64_t halfscale = mHeaderTimescale / 2; 986 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 987 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 988 989 int64_t duration; 990 int32_t samplerate; 991 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 992 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 993 994 int64_t delay = (media_time * samplerate + 500000) / 1000000; 995 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 996 997 int64_t paddingus = duration - (segment_duration + media_time); 998 if (paddingus < 0) { 999 // track duration from media header (which is what kKeyDuration is) might 1000 // be slightly shorter than the segment duration, which would make the 1001 // padding negative. Clamp to zero. 1002 paddingus = 0; 1003 } 1004 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1005 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1006 } 1007 } 1008 break; 1009 } 1010 1011 case FOURCC('f', 'r', 'm', 'a'): 1012 { 1013 *offset += chunk_size; 1014 1015 uint32_t original_fourcc; 1016 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1017 return ERROR_IO; 1018 } 1019 original_fourcc = ntohl(original_fourcc); 1020 ALOGV("read original format: %d", original_fourcc); 1021 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1022 uint32_t num_channels = 0; 1023 uint32_t sample_rate = 0; 1024 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1025 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1026 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1027 } 1028 break; 1029 } 1030 1031 case FOURCC('t', 'e', 'n', 'c'): 1032 { 1033 *offset += chunk_size; 1034 1035 if (chunk_size < 32) { 1036 return ERROR_MALFORMED; 1037 } 1038 1039 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1040 // default IV size, 16 bytes default KeyID 1041 // (ISO 23001-7) 1042 char buf[4]; 1043 memset(buf, 0, 4); 1044 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1045 return ERROR_IO; 1046 } 1047 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1048 if (defaultAlgorithmId > 1) { 1049 // only 0 (clear) and 1 (AES-128) are valid 1050 return ERROR_MALFORMED; 1051 } 1052 1053 memset(buf, 0, 4); 1054 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1055 return ERROR_IO; 1056 } 1057 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1058 1059 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1060 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1061 // only unencrypted data must have 0 IV size 1062 return ERROR_MALFORMED; 1063 } else if (defaultIVSize != 0 && 1064 defaultIVSize != 8 && 1065 defaultIVSize != 16) { 1066 // only supported sizes are 0, 8 and 16 1067 return ERROR_MALFORMED; 1068 } 1069 1070 uint8_t defaultKeyId[16]; 1071 1072 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1073 return ERROR_IO; 1074 } 1075 1076 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1077 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1078 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1079 break; 1080 } 1081 1082 case FOURCC('t', 'k', 'h', 'd'): 1083 { 1084 *offset += chunk_size; 1085 1086 status_t err; 1087 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1088 return err; 1089 } 1090 1091 break; 1092 } 1093 1094 case FOURCC('p', 's', 's', 'h'): 1095 { 1096 *offset += chunk_size; 1097 1098 PsshInfo pssh; 1099 1100 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1101 return ERROR_IO; 1102 } 1103 1104 uint32_t psshdatalen = 0; 1105 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1106 return ERROR_IO; 1107 } 1108 pssh.datalen = ntohl(psshdatalen); 1109 ALOGV("pssh data size: %d", pssh.datalen); 1110 if (pssh.datalen + 20 > chunk_size) { 1111 // pssh data length exceeds size of containing box 1112 return ERROR_MALFORMED; 1113 } 1114 1115 pssh.data = new uint8_t[pssh.datalen]; 1116 ALOGV("allocated pssh @ %p", pssh.data); 1117 ssize_t requested = (ssize_t) pssh.datalen; 1118 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1119 return ERROR_IO; 1120 } 1121 mPssh.push_back(pssh); 1122 1123 break; 1124 } 1125 1126 case FOURCC('m', 'd', 'h', 'd'): 1127 { 1128 *offset += chunk_size; 1129 1130 if (chunk_data_size < 4) { 1131 return ERROR_MALFORMED; 1132 } 1133 1134 uint8_t version; 1135 if (mDataSource->readAt( 1136 data_offset, &version, sizeof(version)) 1137 < (ssize_t)sizeof(version)) { 1138 return ERROR_IO; 1139 } 1140 1141 off64_t timescale_offset; 1142 1143 if (version == 1) { 1144 timescale_offset = data_offset + 4 + 16; 1145 } else if (version == 0) { 1146 timescale_offset = data_offset + 4 + 8; 1147 } else { 1148 return ERROR_IO; 1149 } 1150 1151 uint32_t timescale; 1152 if (mDataSource->readAt( 1153 timescale_offset, ×cale, sizeof(timescale)) 1154 < (ssize_t)sizeof(timescale)) { 1155 return ERROR_IO; 1156 } 1157 1158 mLastTrack->timescale = ntohl(timescale); 1159 1160 int64_t duration = 0; 1161 if (version == 1) { 1162 if (mDataSource->readAt( 1163 timescale_offset + 4, &duration, sizeof(duration)) 1164 < (ssize_t)sizeof(duration)) { 1165 return ERROR_IO; 1166 } 1167 duration = ntoh64(duration); 1168 } else { 1169 uint32_t duration32; 1170 if (mDataSource->readAt( 1171 timescale_offset + 4, &duration32, sizeof(duration32)) 1172 < (ssize_t)sizeof(duration32)) { 1173 return ERROR_IO; 1174 } 1175 // ffmpeg sets duration to -1, which is incorrect. 1176 if (duration32 != 0xffffffff) { 1177 duration = ntohl(duration32); 1178 } 1179 } 1180 mLastTrack->meta->setInt64( 1181 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1182 1183 uint8_t lang[2]; 1184 off64_t lang_offset; 1185 if (version == 1) { 1186 lang_offset = timescale_offset + 4 + 8; 1187 } else if (version == 0) { 1188 lang_offset = timescale_offset + 4 + 4; 1189 } else { 1190 return ERROR_IO; 1191 } 1192 1193 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1194 < (ssize_t)sizeof(lang)) { 1195 return ERROR_IO; 1196 } 1197 1198 // To get the ISO-639-2/T three character language code 1199 // 1 bit pad followed by 3 5-bits characters. Each character 1200 // is packed as the difference between its ASCII value and 0x60. 1201 char lang_code[4]; 1202 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1203 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1204 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1205 lang_code[3] = '\0'; 1206 1207 mLastTrack->meta->setCString( 1208 kKeyMediaLanguage, lang_code); 1209 1210 break; 1211 } 1212 1213 case FOURCC('s', 't', 's', 'd'): 1214 { 1215 if (chunk_data_size < 8) { 1216 return ERROR_MALFORMED; 1217 } 1218 1219 uint8_t buffer[8]; 1220 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1221 return ERROR_MALFORMED; 1222 } 1223 1224 if (mDataSource->readAt( 1225 data_offset, buffer, 8) < 8) { 1226 return ERROR_IO; 1227 } 1228 1229 if (U32_AT(buffer) != 0) { 1230 // Should be version 0, flags 0. 1231 return ERROR_MALFORMED; 1232 } 1233 1234 uint32_t entry_count = U32_AT(&buffer[4]); 1235 1236 if (entry_count > 1) { 1237 // For 3GPP timed text, there could be multiple tx3g boxes contain 1238 // multiple text display formats. These formats will be used to 1239 // display the timed text. 1240 // For encrypted files, there may also be more than one entry. 1241 const char *mime; 1242 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1243 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1244 strcasecmp(mime, "application/octet-stream")) { 1245 // For now we only support a single type of media per track. 1246 mLastTrack->skipTrack = true; 1247 *offset += chunk_size; 1248 break; 1249 } 1250 } 1251 off64_t stop_offset = *offset + chunk_size; 1252 *offset = data_offset + 8; 1253 for (uint32_t i = 0; i < entry_count; ++i) { 1254 status_t err = parseChunk(offset, depth + 1); 1255 if (err != OK) { 1256 return err; 1257 } 1258 } 1259 1260 if (*offset != stop_offset) { 1261 return ERROR_MALFORMED; 1262 } 1263 break; 1264 } 1265 1266 case FOURCC('m', 'p', '4', 'a'): 1267 case FOURCC('e', 'n', 'c', 'a'): 1268 case FOURCC('s', 'a', 'm', 'r'): 1269 case FOURCC('s', 'a', 'w', 'b'): 1270 { 1271 uint8_t buffer[8 + 20]; 1272 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1273 // Basic AudioSampleEntry size. 1274 return ERROR_MALFORMED; 1275 } 1276 1277 if (mDataSource->readAt( 1278 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1279 return ERROR_IO; 1280 } 1281 1282 uint16_t data_ref_index = U16_AT(&buffer[6]); 1283 uint32_t num_channels = U16_AT(&buffer[16]); 1284 1285 uint16_t sample_size = U16_AT(&buffer[18]); 1286 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1287 1288 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1289 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1290 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1291 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1292 } 1293 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1294 chunk, num_channels, sample_size, sample_rate); 1295 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1296 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1297 1298 off64_t stop_offset = *offset + chunk_size; 1299 *offset = data_offset + sizeof(buffer); 1300 while (*offset < stop_offset) { 1301 status_t err = parseChunk(offset, depth + 1); 1302 if (err != OK) { 1303 return err; 1304 } 1305 } 1306 1307 if (*offset != stop_offset) { 1308 return ERROR_MALFORMED; 1309 } 1310 break; 1311 } 1312 1313 case FOURCC('m', 'p', '4', 'v'): 1314 case FOURCC('e', 'n', 'c', 'v'): 1315 case FOURCC('s', '2', '6', '3'): 1316 case FOURCC('H', '2', '6', '3'): 1317 case FOURCC('h', '2', '6', '3'): 1318 case FOURCC('a', 'v', 'c', '1'): 1319 case FOURCC('h', 'v', 'c', '1'): 1320 case FOURCC('h', 'e', 'v', '1'): 1321 { 1322 mHasVideo = true; 1323 1324 uint8_t buffer[78]; 1325 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1326 // Basic VideoSampleEntry size. 1327 return ERROR_MALFORMED; 1328 } 1329 1330 if (mDataSource->readAt( 1331 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1332 return ERROR_IO; 1333 } 1334 1335 uint16_t data_ref_index = U16_AT(&buffer[6]); 1336 uint16_t width = U16_AT(&buffer[6 + 18]); 1337 uint16_t height = U16_AT(&buffer[6 + 20]); 1338 1339 // The video sample is not standard-compliant if it has invalid dimension. 1340 // Use some default width and height value, and 1341 // let the decoder figure out the actual width and height (and thus 1342 // be prepared for INFO_FOMRAT_CHANGED event). 1343 if (width == 0) width = 352; 1344 if (height == 0) height = 288; 1345 1346 // printf("*** coding='%s' width=%d height=%d\n", 1347 // chunk, width, height); 1348 1349 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1350 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1351 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1352 } 1353 mLastTrack->meta->setInt32(kKeyWidth, width); 1354 mLastTrack->meta->setInt32(kKeyHeight, height); 1355 1356 off64_t stop_offset = *offset + chunk_size; 1357 *offset = data_offset + sizeof(buffer); 1358 while (*offset < stop_offset) { 1359 status_t err = parseChunk(offset, depth + 1); 1360 if (err != OK) { 1361 return err; 1362 } 1363 } 1364 1365 if (*offset != stop_offset) { 1366 return ERROR_MALFORMED; 1367 } 1368 break; 1369 } 1370 1371 case FOURCC('s', 't', 'c', 'o'): 1372 case FOURCC('c', 'o', '6', '4'): 1373 { 1374 status_t err = 1375 mLastTrack->sampleTable->setChunkOffsetParams( 1376 chunk_type, data_offset, chunk_data_size); 1377 1378 *offset += chunk_size; 1379 1380 if (err != OK) { 1381 return err; 1382 } 1383 1384 break; 1385 } 1386 1387 case FOURCC('s', 't', 's', 'c'): 1388 { 1389 status_t err = 1390 mLastTrack->sampleTable->setSampleToChunkParams( 1391 data_offset, chunk_data_size); 1392 1393 *offset += chunk_size; 1394 1395 if (err != OK) { 1396 return err; 1397 } 1398 1399 break; 1400 } 1401 1402 case FOURCC('s', 't', 's', 'z'): 1403 case FOURCC('s', 't', 'z', '2'): 1404 { 1405 status_t err = 1406 mLastTrack->sampleTable->setSampleSizeParams( 1407 chunk_type, data_offset, chunk_data_size); 1408 1409 *offset += chunk_size; 1410 1411 if (err != OK) { 1412 return err; 1413 } 1414 1415 size_t max_size; 1416 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1417 1418 if (err != OK) { 1419 return err; 1420 } 1421 1422 if (max_size != 0) { 1423 // Assume that a given buffer only contains at most 10 chunks, 1424 // each chunk originally prefixed with a 2 byte length will 1425 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1426 // and thus will grow by 2 bytes per chunk. 1427 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1428 } else { 1429 // No size was specified. Pick a conservatively large size. 1430 int32_t width, height; 1431 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) || 1432 !mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1433 ALOGE("No width or height, assuming worst case 1080p"); 1434 width = 1920; 1435 height = 1080; 1436 } 1437 1438 const char *mime; 1439 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1440 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1441 // AVC requires compression ratio of at least 2, and uses 1442 // macroblocks 1443 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1444 } else { 1445 // For all other formats there is no minimum compression 1446 // ratio. Use compression ratio of 1. 1447 max_size = width * height * 3 / 2; 1448 } 1449 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1450 } 1451 1452 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1453 // mimetype) previously obtained, so don't cache them. 1454 const char *mime; 1455 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1456 // Calculate average frame rate. 1457 if (!strncasecmp("video/", mime, 6)) { 1458 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1459 int64_t durationUs; 1460 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1461 if (durationUs > 0) { 1462 int32_t frameRate = (nSamples * 1000000LL + 1463 (durationUs >> 1)) / durationUs; 1464 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1465 } 1466 } 1467 } 1468 1469 break; 1470 } 1471 1472 case FOURCC('s', 't', 't', 's'): 1473 { 1474 *offset += chunk_size; 1475 1476 status_t err = 1477 mLastTrack->sampleTable->setTimeToSampleParams( 1478 data_offset, chunk_data_size); 1479 1480 if (err != OK) { 1481 return err; 1482 } 1483 1484 break; 1485 } 1486 1487 case FOURCC('c', 't', 't', 's'): 1488 { 1489 *offset += chunk_size; 1490 1491 status_t err = 1492 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1493 data_offset, chunk_data_size); 1494 1495 if (err != OK) { 1496 return err; 1497 } 1498 1499 break; 1500 } 1501 1502 case FOURCC('s', 't', 's', 's'): 1503 { 1504 *offset += chunk_size; 1505 1506 status_t err = 1507 mLastTrack->sampleTable->setSyncSampleParams( 1508 data_offset, chunk_data_size); 1509 1510 if (err != OK) { 1511 return err; 1512 } 1513 1514 break; 1515 } 1516 1517 // @xyz 1518 case FOURCC('\xA9', 'x', 'y', 'z'): 1519 { 1520 *offset += chunk_size; 1521 1522 // Best case the total data length inside "@xyz" box 1523 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1524 // where "\x00\x04" is the text string length with value = 4, 1525 // "\0x15\xc7" is the language code = en, and "0+0" is a 1526 // location (string) value with longitude = 0 and latitude = 0. 1527 if (chunk_data_size < 8) { 1528 return ERROR_MALFORMED; 1529 } 1530 1531 // Worst case the location string length would be 18, 1532 // for instance +90.0000-180.0000, without the trailing "/" and 1533 // the string length + language code. 1534 char buffer[18]; 1535 1536 // Substracting 5 from the data size is because the text string length + 1537 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1538 off64_t location_length = chunk_data_size - 5; 1539 if (location_length >= (off64_t) sizeof(buffer)) { 1540 return ERROR_MALFORMED; 1541 } 1542 1543 if (mDataSource->readAt( 1544 data_offset + 4, buffer, location_length) < location_length) { 1545 return ERROR_IO; 1546 } 1547 1548 buffer[location_length] = '\0'; 1549 mFileMetaData->setCString(kKeyLocation, buffer); 1550 break; 1551 } 1552 1553 case FOURCC('e', 's', 'd', 's'): 1554 { 1555 *offset += chunk_size; 1556 1557 if (chunk_data_size < 4) { 1558 return ERROR_MALFORMED; 1559 } 1560 1561 uint8_t buffer[256]; 1562 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1563 return ERROR_BUFFER_TOO_SMALL; 1564 } 1565 1566 if (mDataSource->readAt( 1567 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1568 return ERROR_IO; 1569 } 1570 1571 if (U32_AT(buffer) != 0) { 1572 // Should be version 0, flags 0. 1573 return ERROR_MALFORMED; 1574 } 1575 1576 mLastTrack->meta->setData( 1577 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1578 1579 if (mPath.size() >= 2 1580 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1581 // Information from the ESDS must be relied on for proper 1582 // setup of sample rate and channel count for MPEG4 Audio. 1583 // The generic header appears to only contain generic 1584 // information... 1585 1586 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1587 &buffer[4], chunk_data_size - 4); 1588 1589 if (err != OK) { 1590 return err; 1591 } 1592 } 1593 1594 break; 1595 } 1596 1597 case FOURCC('a', 'v', 'c', 'C'): 1598 { 1599 *offset += chunk_size; 1600 1601 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1602 1603 if (mDataSource->readAt( 1604 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1605 return ERROR_IO; 1606 } 1607 1608 mLastTrack->meta->setData( 1609 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1610 1611 break; 1612 } 1613 case FOURCC('h', 'v', 'c', 'C'): 1614 { 1615 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1616 1617 if (mDataSource->readAt( 1618 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1619 return ERROR_IO; 1620 } 1621 1622 mLastTrack->meta->setData( 1623 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1624 1625 *offset += chunk_size; 1626 break; 1627 } 1628 1629 case FOURCC('d', '2', '6', '3'): 1630 { 1631 *offset += chunk_size; 1632 /* 1633 * d263 contains a fixed 7 bytes part: 1634 * vendor - 4 bytes 1635 * version - 1 byte 1636 * level - 1 byte 1637 * profile - 1 byte 1638 * optionally, "d263" box itself may contain a 16-byte 1639 * bit rate box (bitr) 1640 * average bit rate - 4 bytes 1641 * max bit rate - 4 bytes 1642 */ 1643 char buffer[23]; 1644 if (chunk_data_size != 7 && 1645 chunk_data_size != 23) { 1646 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1647 return ERROR_MALFORMED; 1648 } 1649 1650 if (mDataSource->readAt( 1651 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1652 return ERROR_IO; 1653 } 1654 1655 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1656 1657 break; 1658 } 1659 1660 case FOURCC('m', 'e', 't', 'a'): 1661 { 1662 uint8_t buffer[4]; 1663 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1664 *offset += chunk_size; 1665 return ERROR_MALFORMED; 1666 } 1667 1668 if (mDataSource->readAt( 1669 data_offset, buffer, 4) < 4) { 1670 *offset += chunk_size; 1671 return ERROR_IO; 1672 } 1673 1674 if (U32_AT(buffer) != 0) { 1675 // Should be version 0, flags 0. 1676 1677 // If it's not, let's assume this is one of those 1678 // apparently malformed chunks that don't have flags 1679 // and completely different semantics than what's 1680 // in the MPEG4 specs and skip it. 1681 *offset += chunk_size; 1682 return OK; 1683 } 1684 1685 off64_t stop_offset = *offset + chunk_size; 1686 *offset = data_offset + sizeof(buffer); 1687 while (*offset < stop_offset) { 1688 status_t err = parseChunk(offset, depth + 1); 1689 if (err != OK) { 1690 return err; 1691 } 1692 } 1693 1694 if (*offset != stop_offset) { 1695 return ERROR_MALFORMED; 1696 } 1697 break; 1698 } 1699 1700 case FOURCC('m', 'e', 'a', 'n'): 1701 case FOURCC('n', 'a', 'm', 'e'): 1702 case FOURCC('d', 'a', 't', 'a'): 1703 { 1704 *offset += chunk_size; 1705 1706 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1707 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1708 1709 if (err != OK) { 1710 return err; 1711 } 1712 } 1713 1714 break; 1715 } 1716 1717 case FOURCC('m', 'v', 'h', 'd'): 1718 { 1719 *offset += chunk_size; 1720 1721 if (chunk_data_size < 24) { 1722 return ERROR_MALFORMED; 1723 } 1724 1725 uint8_t header[24]; 1726 if (mDataSource->readAt( 1727 data_offset, header, sizeof(header)) 1728 < (ssize_t)sizeof(header)) { 1729 return ERROR_IO; 1730 } 1731 1732 uint64_t creationTime; 1733 if (header[0] == 1) { 1734 creationTime = U64_AT(&header[4]); 1735 mHeaderTimescale = U32_AT(&header[20]); 1736 } else if (header[0] != 0) { 1737 return ERROR_MALFORMED; 1738 } else { 1739 creationTime = U32_AT(&header[4]); 1740 mHeaderTimescale = U32_AT(&header[12]); 1741 } 1742 1743 String8 s; 1744 convertTimeToDate(creationTime, &s); 1745 1746 mFileMetaData->setCString(kKeyDate, s.string()); 1747 1748 break; 1749 } 1750 1751 case FOURCC('m', 'd', 'a', 't'): 1752 { 1753 ALOGV("mdat chunk, drm: %d", mIsDrm); 1754 if (!mIsDrm) { 1755 *offset += chunk_size; 1756 break; 1757 } 1758 1759 if (chunk_size < 8) { 1760 return ERROR_MALFORMED; 1761 } 1762 1763 return parseDrmSINF(offset, data_offset); 1764 } 1765 1766 case FOURCC('h', 'd', 'l', 'r'): 1767 { 1768 *offset += chunk_size; 1769 1770 uint32_t buffer; 1771 if (mDataSource->readAt( 1772 data_offset + 8, &buffer, 4) < 4) { 1773 return ERROR_IO; 1774 } 1775 1776 uint32_t type = ntohl(buffer); 1777 // For the 3GPP file format, the handler-type within the 'hdlr' box 1778 // shall be 'text'. We also want to support 'sbtl' handler type 1779 // for a practical reason as various MPEG4 containers use it. 1780 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1781 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1782 } 1783 1784 break; 1785 } 1786 1787 case FOURCC('t', 'x', '3', 'g'): 1788 { 1789 uint32_t type; 1790 const void *data; 1791 size_t size = 0; 1792 if (!mLastTrack->meta->findData( 1793 kKeyTextFormatData, &type, &data, &size)) { 1794 size = 0; 1795 } 1796 1797 uint8_t *buffer = new uint8_t[size + chunk_size]; 1798 1799 if (size > 0) { 1800 memcpy(buffer, data, size); 1801 } 1802 1803 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1804 < chunk_size) { 1805 delete[] buffer; 1806 buffer = NULL; 1807 1808 // advance read pointer so we don't end up reading this again 1809 *offset += chunk_size; 1810 return ERROR_IO; 1811 } 1812 1813 mLastTrack->meta->setData( 1814 kKeyTextFormatData, 0, buffer, size + chunk_size); 1815 1816 delete[] buffer; 1817 1818 *offset += chunk_size; 1819 break; 1820 } 1821 1822 case FOURCC('c', 'o', 'v', 'r'): 1823 { 1824 *offset += chunk_size; 1825 1826 if (mFileMetaData != NULL) { 1827 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1828 chunk_data_size, data_offset); 1829 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1830 if (mDataSource->readAt( 1831 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1832 return ERROR_IO; 1833 } 1834 const int kSkipBytesOfDataBox = 16; 1835 mFileMetaData->setData( 1836 kKeyAlbumArt, MetaData::TYPE_NONE, 1837 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1838 } 1839 1840 break; 1841 } 1842 1843 case FOURCC('t', 'i', 't', 'l'): 1844 case FOURCC('p', 'e', 'r', 'f'): 1845 case FOURCC('a', 'u', 't', 'h'): 1846 case FOURCC('g', 'n', 'r', 'e'): 1847 case FOURCC('a', 'l', 'b', 'm'): 1848 case FOURCC('y', 'r', 'r', 'c'): 1849 { 1850 *offset += chunk_size; 1851 1852 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 1853 1854 if (err != OK) { 1855 return err; 1856 } 1857 1858 break; 1859 } 1860 1861 case FOURCC('I', 'D', '3', '2'): 1862 { 1863 *offset += chunk_size; 1864 1865 if (chunk_data_size < 6) { 1866 return ERROR_MALFORMED; 1867 } 1868 1869 parseID3v2MetaData(data_offset + 6); 1870 1871 break; 1872 } 1873 1874 case FOURCC('-', '-', '-', '-'): 1875 { 1876 mLastCommentMean.clear(); 1877 mLastCommentName.clear(); 1878 mLastCommentData.clear(); 1879 *offset += chunk_size; 1880 break; 1881 } 1882 1883 case FOURCC('s', 'i', 'd', 'x'): 1884 { 1885 parseSegmentIndex(data_offset, chunk_data_size); 1886 *offset += chunk_size; 1887 return UNKNOWN_ERROR; // stop parsing after sidx 1888 } 1889 1890 default: 1891 { 1892 *offset += chunk_size; 1893 break; 1894 } 1895 } 1896 1897 return OK; 1898} 1899 1900status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 1901 ALOGV("MPEG4Extractor::parseSegmentIndex"); 1902 1903 if (size < 12) { 1904 return -EINVAL; 1905 } 1906 1907 uint32_t flags; 1908 if (!mDataSource->getUInt32(offset, &flags)) { 1909 return ERROR_MALFORMED; 1910 } 1911 1912 uint32_t version = flags >> 24; 1913 flags &= 0xffffff; 1914 1915 ALOGV("sidx version %d", version); 1916 1917 uint32_t referenceId; 1918 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 1919 return ERROR_MALFORMED; 1920 } 1921 1922 uint32_t timeScale; 1923 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 1924 return ERROR_MALFORMED; 1925 } 1926 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 1927 1928 uint64_t earliestPresentationTime; 1929 uint64_t firstOffset; 1930 1931 offset += 12; 1932 size -= 12; 1933 1934 if (version == 0) { 1935 if (size < 8) { 1936 return -EINVAL; 1937 } 1938 uint32_t tmp; 1939 if (!mDataSource->getUInt32(offset, &tmp)) { 1940 return ERROR_MALFORMED; 1941 } 1942 earliestPresentationTime = tmp; 1943 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 1944 return ERROR_MALFORMED; 1945 } 1946 firstOffset = tmp; 1947 offset += 8; 1948 size -= 8; 1949 } else { 1950 if (size < 16) { 1951 return -EINVAL; 1952 } 1953 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 1954 return ERROR_MALFORMED; 1955 } 1956 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 1957 return ERROR_MALFORMED; 1958 } 1959 offset += 16; 1960 size -= 16; 1961 } 1962 ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset); 1963 1964 if (size < 4) { 1965 return -EINVAL; 1966 } 1967 1968 uint16_t referenceCount; 1969 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 1970 return ERROR_MALFORMED; 1971 } 1972 offset += 4; 1973 size -= 4; 1974 ALOGV("refcount: %d", referenceCount); 1975 1976 if (size < referenceCount * 12) { 1977 return -EINVAL; 1978 } 1979 1980 uint64_t total_duration = 0; 1981 for (unsigned int i = 0; i < referenceCount; i++) { 1982 uint32_t d1, d2, d3; 1983 1984 if (!mDataSource->getUInt32(offset, &d1) || // size 1985 !mDataSource->getUInt32(offset + 4, &d2) || // duration 1986 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 1987 return ERROR_MALFORMED; 1988 } 1989 1990 if (d1 & 0x80000000) { 1991 ALOGW("sub-sidx boxes not supported yet"); 1992 } 1993 bool sap = d3 & 0x80000000; 1994 uint32_t saptype = (d3 >> 28) & 7; 1995 if (!sap || (saptype != 1 && saptype != 2)) { 1996 // type 1 and 2 are sync samples 1997 ALOGW("not a stream access point, or unsupported type: %08x", d3); 1998 } 1999 total_duration += d2; 2000 offset += 12; 2001 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2002 SidxEntry se; 2003 se.mSize = d1 & 0x7fffffff; 2004 se.mDurationUs = 1000000LL * d2 / timeScale; 2005 mSidxEntries.add(se); 2006 } 2007 2008 mSidxDuration = total_duration * 1000000 / timeScale; 2009 ALOGV("duration: %lld", mSidxDuration); 2010 2011 int64_t metaDuration; 2012 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2013 mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration); 2014 } 2015 return OK; 2016} 2017 2018 2019 2020status_t MPEG4Extractor::parseTrackHeader( 2021 off64_t data_offset, off64_t data_size) { 2022 if (data_size < 4) { 2023 return ERROR_MALFORMED; 2024 } 2025 2026 uint8_t version; 2027 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2028 return ERROR_IO; 2029 } 2030 2031 size_t dynSize = (version == 1) ? 36 : 24; 2032 2033 uint8_t buffer[36 + 60]; 2034 2035 if (data_size != (off64_t)dynSize + 60) { 2036 return ERROR_MALFORMED; 2037 } 2038 2039 if (mDataSource->readAt( 2040 data_offset, buffer, data_size) < (ssize_t)data_size) { 2041 return ERROR_IO; 2042 } 2043 2044 uint64_t ctime, mtime, duration; 2045 int32_t id; 2046 2047 if (version == 1) { 2048 ctime = U64_AT(&buffer[4]); 2049 mtime = U64_AT(&buffer[12]); 2050 id = U32_AT(&buffer[20]); 2051 duration = U64_AT(&buffer[28]); 2052 } else if (version == 0) { 2053 ctime = U32_AT(&buffer[4]); 2054 mtime = U32_AT(&buffer[8]); 2055 id = U32_AT(&buffer[12]); 2056 duration = U32_AT(&buffer[20]); 2057 } else { 2058 return ERROR_UNSUPPORTED; 2059 } 2060 2061 mLastTrack->meta->setInt32(kKeyTrackID, id); 2062 2063 size_t matrixOffset = dynSize + 16; 2064 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2065 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2066 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2067 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2068 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 2069 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2070 2071#if 0 2072 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2073 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2074 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2075 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2076#endif 2077 2078 uint32_t rotationDegrees; 2079 2080 static const int32_t kFixedOne = 0x10000; 2081 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2082 // Identity, no rotation 2083 rotationDegrees = 0; 2084 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2085 rotationDegrees = 90; 2086 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2087 rotationDegrees = 270; 2088 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2089 rotationDegrees = 180; 2090 } else { 2091 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2092 rotationDegrees = 0; 2093 } 2094 2095 if (rotationDegrees != 0) { 2096 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2097 } 2098 2099 // Handle presentation display size, which could be different 2100 // from the image size indicated by kKeyWidth and kKeyHeight. 2101 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2102 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2103 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2104 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2105 2106 return OK; 2107} 2108 2109status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2110 if (size < 4) { 2111 return ERROR_MALFORMED; 2112 } 2113 2114 uint8_t *buffer = new uint8_t[size + 1]; 2115 if (mDataSource->readAt( 2116 offset, buffer, size) != (ssize_t)size) { 2117 delete[] buffer; 2118 buffer = NULL; 2119 2120 return ERROR_IO; 2121 } 2122 2123 uint32_t flags = U32_AT(buffer); 2124 2125 uint32_t metadataKey = 0; 2126 char chunk[5]; 2127 MakeFourCCString(mPath[4], chunk); 2128 ALOGV("meta: %s @ %lld", chunk, offset); 2129 switch (mPath[4]) { 2130 case FOURCC(0xa9, 'a', 'l', 'b'): 2131 { 2132 metadataKey = kKeyAlbum; 2133 break; 2134 } 2135 case FOURCC(0xa9, 'A', 'R', 'T'): 2136 { 2137 metadataKey = kKeyArtist; 2138 break; 2139 } 2140 case FOURCC('a', 'A', 'R', 'T'): 2141 { 2142 metadataKey = kKeyAlbumArtist; 2143 break; 2144 } 2145 case FOURCC(0xa9, 'd', 'a', 'y'): 2146 { 2147 metadataKey = kKeyYear; 2148 break; 2149 } 2150 case FOURCC(0xa9, 'n', 'a', 'm'): 2151 { 2152 metadataKey = kKeyTitle; 2153 break; 2154 } 2155 case FOURCC(0xa9, 'w', 'r', 't'): 2156 { 2157 metadataKey = kKeyWriter; 2158 break; 2159 } 2160 case FOURCC('c', 'o', 'v', 'r'): 2161 { 2162 metadataKey = kKeyAlbumArt; 2163 break; 2164 } 2165 case FOURCC('g', 'n', 'r', 'e'): 2166 { 2167 metadataKey = kKeyGenre; 2168 break; 2169 } 2170 case FOURCC(0xa9, 'g', 'e', 'n'): 2171 { 2172 metadataKey = kKeyGenre; 2173 break; 2174 } 2175 case FOURCC('c', 'p', 'i', 'l'): 2176 { 2177 if (size == 9 && flags == 21) { 2178 char tmp[16]; 2179 sprintf(tmp, "%d", 2180 (int)buffer[size - 1]); 2181 2182 mFileMetaData->setCString(kKeyCompilation, tmp); 2183 } 2184 break; 2185 } 2186 case FOURCC('t', 'r', 'k', 'n'): 2187 { 2188 if (size == 16 && flags == 0) { 2189 char tmp[16]; 2190 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2191 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2192 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2193 2194 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2195 } 2196 break; 2197 } 2198 case FOURCC('d', 'i', 's', 'k'): 2199 { 2200 if ((size == 14 || size == 16) && flags == 0) { 2201 char tmp[16]; 2202 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2203 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2204 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2205 2206 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2207 } 2208 break; 2209 } 2210 case FOURCC('-', '-', '-', '-'): 2211 { 2212 buffer[size] = '\0'; 2213 switch (mPath[5]) { 2214 case FOURCC('m', 'e', 'a', 'n'): 2215 mLastCommentMean.setTo((const char *)buffer + 4); 2216 break; 2217 case FOURCC('n', 'a', 'm', 'e'): 2218 mLastCommentName.setTo((const char *)buffer + 4); 2219 break; 2220 case FOURCC('d', 'a', 't', 'a'): 2221 mLastCommentData.setTo((const char *)buffer + 8); 2222 break; 2223 } 2224 2225 // Once we have a set of mean/name/data info, go ahead and process 2226 // it to see if its something we are interested in. Whether or not 2227 // were are interested in the specific tag, make sure to clear out 2228 // the set so we can be ready to process another tuple should one 2229 // show up later in the file. 2230 if ((mLastCommentMean.length() != 0) && 2231 (mLastCommentName.length() != 0) && 2232 (mLastCommentData.length() != 0)) { 2233 2234 if (mLastCommentMean == "com.apple.iTunes" 2235 && mLastCommentName == "iTunSMPB") { 2236 int32_t delay, padding; 2237 if (sscanf(mLastCommentData, 2238 " %*x %x %x %*x", &delay, &padding) == 2) { 2239 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2240 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2241 } 2242 } 2243 2244 mLastCommentMean.clear(); 2245 mLastCommentName.clear(); 2246 mLastCommentData.clear(); 2247 } 2248 break; 2249 } 2250 2251 default: 2252 break; 2253 } 2254 2255 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2256 if (metadataKey == kKeyAlbumArt) { 2257 mFileMetaData->setData( 2258 kKeyAlbumArt, MetaData::TYPE_NONE, 2259 buffer + 8, size - 8); 2260 } else if (metadataKey == kKeyGenre) { 2261 if (flags == 0) { 2262 // uint8_t genre code, iTunes genre codes are 2263 // the standard id3 codes, except they start 2264 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2265 // We use standard id3 numbering, so subtract 1. 2266 int genrecode = (int)buffer[size - 1]; 2267 genrecode--; 2268 if (genrecode < 0) { 2269 genrecode = 255; // reserved for 'unknown genre' 2270 } 2271 char genre[10]; 2272 sprintf(genre, "%d", genrecode); 2273 2274 mFileMetaData->setCString(metadataKey, genre); 2275 } else if (flags == 1) { 2276 // custom genre string 2277 buffer[size] = '\0'; 2278 2279 mFileMetaData->setCString( 2280 metadataKey, (const char *)buffer + 8); 2281 } 2282 } else { 2283 buffer[size] = '\0'; 2284 2285 mFileMetaData->setCString( 2286 metadataKey, (const char *)buffer + 8); 2287 } 2288 } 2289 2290 delete[] buffer; 2291 buffer = NULL; 2292 2293 return OK; 2294} 2295 2296status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2297 if (size < 4) { 2298 return ERROR_MALFORMED; 2299 } 2300 2301 uint8_t *buffer = new uint8_t[size]; 2302 if (mDataSource->readAt( 2303 offset, buffer, size) != (ssize_t)size) { 2304 delete[] buffer; 2305 buffer = NULL; 2306 2307 return ERROR_IO; 2308 } 2309 2310 uint32_t metadataKey = 0; 2311 switch (mPath[depth]) { 2312 case FOURCC('t', 'i', 't', 'l'): 2313 { 2314 metadataKey = kKeyTitle; 2315 break; 2316 } 2317 case FOURCC('p', 'e', 'r', 'f'): 2318 { 2319 metadataKey = kKeyArtist; 2320 break; 2321 } 2322 case FOURCC('a', 'u', 't', 'h'): 2323 { 2324 metadataKey = kKeyWriter; 2325 break; 2326 } 2327 case FOURCC('g', 'n', 'r', 'e'): 2328 { 2329 metadataKey = kKeyGenre; 2330 break; 2331 } 2332 case FOURCC('a', 'l', 'b', 'm'): 2333 { 2334 if (buffer[size - 1] != '\0') { 2335 char tmp[4]; 2336 sprintf(tmp, "%u", buffer[size - 1]); 2337 2338 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2339 } 2340 2341 metadataKey = kKeyAlbum; 2342 break; 2343 } 2344 case FOURCC('y', 'r', 'r', 'c'): 2345 { 2346 char tmp[5]; 2347 uint16_t year = U16_AT(&buffer[4]); 2348 2349 if (year < 10000) { 2350 sprintf(tmp, "%u", year); 2351 2352 mFileMetaData->setCString(kKeyYear, tmp); 2353 } 2354 break; 2355 } 2356 2357 default: 2358 break; 2359 } 2360 2361 if (metadataKey > 0) { 2362 bool isUTF8 = true; // Common case 2363 char16_t *framedata = NULL; 2364 int len16 = 0; // Number of UTF-16 characters 2365 2366 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2367 if (size - 6 >= 4) { 2368 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2369 framedata = (char16_t *)(buffer + 6); 2370 if (0xfffe == *framedata) { 2371 // endianness marker (BOM) doesn't match host endianness 2372 for (int i = 0; i < len16; i++) { 2373 framedata[i] = bswap_16(framedata[i]); 2374 } 2375 // BOM is now swapped to 0xfeff, we will execute next block too 2376 } 2377 2378 if (0xfeff == *framedata) { 2379 // Remove the BOM 2380 framedata++; 2381 len16--; 2382 isUTF8 = false; 2383 } 2384 // else normal non-zero-length UTF-8 string 2385 // we can't handle UTF-16 without BOM as there is no other 2386 // indication of encoding. 2387 } 2388 2389 if (isUTF8) { 2390 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2391 } else { 2392 // Convert from UTF-16 string to UTF-8 string. 2393 String8 tmpUTF8str(framedata, len16); 2394 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2395 } 2396 } 2397 2398 delete[] buffer; 2399 buffer = NULL; 2400 2401 return OK; 2402} 2403 2404void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2405 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2406 2407 if (id3.isValid()) { 2408 struct Map { 2409 int key; 2410 const char *tag1; 2411 const char *tag2; 2412 }; 2413 static const Map kMap[] = { 2414 { kKeyAlbum, "TALB", "TAL" }, 2415 { kKeyArtist, "TPE1", "TP1" }, 2416 { kKeyAlbumArtist, "TPE2", "TP2" }, 2417 { kKeyComposer, "TCOM", "TCM" }, 2418 { kKeyGenre, "TCON", "TCO" }, 2419 { kKeyTitle, "TIT2", "TT2" }, 2420 { kKeyYear, "TYE", "TYER" }, 2421 { kKeyAuthor, "TXT", "TEXT" }, 2422 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2423 { kKeyDiscNumber, "TPA", "TPOS" }, 2424 { kKeyCompilation, "TCP", "TCMP" }, 2425 }; 2426 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2427 2428 for (size_t i = 0; i < kNumMapEntries; ++i) { 2429 if (!mFileMetaData->hasData(kMap[i].key)) { 2430 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2431 if (it->done()) { 2432 delete it; 2433 it = new ID3::Iterator(id3, kMap[i].tag2); 2434 } 2435 2436 if (it->done()) { 2437 delete it; 2438 continue; 2439 } 2440 2441 String8 s; 2442 it->getString(&s); 2443 delete it; 2444 2445 mFileMetaData->setCString(kMap[i].key, s); 2446 } 2447 } 2448 2449 size_t dataSize; 2450 String8 mime; 2451 const void *data = id3.getAlbumArt(&dataSize, &mime); 2452 2453 if (data) { 2454 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2455 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2456 } 2457 } 2458} 2459 2460sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2461 status_t err; 2462 if ((err = readMetaData()) != OK) { 2463 return NULL; 2464 } 2465 2466 Track *track = mFirstTrack; 2467 while (index > 0) { 2468 if (track == NULL) { 2469 return NULL; 2470 } 2471 2472 track = track->next; 2473 --index; 2474 } 2475 2476 if (track == NULL) { 2477 return NULL; 2478 } 2479 2480 ALOGV("getTrack called, pssh: %d", mPssh.size()); 2481 2482 return new MPEG4Source( 2483 track->meta, mDataSource, track->timescale, track->sampleTable, 2484 mSidxEntries, mMoofOffset); 2485} 2486 2487// static 2488status_t MPEG4Extractor::verifyTrack(Track *track) { 2489 const char *mime; 2490 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2491 2492 uint32_t type; 2493 const void *data; 2494 size_t size; 2495 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2496 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2497 || type != kTypeAVCC) { 2498 return ERROR_MALFORMED; 2499 } 2500 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2501 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2502 || type != kTypeHVCC) { 2503 return ERROR_MALFORMED; 2504 } 2505 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2506 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2507 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2508 || type != kTypeESDS) { 2509 return ERROR_MALFORMED; 2510 } 2511 } 2512 2513 if (track->sampleTable == NULL || !track->sampleTable->isValid()) { 2514 // Make sure we have all the metadata we need. 2515 ALOGE("stbl atom missing/invalid."); 2516 return ERROR_MALFORMED; 2517 } 2518 2519 return OK; 2520} 2521 2522typedef enum { 2523 //AOT_NONE = -1, 2524 //AOT_NULL_OBJECT = 0, 2525 //AOT_AAC_MAIN = 1, /**< Main profile */ 2526 AOT_AAC_LC = 2, /**< Low Complexity object */ 2527 //AOT_AAC_SSR = 3, 2528 //AOT_AAC_LTP = 4, 2529 AOT_SBR = 5, 2530 //AOT_AAC_SCAL = 6, 2531 //AOT_TWIN_VQ = 7, 2532 //AOT_CELP = 8, 2533 //AOT_HVXC = 9, 2534 //AOT_RSVD_10 = 10, /**< (reserved) */ 2535 //AOT_RSVD_11 = 11, /**< (reserved) */ 2536 //AOT_TTSI = 12, /**< TTSI Object */ 2537 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2538 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2539 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2540 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2541 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2542 //AOT_RSVD_18 = 18, /**< (reserved) */ 2543 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2544 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2545 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2546 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2547 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2548 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2549 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2550 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2551 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2552 //AOT_RSVD_28 = 28, /**< might become SSC */ 2553 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2554 //AOT_MPEGS = 30, /**< MPEG Surround */ 2555 2556 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2557 2558 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2559 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 2560 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2561 //AOT_RSVD_35 = 35, /**< might become DST */ 2562 //AOT_RSVD_36 = 36, /**< might become ALS */ 2563 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2564 //AOT_SLS = 38, /**< SLS */ 2565 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2566 2567 //AOT_USAC = 42, /**< USAC */ 2568 //AOT_SAOC = 43, /**< SAOC */ 2569 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2570 2571 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2572} AUDIO_OBJECT_TYPE; 2573 2574status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2575 const void *esds_data, size_t esds_size) { 2576 ESDS esds(esds_data, esds_size); 2577 2578 uint8_t objectTypeIndication; 2579 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2580 return ERROR_MALFORMED; 2581 } 2582 2583 if (objectTypeIndication == 0xe1) { 2584 // This isn't MPEG4 audio at all, it's QCELP 14k... 2585 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2586 return OK; 2587 } 2588 2589 if (objectTypeIndication == 0x6b) { 2590 // The media subtype is MP3 audio 2591 // Our software MP3 audio decoder may not be able to handle 2592 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2593 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2594 return ERROR_UNSUPPORTED; 2595 } 2596 2597 const uint8_t *csd; 2598 size_t csd_size; 2599 if (esds.getCodecSpecificInfo( 2600 (const void **)&csd, &csd_size) != OK) { 2601 return ERROR_MALFORMED; 2602 } 2603 2604#if 0 2605 printf("ESD of size %d\n", csd_size); 2606 hexdump(csd, csd_size); 2607#endif 2608 2609 if (csd_size == 0) { 2610 // There's no further information, i.e. no codec specific data 2611 // Let's assume that the information provided in the mpeg4 headers 2612 // is accurate and hope for the best. 2613 2614 return OK; 2615 } 2616 2617 if (csd_size < 2) { 2618 return ERROR_MALFORMED; 2619 } 2620 2621 static uint32_t kSamplingRate[] = { 2622 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2623 16000, 12000, 11025, 8000, 7350 2624 }; 2625 2626 ABitReader br(csd, csd_size); 2627 uint32_t objectType = br.getBits(5); 2628 2629 if (objectType == 31) { // AAC-ELD => additional 6 bits 2630 objectType = 32 + br.getBits(6); 2631 } 2632 2633 //keep AOT type 2634 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2635 2636 uint32_t freqIndex = br.getBits(4); 2637 2638 int32_t sampleRate = 0; 2639 int32_t numChannels = 0; 2640 if (freqIndex == 15) { 2641 if (csd_size < 5) { 2642 return ERROR_MALFORMED; 2643 } 2644 sampleRate = br.getBits(24); 2645 numChannels = br.getBits(4); 2646 } else { 2647 numChannels = br.getBits(4); 2648 2649 if (freqIndex == 13 || freqIndex == 14) { 2650 return ERROR_MALFORMED; 2651 } 2652 2653 sampleRate = kSamplingRate[freqIndex]; 2654 } 2655 2656 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2657 uint32_t extFreqIndex = br.getBits(4); 2658 int32_t extSampleRate; 2659 if (extFreqIndex == 15) { 2660 if (csd_size < 8) { 2661 return ERROR_MALFORMED; 2662 } 2663 extSampleRate = br.getBits(24); 2664 } else { 2665 if (extFreqIndex == 13 || extFreqIndex == 14) { 2666 return ERROR_MALFORMED; 2667 } 2668 extSampleRate = kSamplingRate[extFreqIndex]; 2669 } 2670 //TODO: save the extension sampling rate value in meta data => 2671 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2672 } 2673 2674 switch (numChannels) { 2675 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2676 case 0: 2677 case 1:// FC 2678 case 2:// FL FR 2679 case 3:// FC, FL FR 2680 case 4:// FC, FL FR, RC 2681 case 5:// FC, FL FR, SL SR 2682 case 6:// FC, FL FR, SL SR, LFE 2683 //numChannels already contains the right value 2684 break; 2685 case 11:// FC, FL FR, SL SR, RC, LFE 2686 numChannels = 7; 2687 break; 2688 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2689 case 12:// FC, FL FR, SL SR, RL RR, LFE 2690 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2691 numChannels = 8; 2692 break; 2693 default: 2694 return ERROR_UNSUPPORTED; 2695 } 2696 2697 { 2698 if (objectType == AOT_SBR || objectType == AOT_PS) { 2699 const int32_t extensionSamplingFrequency = br.getBits(4); 2700 objectType = br.getBits(5); 2701 2702 if (objectType == AOT_ESCAPE) { 2703 objectType = 32 + br.getBits(6); 2704 } 2705 } 2706 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2707 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2708 objectType == AOT_ER_BSAC) { 2709 const int32_t frameLengthFlag = br.getBits(1); 2710 2711 const int32_t dependsOnCoreCoder = br.getBits(1); 2712 2713 if (dependsOnCoreCoder ) { 2714 const int32_t coreCoderDelay = br.getBits(14); 2715 } 2716 2717 const int32_t extensionFlag = br.getBits(1); 2718 2719 if (numChannels == 0 ) { 2720 int32_t channelsEffectiveNum = 0; 2721 int32_t channelsNum = 0; 2722 const int32_t ElementInstanceTag = br.getBits(4); 2723 const int32_t Profile = br.getBits(2); 2724 const int32_t SamplingFrequencyIndex = br.getBits(4); 2725 const int32_t NumFrontChannelElements = br.getBits(4); 2726 const int32_t NumSideChannelElements = br.getBits(4); 2727 const int32_t NumBackChannelElements = br.getBits(4); 2728 const int32_t NumLfeChannelElements = br.getBits(2); 2729 const int32_t NumAssocDataElements = br.getBits(3); 2730 const int32_t NumValidCcElements = br.getBits(4); 2731 2732 const int32_t MonoMixdownPresent = br.getBits(1); 2733 if (MonoMixdownPresent != 0) { 2734 const int32_t MonoMixdownElementNumber = br.getBits(4); 2735 } 2736 2737 const int32_t StereoMixdownPresent = br.getBits(1); 2738 if (StereoMixdownPresent != 0) { 2739 const int32_t StereoMixdownElementNumber = br.getBits(4); 2740 } 2741 2742 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 2743 if (MatrixMixdownIndexPresent != 0) { 2744 const int32_t MatrixMixdownIndex = br.getBits(2); 2745 const int32_t PseudoSurroundEnable = br.getBits(1); 2746 } 2747 2748 int i; 2749 for (i=0; i < NumFrontChannelElements; i++) { 2750 const int32_t FrontElementIsCpe = br.getBits(1); 2751 const int32_t FrontElementTagSelect = br.getBits(4); 2752 channelsNum += FrontElementIsCpe ? 2 : 1; 2753 } 2754 2755 for (i=0; i < NumSideChannelElements; i++) { 2756 const int32_t SideElementIsCpe = br.getBits(1); 2757 const int32_t SideElementTagSelect = br.getBits(4); 2758 channelsNum += SideElementIsCpe ? 2 : 1; 2759 } 2760 2761 for (i=0; i < NumBackChannelElements; i++) { 2762 const int32_t BackElementIsCpe = br.getBits(1); 2763 const int32_t BackElementTagSelect = br.getBits(4); 2764 channelsNum += BackElementIsCpe ? 2 : 1; 2765 } 2766 channelsEffectiveNum = channelsNum; 2767 2768 for (i=0; i < NumLfeChannelElements; i++) { 2769 const int32_t LfeElementTagSelect = br.getBits(4); 2770 channelsNum += 1; 2771 } 2772 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 2773 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 2774 numChannels = channelsNum; 2775 } 2776 } 2777 } 2778 2779 if (numChannels == 0) { 2780 return ERROR_UNSUPPORTED; 2781 } 2782 2783 int32_t prevSampleRate; 2784 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2785 2786 if (prevSampleRate != sampleRate) { 2787 ALOGV("mpeg4 audio sample rate different from previous setting. " 2788 "was: %d, now: %d", prevSampleRate, sampleRate); 2789 } 2790 2791 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2792 2793 int32_t prevChannelCount; 2794 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2795 2796 if (prevChannelCount != numChannels) { 2797 ALOGV("mpeg4 audio channel count different from previous setting. " 2798 "was: %d, now: %d", prevChannelCount, numChannels); 2799 } 2800 2801 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2802 2803 return OK; 2804} 2805 2806//////////////////////////////////////////////////////////////////////////////// 2807 2808MPEG4Source::MPEG4Source( 2809 const sp<MetaData> &format, 2810 const sp<DataSource> &dataSource, 2811 int32_t timeScale, 2812 const sp<SampleTable> &sampleTable, 2813 Vector<SidxEntry> &sidx, 2814 off64_t firstMoofOffset) 2815 : mFormat(format), 2816 mDataSource(dataSource), 2817 mTimescale(timeScale), 2818 mSampleTable(sampleTable), 2819 mCurrentSampleIndex(0), 2820 mCurrentFragmentIndex(0), 2821 mSegments(sidx), 2822 mFirstMoofOffset(firstMoofOffset), 2823 mCurrentMoofOffset(firstMoofOffset), 2824 mCurrentTime(0), 2825 mCurrentSampleInfoAllocSize(0), 2826 mCurrentSampleInfoSizes(NULL), 2827 mCurrentSampleInfoOffsetsAllocSize(0), 2828 mCurrentSampleInfoOffsets(NULL), 2829 mIsAVC(false), 2830 mIsHEVC(false), 2831 mNALLengthSize(0), 2832 mStarted(false), 2833 mGroup(NULL), 2834 mBuffer(NULL), 2835 mWantsNALFragments(false), 2836 mSrcBuffer(NULL) { 2837 2838 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 2839 mDefaultIVSize = 0; 2840 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 2841 uint32_t keytype; 2842 const void *key; 2843 size_t keysize; 2844 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 2845 CHECK(keysize <= 16); 2846 memset(mCryptoKey, 0, 16); 2847 memcpy(mCryptoKey, key, keysize); 2848 } 2849 2850 const char *mime; 2851 bool success = mFormat->findCString(kKeyMIMEType, &mime); 2852 CHECK(success); 2853 2854 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 2855 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 2856 2857 if (mIsAVC) { 2858 uint32_t type; 2859 const void *data; 2860 size_t size; 2861 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 2862 2863 const uint8_t *ptr = (const uint8_t *)data; 2864 2865 CHECK(size >= 7); 2866 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2867 2868 // The number of bytes used to encode the length of a NAL unit. 2869 mNALLengthSize = 1 + (ptr[4] & 3); 2870 } else if (mIsHEVC) { 2871 uint32_t type; 2872 const void *data; 2873 size_t size; 2874 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 2875 2876 const uint8_t *ptr = (const uint8_t *)data; 2877 2878 CHECK(size >= 7); 2879 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2880 2881 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 2882 } 2883 2884 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 2885 2886 if (mFirstMoofOffset != 0) { 2887 off64_t offset = mFirstMoofOffset; 2888 parseChunk(&offset); 2889 } 2890} 2891 2892MPEG4Source::~MPEG4Source() { 2893 if (mStarted) { 2894 stop(); 2895 } 2896 free(mCurrentSampleInfoSizes); 2897 free(mCurrentSampleInfoOffsets); 2898} 2899 2900status_t MPEG4Source::start(MetaData *params) { 2901 Mutex::Autolock autoLock(mLock); 2902 2903 CHECK(!mStarted); 2904 2905 int32_t val; 2906 if (params && params->findInt32(kKeyWantsNALFragments, &val) 2907 && val != 0) { 2908 mWantsNALFragments = true; 2909 } else { 2910 mWantsNALFragments = false; 2911 } 2912 2913 mGroup = new MediaBufferGroup; 2914 2915 int32_t max_size; 2916 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 2917 2918 mGroup->add_buffer(new MediaBuffer(max_size)); 2919 2920 mSrcBuffer = new uint8_t[max_size]; 2921 2922 mStarted = true; 2923 2924 return OK; 2925} 2926 2927status_t MPEG4Source::stop() { 2928 Mutex::Autolock autoLock(mLock); 2929 2930 CHECK(mStarted); 2931 2932 if (mBuffer != NULL) { 2933 mBuffer->release(); 2934 mBuffer = NULL; 2935 } 2936 2937 delete[] mSrcBuffer; 2938 mSrcBuffer = NULL; 2939 2940 delete mGroup; 2941 mGroup = NULL; 2942 2943 mStarted = false; 2944 mCurrentSampleIndex = 0; 2945 2946 return OK; 2947} 2948 2949status_t MPEG4Source::parseChunk(off64_t *offset) { 2950 uint32_t hdr[2]; 2951 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2952 return ERROR_IO; 2953 } 2954 uint64_t chunk_size = ntohl(hdr[0]); 2955 uint32_t chunk_type = ntohl(hdr[1]); 2956 off64_t data_offset = *offset + 8; 2957 2958 if (chunk_size == 1) { 2959 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 2960 return ERROR_IO; 2961 } 2962 chunk_size = ntoh64(chunk_size); 2963 data_offset += 8; 2964 2965 if (chunk_size < 16) { 2966 // The smallest valid chunk is 16 bytes long in this case. 2967 return ERROR_MALFORMED; 2968 } 2969 } else if (chunk_size < 8) { 2970 // The smallest valid chunk is 8 bytes long. 2971 return ERROR_MALFORMED; 2972 } 2973 2974 char chunk[5]; 2975 MakeFourCCString(chunk_type, chunk); 2976 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 2977 2978 off64_t chunk_data_size = *offset + chunk_size - data_offset; 2979 2980 switch(chunk_type) { 2981 2982 case FOURCC('t', 'r', 'a', 'f'): 2983 case FOURCC('m', 'o', 'o', 'f'): { 2984 off64_t stop_offset = *offset + chunk_size; 2985 *offset = data_offset; 2986 while (*offset < stop_offset) { 2987 status_t err = parseChunk(offset); 2988 if (err != OK) { 2989 return err; 2990 } 2991 } 2992 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 2993 // *offset points to the box following this moof. Find the next moof from there. 2994 2995 while (true) { 2996 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2997 return ERROR_END_OF_STREAM; 2998 } 2999 chunk_size = ntohl(hdr[0]); 3000 chunk_type = ntohl(hdr[1]); 3001 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3002 mNextMoofOffset = *offset; 3003 break; 3004 } 3005 *offset += chunk_size; 3006 } 3007 } 3008 break; 3009 } 3010 3011 case FOURCC('t', 'f', 'h', 'd'): { 3012 status_t err; 3013 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3014 return err; 3015 } 3016 *offset += chunk_size; 3017 break; 3018 } 3019 3020 case FOURCC('t', 'r', 'u', 'n'): { 3021 status_t err; 3022 if (mLastParsedTrackId == mTrackId) { 3023 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3024 return err; 3025 } 3026 } 3027 3028 *offset += chunk_size; 3029 break; 3030 } 3031 3032 case FOURCC('s', 'a', 'i', 'z'): { 3033 status_t err; 3034 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3035 return err; 3036 } 3037 *offset += chunk_size; 3038 break; 3039 } 3040 case FOURCC('s', 'a', 'i', 'o'): { 3041 status_t err; 3042 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3043 return err; 3044 } 3045 *offset += chunk_size; 3046 break; 3047 } 3048 3049 case FOURCC('m', 'd', 'a', 't'): { 3050 // parse DRM info if present 3051 ALOGV("MPEG4Source::parseChunk mdat"); 3052 // if saiz/saoi was previously observed, do something with the sampleinfos 3053 *offset += chunk_size; 3054 break; 3055 } 3056 3057 default: { 3058 *offset += chunk_size; 3059 break; 3060 } 3061 } 3062 return OK; 3063} 3064 3065status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3066 off64_t offset, off64_t /* size */) { 3067 ALOGV("parseSampleAuxiliaryInformationSizes"); 3068 // 14496-12 8.7.12 3069 uint8_t version; 3070 if (mDataSource->readAt( 3071 offset, &version, sizeof(version)) 3072 < (ssize_t)sizeof(version)) { 3073 return ERROR_IO; 3074 } 3075 3076 if (version != 0) { 3077 return ERROR_UNSUPPORTED; 3078 } 3079 offset++; 3080 3081 uint32_t flags; 3082 if (!mDataSource->getUInt24(offset, &flags)) { 3083 return ERROR_IO; 3084 } 3085 offset += 3; 3086 3087 if (flags & 1) { 3088 uint32_t tmp; 3089 if (!mDataSource->getUInt32(offset, &tmp)) { 3090 return ERROR_MALFORMED; 3091 } 3092 mCurrentAuxInfoType = tmp; 3093 offset += 4; 3094 if (!mDataSource->getUInt32(offset, &tmp)) { 3095 return ERROR_MALFORMED; 3096 } 3097 mCurrentAuxInfoTypeParameter = tmp; 3098 offset += 4; 3099 } 3100 3101 uint8_t defsize; 3102 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3103 return ERROR_MALFORMED; 3104 } 3105 mCurrentDefaultSampleInfoSize = defsize; 3106 offset++; 3107 3108 uint32_t smplcnt; 3109 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3110 return ERROR_MALFORMED; 3111 } 3112 mCurrentSampleInfoCount = smplcnt; 3113 offset += 4; 3114 3115 if (mCurrentDefaultSampleInfoSize != 0) { 3116 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3117 return OK; 3118 } 3119 if (smplcnt > mCurrentSampleInfoAllocSize) { 3120 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3121 mCurrentSampleInfoAllocSize = smplcnt; 3122 } 3123 3124 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3125 return OK; 3126} 3127 3128status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3129 off64_t offset, off64_t /* size */) { 3130 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3131 // 14496-12 8.7.13 3132 uint8_t version; 3133 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3134 return ERROR_IO; 3135 } 3136 offset++; 3137 3138 uint32_t flags; 3139 if (!mDataSource->getUInt24(offset, &flags)) { 3140 return ERROR_IO; 3141 } 3142 offset += 3; 3143 3144 uint32_t entrycount; 3145 if (!mDataSource->getUInt32(offset, &entrycount)) { 3146 return ERROR_IO; 3147 } 3148 offset += 4; 3149 3150 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3151 mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3152 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3153 } 3154 mCurrentSampleInfoOffsetCount = entrycount; 3155 3156 for (size_t i = 0; i < entrycount; i++) { 3157 if (version == 0) { 3158 uint32_t tmp; 3159 if (!mDataSource->getUInt32(offset, &tmp)) { 3160 return ERROR_IO; 3161 } 3162 mCurrentSampleInfoOffsets[i] = tmp; 3163 offset += 4; 3164 } else { 3165 uint64_t tmp; 3166 if (!mDataSource->getUInt64(offset, &tmp)) { 3167 return ERROR_IO; 3168 } 3169 mCurrentSampleInfoOffsets[i] = tmp; 3170 offset += 8; 3171 } 3172 } 3173 3174 // parse clear/encrypted data 3175 3176 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3177 3178 drmoffset += mCurrentMoofOffset; 3179 int ivlength; 3180 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3181 3182 // read CencSampleAuxiliaryDataFormats 3183 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3184 Sample *smpl = &mCurrentSamples.editItemAt(i); 3185 3186 memset(smpl->iv, 0, 16); 3187 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3188 return ERROR_IO; 3189 } 3190 3191 drmoffset += ivlength; 3192 3193 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3194 if (smplinfosize == 0) { 3195 smplinfosize = mCurrentSampleInfoSizes[i]; 3196 } 3197 if (smplinfosize > ivlength) { 3198 uint16_t numsubsamples; 3199 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3200 return ERROR_IO; 3201 } 3202 drmoffset += 2; 3203 for (size_t j = 0; j < numsubsamples; j++) { 3204 uint16_t numclear; 3205 uint32_t numencrypted; 3206 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3207 return ERROR_IO; 3208 } 3209 drmoffset += 2; 3210 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3211 return ERROR_IO; 3212 } 3213 drmoffset += 4; 3214 smpl->clearsizes.add(numclear); 3215 smpl->encryptedsizes.add(numencrypted); 3216 } 3217 } else { 3218 smpl->clearsizes.add(0); 3219 smpl->encryptedsizes.add(smpl->size); 3220 } 3221 } 3222 3223 3224 return OK; 3225} 3226 3227status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3228 3229 if (size < 8) { 3230 return -EINVAL; 3231 } 3232 3233 uint32_t flags; 3234 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3235 return ERROR_MALFORMED; 3236 } 3237 3238 if (flags & 0xff000000) { 3239 return -EINVAL; 3240 } 3241 3242 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3243 return ERROR_MALFORMED; 3244 } 3245 3246 if (mLastParsedTrackId != mTrackId) { 3247 // this is not the right track, skip it 3248 return OK; 3249 } 3250 3251 mTrackFragmentHeaderInfo.mFlags = flags; 3252 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3253 offset += 8; 3254 size -= 8; 3255 3256 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3257 3258 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3259 if (size < 8) { 3260 return -EINVAL; 3261 } 3262 3263 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3264 return ERROR_MALFORMED; 3265 } 3266 offset += 8; 3267 size -= 8; 3268 } 3269 3270 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3271 if (size < 4) { 3272 return -EINVAL; 3273 } 3274 3275 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3276 return ERROR_MALFORMED; 3277 } 3278 offset += 4; 3279 size -= 4; 3280 } 3281 3282 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3283 if (size < 4) { 3284 return -EINVAL; 3285 } 3286 3287 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3288 return ERROR_MALFORMED; 3289 } 3290 offset += 4; 3291 size -= 4; 3292 } 3293 3294 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3295 if (size < 4) { 3296 return -EINVAL; 3297 } 3298 3299 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3300 return ERROR_MALFORMED; 3301 } 3302 offset += 4; 3303 size -= 4; 3304 } 3305 3306 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3307 if (size < 4) { 3308 return -EINVAL; 3309 } 3310 3311 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3312 return ERROR_MALFORMED; 3313 } 3314 offset += 4; 3315 size -= 4; 3316 } 3317 3318 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3319 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3320 } 3321 3322 mTrackFragmentHeaderInfo.mDataOffset = 0; 3323 return OK; 3324} 3325 3326status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3327 3328 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3329 if (size < 8) { 3330 return -EINVAL; 3331 } 3332 3333 enum { 3334 kDataOffsetPresent = 0x01, 3335 kFirstSampleFlagsPresent = 0x04, 3336 kSampleDurationPresent = 0x100, 3337 kSampleSizePresent = 0x200, 3338 kSampleFlagsPresent = 0x400, 3339 kSampleCompositionTimeOffsetPresent = 0x800, 3340 }; 3341 3342 uint32_t flags; 3343 if (!mDataSource->getUInt32(offset, &flags)) { 3344 return ERROR_MALFORMED; 3345 } 3346 ALOGV("fragment run flags: %08x", flags); 3347 3348 if (flags & 0xff000000) { 3349 return -EINVAL; 3350 } 3351 3352 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3353 // These two shall not be used together. 3354 return -EINVAL; 3355 } 3356 3357 uint32_t sampleCount; 3358 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3359 return ERROR_MALFORMED; 3360 } 3361 offset += 8; 3362 size -= 8; 3363 3364 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3365 3366 uint32_t firstSampleFlags = 0; 3367 3368 if (flags & kDataOffsetPresent) { 3369 if (size < 4) { 3370 return -EINVAL; 3371 } 3372 3373 int32_t dataOffsetDelta; 3374 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3375 return ERROR_MALFORMED; 3376 } 3377 3378 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3379 3380 offset += 4; 3381 size -= 4; 3382 } 3383 3384 if (flags & kFirstSampleFlagsPresent) { 3385 if (size < 4) { 3386 return -EINVAL; 3387 } 3388 3389 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3390 return ERROR_MALFORMED; 3391 } 3392 offset += 4; 3393 size -= 4; 3394 } 3395 3396 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3397 sampleCtsOffset = 0; 3398 3399 size_t bytesPerSample = 0; 3400 if (flags & kSampleDurationPresent) { 3401 bytesPerSample += 4; 3402 } else if (mTrackFragmentHeaderInfo.mFlags 3403 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3404 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3405 } else { 3406 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3407 } 3408 3409 if (flags & kSampleSizePresent) { 3410 bytesPerSample += 4; 3411 } else if (mTrackFragmentHeaderInfo.mFlags 3412 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3413 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3414 } else { 3415 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3416 } 3417 3418 if (flags & kSampleFlagsPresent) { 3419 bytesPerSample += 4; 3420 } else if (mTrackFragmentHeaderInfo.mFlags 3421 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3422 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3423 } else { 3424 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3425 } 3426 3427 if (flags & kSampleCompositionTimeOffsetPresent) { 3428 bytesPerSample += 4; 3429 } else { 3430 sampleCtsOffset = 0; 3431 } 3432 3433 if (size < sampleCount * bytesPerSample) { 3434 return -EINVAL; 3435 } 3436 3437 Sample tmp; 3438 for (uint32_t i = 0; i < sampleCount; ++i) { 3439 if (flags & kSampleDurationPresent) { 3440 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3441 return ERROR_MALFORMED; 3442 } 3443 offset += 4; 3444 } 3445 3446 if (flags & kSampleSizePresent) { 3447 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3448 return ERROR_MALFORMED; 3449 } 3450 offset += 4; 3451 } 3452 3453 if (flags & kSampleFlagsPresent) { 3454 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3455 return ERROR_MALFORMED; 3456 } 3457 offset += 4; 3458 } 3459 3460 if (flags & kSampleCompositionTimeOffsetPresent) { 3461 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3462 return ERROR_MALFORMED; 3463 } 3464 offset += 4; 3465 } 3466 3467 ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, " 3468 " flags 0x%08x", i + 1, 3469 dataOffset, sampleSize, sampleDuration, 3470 (flags & kFirstSampleFlagsPresent) && i == 0 3471 ? firstSampleFlags : sampleFlags); 3472 tmp.offset = dataOffset; 3473 tmp.size = sampleSize; 3474 tmp.duration = sampleDuration; 3475 mCurrentSamples.add(tmp); 3476 3477 dataOffset += sampleSize; 3478 } 3479 3480 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3481 3482 return OK; 3483} 3484 3485sp<MetaData> MPEG4Source::getFormat() { 3486 Mutex::Autolock autoLock(mLock); 3487 3488 return mFormat; 3489} 3490 3491size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3492 switch (mNALLengthSize) { 3493 case 1: 3494 return *data; 3495 case 2: 3496 return U16_AT(data); 3497 case 3: 3498 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3499 case 4: 3500 return U32_AT(data); 3501 } 3502 3503 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3504 // a 2-bit integer. 3505 CHECK(!"Should not be here."); 3506 3507 return 0; 3508} 3509 3510status_t MPEG4Source::read( 3511 MediaBuffer **out, const ReadOptions *options) { 3512 Mutex::Autolock autoLock(mLock); 3513 3514 CHECK(mStarted); 3515 3516 if (mFirstMoofOffset > 0) { 3517 return fragmentedRead(out, options); 3518 } 3519 3520 *out = NULL; 3521 3522 int64_t targetSampleTimeUs = -1; 3523 3524 int64_t seekTimeUs; 3525 ReadOptions::SeekMode mode; 3526 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3527 uint32_t findFlags = 0; 3528 switch (mode) { 3529 case ReadOptions::SEEK_PREVIOUS_SYNC: 3530 findFlags = SampleTable::kFlagBefore; 3531 break; 3532 case ReadOptions::SEEK_NEXT_SYNC: 3533 findFlags = SampleTable::kFlagAfter; 3534 break; 3535 case ReadOptions::SEEK_CLOSEST_SYNC: 3536 case ReadOptions::SEEK_CLOSEST: 3537 findFlags = SampleTable::kFlagClosest; 3538 break; 3539 default: 3540 CHECK(!"Should not be here."); 3541 break; 3542 } 3543 3544 uint32_t sampleIndex; 3545 status_t err = mSampleTable->findSampleAtTime( 3546 seekTimeUs * mTimescale / 1000000, 3547 &sampleIndex, findFlags); 3548 3549 if (mode == ReadOptions::SEEK_CLOSEST) { 3550 // We found the closest sample already, now we want the sync 3551 // sample preceding it (or the sample itself of course), even 3552 // if the subsequent sync sample is closer. 3553 findFlags = SampleTable::kFlagBefore; 3554 } 3555 3556 uint32_t syncSampleIndex; 3557 if (err == OK) { 3558 err = mSampleTable->findSyncSampleNear( 3559 sampleIndex, &syncSampleIndex, findFlags); 3560 } 3561 3562 uint32_t sampleTime; 3563 if (err == OK) { 3564 err = mSampleTable->getMetaDataForSample( 3565 sampleIndex, NULL, NULL, &sampleTime); 3566 } 3567 3568 if (err != OK) { 3569 if (err == ERROR_OUT_OF_RANGE) { 3570 // An attempt to seek past the end of the stream would 3571 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3572 // this all the way to the MediaPlayer would cause abnormal 3573 // termination. Legacy behaviour appears to be to behave as if 3574 // we had seeked to the end of stream, ending normally. 3575 err = ERROR_END_OF_STREAM; 3576 } 3577 ALOGV("end of stream"); 3578 return err; 3579 } 3580 3581 if (mode == ReadOptions::SEEK_CLOSEST) { 3582 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3583 } 3584 3585#if 0 3586 uint32_t syncSampleTime; 3587 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3588 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3589 3590 ALOGI("seek to time %lld us => sample at time %lld us, " 3591 "sync sample at time %lld us", 3592 seekTimeUs, 3593 sampleTime * 1000000ll / mTimescale, 3594 syncSampleTime * 1000000ll / mTimescale); 3595#endif 3596 3597 mCurrentSampleIndex = syncSampleIndex; 3598 if (mBuffer != NULL) { 3599 mBuffer->release(); 3600 mBuffer = NULL; 3601 } 3602 3603 // fall through 3604 } 3605 3606 off64_t offset; 3607 size_t size; 3608 uint32_t cts, stts; 3609 bool isSyncSample; 3610 bool newBuffer = false; 3611 if (mBuffer == NULL) { 3612 newBuffer = true; 3613 3614 status_t err = 3615 mSampleTable->getMetaDataForSample( 3616 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3617 3618 if (err != OK) { 3619 return err; 3620 } 3621 3622 err = mGroup->acquire_buffer(&mBuffer); 3623 3624 if (err != OK) { 3625 CHECK(mBuffer == NULL); 3626 return err; 3627 } 3628 } 3629 3630 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3631 if (newBuffer) { 3632 ssize_t num_bytes_read = 3633 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3634 3635 if (num_bytes_read < (ssize_t)size) { 3636 mBuffer->release(); 3637 mBuffer = NULL; 3638 3639 return ERROR_IO; 3640 } 3641 3642 CHECK(mBuffer != NULL); 3643 mBuffer->set_range(0, size); 3644 mBuffer->meta_data()->clear(); 3645 mBuffer->meta_data()->setInt64( 3646 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3647 mBuffer->meta_data()->setInt64( 3648 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3649 3650 if (targetSampleTimeUs >= 0) { 3651 mBuffer->meta_data()->setInt64( 3652 kKeyTargetTime, targetSampleTimeUs); 3653 } 3654 3655 if (isSyncSample) { 3656 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3657 } 3658 3659 ++mCurrentSampleIndex; 3660 } 3661 3662 if (!mIsAVC && !mIsHEVC) { 3663 *out = mBuffer; 3664 mBuffer = NULL; 3665 3666 return OK; 3667 } 3668 3669 // Each NAL unit is split up into its constituent fragments and 3670 // each one of them returned in its own buffer. 3671 3672 CHECK(mBuffer->range_length() >= mNALLengthSize); 3673 3674 const uint8_t *src = 3675 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3676 3677 size_t nal_size = parseNALSize(src); 3678 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3679 ALOGE("incomplete NAL unit."); 3680 3681 mBuffer->release(); 3682 mBuffer = NULL; 3683 3684 return ERROR_MALFORMED; 3685 } 3686 3687 MediaBuffer *clone = mBuffer->clone(); 3688 CHECK(clone != NULL); 3689 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3690 3691 CHECK(mBuffer != NULL); 3692 mBuffer->set_range( 3693 mBuffer->range_offset() + mNALLengthSize + nal_size, 3694 mBuffer->range_length() - mNALLengthSize - nal_size); 3695 3696 if (mBuffer->range_length() == 0) { 3697 mBuffer->release(); 3698 mBuffer = NULL; 3699 } 3700 3701 *out = clone; 3702 3703 return OK; 3704 } else { 3705 // Whole NAL units are returned but each fragment is prefixed by 3706 // the start code (0x00 00 00 01). 3707 ssize_t num_bytes_read = 0; 3708 int32_t drm = 0; 3709 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3710 if (usesDRM) { 3711 num_bytes_read = 3712 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3713 } else { 3714 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3715 } 3716 3717 if (num_bytes_read < (ssize_t)size) { 3718 mBuffer->release(); 3719 mBuffer = NULL; 3720 3721 return ERROR_IO; 3722 } 3723 3724 if (usesDRM) { 3725 CHECK(mBuffer != NULL); 3726 mBuffer->set_range(0, size); 3727 3728 } else { 3729 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3730 size_t srcOffset = 0; 3731 size_t dstOffset = 0; 3732 3733 while (srcOffset < size) { 3734 bool isMalFormed = (srcOffset + mNALLengthSize > size); 3735 size_t nalLength = 0; 3736 if (!isMalFormed) { 3737 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3738 srcOffset += mNALLengthSize; 3739 isMalFormed = srcOffset + nalLength > size; 3740 } 3741 3742 if (isMalFormed) { 3743 ALOGE("Video is malformed"); 3744 mBuffer->release(); 3745 mBuffer = NULL; 3746 return ERROR_MALFORMED; 3747 } 3748 3749 if (nalLength == 0) { 3750 continue; 3751 } 3752 3753 CHECK(dstOffset + 4 <= mBuffer->size()); 3754 3755 dstData[dstOffset++] = 0; 3756 dstData[dstOffset++] = 0; 3757 dstData[dstOffset++] = 0; 3758 dstData[dstOffset++] = 1; 3759 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3760 srcOffset += nalLength; 3761 dstOffset += nalLength; 3762 } 3763 CHECK_EQ(srcOffset, size); 3764 CHECK(mBuffer != NULL); 3765 mBuffer->set_range(0, dstOffset); 3766 } 3767 3768 mBuffer->meta_data()->clear(); 3769 mBuffer->meta_data()->setInt64( 3770 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3771 mBuffer->meta_data()->setInt64( 3772 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3773 3774 if (targetSampleTimeUs >= 0) { 3775 mBuffer->meta_data()->setInt64( 3776 kKeyTargetTime, targetSampleTimeUs); 3777 } 3778 3779 if (isSyncSample) { 3780 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3781 } 3782 3783 ++mCurrentSampleIndex; 3784 3785 *out = mBuffer; 3786 mBuffer = NULL; 3787 3788 return OK; 3789 } 3790} 3791 3792status_t MPEG4Source::fragmentedRead( 3793 MediaBuffer **out, const ReadOptions *options) { 3794 3795 ALOGV("MPEG4Source::fragmentedRead"); 3796 3797 CHECK(mStarted); 3798 3799 *out = NULL; 3800 3801 int64_t targetSampleTimeUs = -1; 3802 3803 int64_t seekTimeUs; 3804 ReadOptions::SeekMode mode; 3805 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3806 3807 int numSidxEntries = mSegments.size(); 3808 if (numSidxEntries != 0) { 3809 int64_t totalTime = 0; 3810 off64_t totalOffset = mFirstMoofOffset; 3811 for (int i = 0; i < numSidxEntries; i++) { 3812 const SidxEntry *se = &mSegments[i]; 3813 if (totalTime + se->mDurationUs > seekTimeUs) { 3814 // The requested time is somewhere in this segment 3815 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 3816 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 3817 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3818 // requested next sync, or closest sync and it was closer to the end of 3819 // this segment 3820 totalTime += se->mDurationUs; 3821 totalOffset += se->mSize; 3822 } 3823 break; 3824 } 3825 totalTime += se->mDurationUs; 3826 totalOffset += se->mSize; 3827 } 3828 mCurrentMoofOffset = totalOffset; 3829 mCurrentSamples.clear(); 3830 mCurrentSampleIndex = 0; 3831 parseChunk(&totalOffset); 3832 mCurrentTime = totalTime * mTimescale / 1000000ll; 3833 } else { 3834 // without sidx boxes, we can only seek to 0 3835 mCurrentMoofOffset = mFirstMoofOffset; 3836 mCurrentSamples.clear(); 3837 mCurrentSampleIndex = 0; 3838 off64_t tmp = mCurrentMoofOffset; 3839 parseChunk(&tmp); 3840 mCurrentTime = 0; 3841 } 3842 3843 if (mBuffer != NULL) { 3844 mBuffer->release(); 3845 mBuffer = NULL; 3846 } 3847 3848 // fall through 3849 } 3850 3851 off64_t offset = 0; 3852 size_t size = 0; 3853 uint32_t cts = 0; 3854 bool isSyncSample = false; 3855 bool newBuffer = false; 3856 if (mBuffer == NULL) { 3857 newBuffer = true; 3858 3859 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3860 // move to next fragment if there is one 3861 if (mNextMoofOffset <= mCurrentMoofOffset) { 3862 return ERROR_END_OF_STREAM; 3863 } 3864 off64_t nextMoof = mNextMoofOffset; 3865 mCurrentMoofOffset = nextMoof; 3866 mCurrentSamples.clear(); 3867 mCurrentSampleIndex = 0; 3868 parseChunk(&nextMoof); 3869 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3870 return ERROR_END_OF_STREAM; 3871 } 3872 } 3873 3874 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3875 offset = smpl->offset; 3876 size = smpl->size; 3877 cts = mCurrentTime; 3878 mCurrentTime += smpl->duration; 3879 isSyncSample = (mCurrentSampleIndex == 0); // XXX 3880 3881 status_t err = mGroup->acquire_buffer(&mBuffer); 3882 3883 if (err != OK) { 3884 CHECK(mBuffer == NULL); 3885 ALOGV("acquire_buffer returned %d", err); 3886 return err; 3887 } 3888 } 3889 3890 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3891 const sp<MetaData> bufmeta = mBuffer->meta_data(); 3892 bufmeta->clear(); 3893 if (smpl->encryptedsizes.size()) { 3894 // store clear/encrypted lengths in metadata 3895 bufmeta->setData(kKeyPlainSizes, 0, 3896 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 3897 bufmeta->setData(kKeyEncryptedSizes, 0, 3898 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 3899 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 3900 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 3901 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 3902 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 3903 } 3904 3905 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 3906 if (newBuffer) { 3907 ssize_t num_bytes_read = 3908 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3909 3910 if (num_bytes_read < (ssize_t)size) { 3911 mBuffer->release(); 3912 mBuffer = NULL; 3913 3914 ALOGV("i/o error"); 3915 return ERROR_IO; 3916 } 3917 3918 CHECK(mBuffer != NULL); 3919 mBuffer->set_range(0, size); 3920 mBuffer->meta_data()->setInt64( 3921 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3922 mBuffer->meta_data()->setInt64( 3923 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 3924 3925 if (targetSampleTimeUs >= 0) { 3926 mBuffer->meta_data()->setInt64( 3927 kKeyTargetTime, targetSampleTimeUs); 3928 } 3929 3930 if (isSyncSample) { 3931 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3932 } 3933 3934 ++mCurrentSampleIndex; 3935 } 3936 3937 if (!mIsAVC && !mIsHEVC) { 3938 *out = mBuffer; 3939 mBuffer = NULL; 3940 3941 return OK; 3942 } 3943 3944 // Each NAL unit is split up into its constituent fragments and 3945 // each one of them returned in its own buffer. 3946 3947 CHECK(mBuffer->range_length() >= mNALLengthSize); 3948 3949 const uint8_t *src = 3950 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3951 3952 size_t nal_size = parseNALSize(src); 3953 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3954 ALOGE("incomplete NAL unit."); 3955 3956 mBuffer->release(); 3957 mBuffer = NULL; 3958 3959 return ERROR_MALFORMED; 3960 } 3961 3962 MediaBuffer *clone = mBuffer->clone(); 3963 CHECK(clone != NULL); 3964 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3965 3966 CHECK(mBuffer != NULL); 3967 mBuffer->set_range( 3968 mBuffer->range_offset() + mNALLengthSize + nal_size, 3969 mBuffer->range_length() - mNALLengthSize - nal_size); 3970 3971 if (mBuffer->range_length() == 0) { 3972 mBuffer->release(); 3973 mBuffer = NULL; 3974 } 3975 3976 *out = clone; 3977 3978 return OK; 3979 } else { 3980 ALOGV("whole NAL"); 3981 // Whole NAL units are returned but each fragment is prefixed by 3982 // the start code (0x00 00 00 01). 3983 ssize_t num_bytes_read = 0; 3984 int32_t drm = 0; 3985 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3986 if (usesDRM) { 3987 num_bytes_read = 3988 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3989 } else { 3990 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3991 } 3992 3993 if (num_bytes_read < (ssize_t)size) { 3994 mBuffer->release(); 3995 mBuffer = NULL; 3996 3997 ALOGV("i/o error"); 3998 return ERROR_IO; 3999 } 4000 4001 if (usesDRM) { 4002 CHECK(mBuffer != NULL); 4003 mBuffer->set_range(0, size); 4004 4005 } else { 4006 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4007 size_t srcOffset = 0; 4008 size_t dstOffset = 0; 4009 4010 while (srcOffset < size) { 4011 bool isMalFormed = (srcOffset + mNALLengthSize > size); 4012 size_t nalLength = 0; 4013 if (!isMalFormed) { 4014 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4015 srcOffset += mNALLengthSize; 4016 isMalFormed = srcOffset + nalLength > size; 4017 } 4018 4019 if (isMalFormed) { 4020 ALOGE("Video is malformed"); 4021 mBuffer->release(); 4022 mBuffer = NULL; 4023 return ERROR_MALFORMED; 4024 } 4025 4026 if (nalLength == 0) { 4027 continue; 4028 } 4029 4030 CHECK(dstOffset + 4 <= mBuffer->size()); 4031 4032 dstData[dstOffset++] = 0; 4033 dstData[dstOffset++] = 0; 4034 dstData[dstOffset++] = 0; 4035 dstData[dstOffset++] = 1; 4036 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4037 srcOffset += nalLength; 4038 dstOffset += nalLength; 4039 } 4040 CHECK_EQ(srcOffset, size); 4041 CHECK(mBuffer != NULL); 4042 mBuffer->set_range(0, dstOffset); 4043 } 4044 4045 mBuffer->meta_data()->setInt64( 4046 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4047 mBuffer->meta_data()->setInt64( 4048 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4049 4050 if (targetSampleTimeUs >= 0) { 4051 mBuffer->meta_data()->setInt64( 4052 kKeyTargetTime, targetSampleTimeUs); 4053 } 4054 4055 if (isSyncSample) { 4056 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4057 } 4058 4059 ++mCurrentSampleIndex; 4060 4061 *out = mBuffer; 4062 mBuffer = NULL; 4063 4064 return OK; 4065 } 4066} 4067 4068MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4069 const char *mimePrefix) { 4070 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4071 const char *mime; 4072 if (track->meta != NULL 4073 && track->meta->findCString(kKeyMIMEType, &mime) 4074 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4075 return track; 4076 } 4077 } 4078 4079 return NULL; 4080} 4081 4082static bool LegacySniffMPEG4( 4083 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4084 uint8_t header[8]; 4085 4086 ssize_t n = source->readAt(4, header, sizeof(header)); 4087 if (n < (ssize_t)sizeof(header)) { 4088 return false; 4089 } 4090 4091 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4092 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4093 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4094 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4095 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4096 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4097 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4098 *confidence = 0.4; 4099 4100 return true; 4101 } 4102 4103 return false; 4104} 4105 4106static bool isCompatibleBrand(uint32_t fourcc) { 4107 static const uint32_t kCompatibleBrands[] = { 4108 FOURCC('i', 's', 'o', 'm'), 4109 FOURCC('i', 's', 'o', '2'), 4110 FOURCC('a', 'v', 'c', '1'), 4111 FOURCC('h', 'v', 'c', '1'), 4112 FOURCC('h', 'e', 'v', '1'), 4113 FOURCC('3', 'g', 'p', '4'), 4114 FOURCC('m', 'p', '4', '1'), 4115 FOURCC('m', 'p', '4', '2'), 4116 4117 // Won't promise that the following file types can be played. 4118 // Just give these file types a chance. 4119 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4120 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4121 4122 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4123 FOURCC('3', 'g', '2', 'b'), 4124 }; 4125 4126 for (size_t i = 0; 4127 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4128 ++i) { 4129 if (kCompatibleBrands[i] == fourcc) { 4130 return true; 4131 } 4132 } 4133 4134 return false; 4135} 4136 4137// Attempt to actually parse the 'ftyp' atom and determine if a suitable 4138// compatible brand is present. 4139// Also try to identify where this file's metadata ends 4140// (end of the 'moov' atom) and report it to the caller as part of 4141// the metadata. 4142static bool BetterSniffMPEG4( 4143 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4144 sp<AMessage> *meta) { 4145 // We scan up to 128 bytes to identify this file as an MP4. 4146 static const off64_t kMaxScanOffset = 128ll; 4147 4148 off64_t offset = 0ll; 4149 bool foundGoodFileType = false; 4150 off64_t moovAtomEndOffset = -1ll; 4151 bool done = false; 4152 4153 while (!done && offset < kMaxScanOffset) { 4154 uint32_t hdr[2]; 4155 if (source->readAt(offset, hdr, 8) < 8) { 4156 return false; 4157 } 4158 4159 uint64_t chunkSize = ntohl(hdr[0]); 4160 uint32_t chunkType = ntohl(hdr[1]); 4161 off64_t chunkDataOffset = offset + 8; 4162 4163 if (chunkSize == 1) { 4164 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4165 return false; 4166 } 4167 4168 chunkSize = ntoh64(chunkSize); 4169 chunkDataOffset += 8; 4170 4171 if (chunkSize < 16) { 4172 // The smallest valid chunk is 16 bytes long in this case. 4173 return false; 4174 } 4175 } else if (chunkSize < 8) { 4176 // The smallest valid chunk is 8 bytes long. 4177 return false; 4178 } 4179 4180 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4181 4182 char chunkstring[5]; 4183 MakeFourCCString(chunkType, chunkstring); 4184 ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset); 4185 switch (chunkType) { 4186 case FOURCC('f', 't', 'y', 'p'): 4187 { 4188 if (chunkDataSize < 8) { 4189 return false; 4190 } 4191 4192 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4193 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4194 if (i == 1) { 4195 // Skip this index, it refers to the minorVersion, 4196 // not a brand. 4197 continue; 4198 } 4199 4200 uint32_t brand; 4201 if (source->readAt( 4202 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4203 return false; 4204 } 4205 4206 brand = ntohl(brand); 4207 4208 if (isCompatibleBrand(brand)) { 4209 foundGoodFileType = true; 4210 break; 4211 } 4212 } 4213 4214 if (!foundGoodFileType) { 4215 return false; 4216 } 4217 4218 break; 4219 } 4220 4221 case FOURCC('m', 'o', 'o', 'v'): 4222 { 4223 moovAtomEndOffset = offset + chunkSize; 4224 4225 done = true; 4226 break; 4227 } 4228 4229 default: 4230 break; 4231 } 4232 4233 offset += chunkSize; 4234 } 4235 4236 if (!foundGoodFileType) { 4237 return false; 4238 } 4239 4240 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4241 *confidence = 0.4f; 4242 4243 if (moovAtomEndOffset >= 0) { 4244 *meta = new AMessage; 4245 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4246 4247 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4248 } 4249 4250 return true; 4251} 4252 4253bool SniffMPEG4( 4254 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4255 sp<AMessage> *meta) { 4256 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4257 return true; 4258 } 4259 4260 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4261 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4262 return true; 4263 } 4264 4265 return false; 4266} 4267 4268} // namespace android 4269