MPEG4Extractor.cpp revision d7e8d9a7393b5429f8f13a6794b9b04d37390fb5
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"
#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

#include <byteswap.h>
#include "include/ID3.h"

namespace android {

// MediaSource implementation declared for a single track of an MPEG-4 file.
// Only the declaration is visible here; the member functions are defined
// elsewhere in this file.
//
// Declaration order of the data members determines their initialization
// order in the constructor, so do not reorder them casually.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    //
    // NOTE: "sidx" is stored as a reference (see mSegments below), so the
    // vector passed in must outlive this object.
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    // Two read entry points are declared; presumably read() serves files
    // indexed by a sample table and fragmentedRead() serves fragmented
    // (moof-based) files -- TODO confirm against the definitions.
    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances are not meant to be deleted directly
    // by arbitrary callers (presumably lifetime is managed through sp<>).
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference to a vector owned by someone else (the "sidx" constructor
    // argument); never owned or copied by this class.
    Vector<SidxEntry> &mSegments;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // Bookkeeping for per-sample auxiliary information; presumably filled
    // by parseSampleAuxiliaryInformationSizes()/...Offsets() below --
    // TODO confirm against the definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Values gathered from a track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit masks tested against mFlags. The names and values line up
        // with the "tfhd" tf_flags bits of ISO/IEC 14496-12 -- presumably
        // that is their source; verify against parseTrackFragmentHeader().
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Description of one sample: where it lives, how big it is, how long
    // it lasts, plus encryption details (IV and clear/encrypted byte
    // counts).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy constructor and copy assignment are declared private to forbid
    // copying (no definition is visible in this part of the file).
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
160 161struct MPEG4DataSource : public DataSource { 162 MPEG4DataSource(const sp<DataSource> &source); 163 164 virtual status_t initCheck() const; 165 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 166 virtual status_t getSize(off64_t *size); 167 virtual uint32_t flags(); 168 169 status_t setCachedRange(off64_t offset, size_t size); 170 171protected: 172 virtual ~MPEG4DataSource(); 173 174private: 175 Mutex mLock; 176 177 sp<DataSource> mSource; 178 off64_t mCachedOffset; 179 size_t mCachedSize; 180 uint8_t *mCache; 181 182 void clearCache(); 183 184 MPEG4DataSource(const MPEG4DataSource &); 185 MPEG4DataSource &operator=(const MPEG4DataSource &); 186}; 187 188MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 189 : mSource(source), 190 mCachedOffset(0), 191 mCachedSize(0), 192 mCache(NULL) { 193} 194 195MPEG4DataSource::~MPEG4DataSource() { 196 clearCache(); 197} 198 199void MPEG4DataSource::clearCache() { 200 if (mCache) { 201 free(mCache); 202 mCache = NULL; 203 } 204 205 mCachedOffset = 0; 206 mCachedSize = 0; 207} 208 209status_t MPEG4DataSource::initCheck() const { 210 return mSource->initCheck(); 211} 212 213ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 214 Mutex::Autolock autoLock(mLock); 215 216 if (offset >= mCachedOffset 217 && offset + size <= mCachedOffset + mCachedSize) { 218 memcpy(data, &mCache[offset - mCachedOffset], size); 219 return size; 220 } 221 222 return mSource->readAt(offset, data, size); 223} 224 225status_t MPEG4DataSource::getSize(off64_t *size) { 226 return mSource->getSize(size); 227} 228 229uint32_t MPEG4DataSource::flags() { 230 return mSource->flags(); 231} 232 233status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 234 Mutex::Autolock autoLock(mLock); 235 236 clearCache(); 237 238 mCache = (uint8_t *)malloc(size); 239 240 if (mCache == NULL) { 241 return -ENOMEM; 242 } 243 244 mCachedOffset = offset; 245 mCachedSize = size; 246 247 ssize_t err = 
mSource->readAt(mCachedOffset, mCache, mCachedSize); 248 249 if (err < (ssize_t)size) { 250 clearCache(); 251 252 return ERROR_IO; 253 } 254 255 return OK; 256} 257 258//////////////////////////////////////////////////////////////////////////////// 259 260static void hexdump(const void *_data, size_t size) { 261 const uint8_t *data = (const uint8_t *)_data; 262 size_t offset = 0; 263 while (offset < size) { 264 printf("0x%04zx ", offset); 265 266 size_t n = size - offset; 267 if (n > 16) { 268 n = 16; 269 } 270 271 for (size_t i = 0; i < 16; ++i) { 272 if (i == 8) { 273 printf(" "); 274 } 275 276 if (offset + i < size) { 277 printf("%02x ", data[offset + i]); 278 } else { 279 printf(" "); 280 } 281 } 282 283 printf(" "); 284 285 for (size_t i = 0; i < n; ++i) { 286 if (isprint(data[offset + i])) { 287 printf("%c", data[offset + i]); 288 } else { 289 printf("."); 290 } 291 } 292 293 printf("\n"); 294 295 offset += 16; 296 } 297} 298 299static const char *FourCC2MIME(uint32_t fourcc) { 300 switch (fourcc) { 301 case FOURCC('m', 'p', '4', 'a'): 302 return MEDIA_MIMETYPE_AUDIO_AAC; 303 304 case FOURCC('s', 'a', 'm', 'r'): 305 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 306 307 case FOURCC('s', 'a', 'w', 'b'): 308 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 309 310 case FOURCC('m', 'p', '4', 'v'): 311 return MEDIA_MIMETYPE_VIDEO_MPEG4; 312 313 case FOURCC('s', '2', '6', '3'): 314 case FOURCC('h', '2', '6', '3'): 315 case FOURCC('H', '2', '6', '3'): 316 return MEDIA_MIMETYPE_VIDEO_H263; 317 318 case FOURCC('a', 'v', 'c', '1'): 319 return MEDIA_MIMETYPE_VIDEO_AVC; 320 321 case FOURCC('h', 'v', 'c', '1'): 322 case FOURCC('h', 'e', 'v', '1'): 323 return MEDIA_MIMETYPE_VIDEO_HEVC; 324 default: 325 CHECK(!"should not be here."); 326 return NULL; 327 } 328} 329 330static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 331 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 332 // AMR NB audio is always mono, 8kHz 333 *channels = 1; 334 *rate 
= 8000; 335 return true; 336 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 337 // AMR WB audio is always mono, 16kHz 338 *channels = 1; 339 *rate = 16000; 340 return true; 341 } 342 return false; 343} 344 345MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 346 : mSidxDuration(0), 347 mMoofOffset(0), 348 mDataSource(source), 349 mInitCheck(NO_INIT), 350 mHasVideo(false), 351 mHeaderTimescale(0), 352 mFirstTrack(NULL), 353 mLastTrack(NULL), 354 mFileMetaData(new MetaData), 355 mFirstSINF(NULL), 356 mIsDrm(false) { 357} 358 359MPEG4Extractor::~MPEG4Extractor() { 360 Track *track = mFirstTrack; 361 while (track) { 362 Track *next = track->next; 363 364 delete track; 365 track = next; 366 } 367 mFirstTrack = mLastTrack = NULL; 368 369 SINF *sinf = mFirstSINF; 370 while (sinf) { 371 SINF *next = sinf->next; 372 delete sinf->IPMPData; 373 delete sinf; 374 sinf = next; 375 } 376 mFirstSINF = NULL; 377 378 for (size_t i = 0; i < mPssh.size(); i++) { 379 delete [] mPssh[i].data; 380 } 381} 382 383uint32_t MPEG4Extractor::flags() const { 384 return CAN_PAUSE | 385 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
386 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 387} 388 389sp<MetaData> MPEG4Extractor::getMetaData() { 390 status_t err; 391 if ((err = readMetaData()) != OK) { 392 return new MetaData; 393 } 394 395 return mFileMetaData; 396} 397 398size_t MPEG4Extractor::countTracks() { 399 status_t err; 400 if ((err = readMetaData()) != OK) { 401 ALOGV("MPEG4Extractor::countTracks: no tracks"); 402 return 0; 403 } 404 405 size_t n = 0; 406 Track *track = mFirstTrack; 407 while (track) { 408 ++n; 409 track = track->next; 410 } 411 412 ALOGV("MPEG4Extractor::countTracks: %d tracks", n); 413 return n; 414} 415 416sp<MetaData> MPEG4Extractor::getTrackMetaData( 417 size_t index, uint32_t flags) { 418 status_t err; 419 if ((err = readMetaData()) != OK) { 420 return NULL; 421 } 422 423 Track *track = mFirstTrack; 424 while (index > 0) { 425 if (track == NULL) { 426 return NULL; 427 } 428 429 track = track->next; 430 --index; 431 } 432 433 if (track == NULL) { 434 return NULL; 435 } 436 437 if ((flags & kIncludeExtensiveMetaData) 438 && !track->includes_expensive_metadata) { 439 track->includes_expensive_metadata = true; 440 441 const char *mime; 442 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 443 if (!strncasecmp("video/", mime, 6)) { 444 if (mMoofOffset > 0) { 445 int64_t duration; 446 if (track->meta->findInt64(kKeyDuration, &duration)) { 447 // nothing fancy, just pick a frame near 1/4th of the duration 448 track->meta->setInt64( 449 kKeyThumbnailTime, duration / 4); 450 } 451 } else { 452 uint32_t sampleIndex; 453 uint32_t sampleTime; 454 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 455 && track->sampleTable->getMetaDataForSample( 456 sampleIndex, NULL /* offset */, NULL /* size */, 457 &sampleTime) == OK) { 458 track->meta->setInt64( 459 kKeyThumbnailTime, 460 ((int64_t)sampleTime * 1000000) / track->timescale); 461 } 462 } 463 } 464 } 465 466 return track->meta; 467} 468 469static void MakeFourCCString(uint32_t x, char *s) { 470 s[0] = x 
>> 24; 471 s[1] = (x >> 16) & 0xff; 472 s[2] = (x >> 8) & 0xff; 473 s[3] = x & 0xff; 474 s[4] = '\0'; 475} 476 477status_t MPEG4Extractor::readMetaData() { 478 if (mInitCheck != NO_INIT) { 479 return mInitCheck; 480 } 481 482 off64_t offset = 0; 483 status_t err; 484 while (true) { 485 off64_t orig_offset = offset; 486 err = parseChunk(&offset, 0); 487 488 if (offset <= orig_offset) { 489 // only continue parsing if the offset was advanced, 490 // otherwise we might end up in an infinite loop 491 ALOGE("did not advance: 0x%lld->0x%lld", orig_offset, offset); 492 err = ERROR_MALFORMED; 493 break; 494 } else if (err == OK) { 495 continue; 496 } else if (err != UNKNOWN_ERROR) { 497 break; 498 } 499 uint32_t hdr[2]; 500 if (mDataSource->readAt(offset, hdr, 8) < 8) { 501 break; 502 } 503 uint32_t chunk_type = ntohl(hdr[1]); 504 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 505 // store the offset of the first segment 506 mMoofOffset = offset; 507 } else if (chunk_type != FOURCC('m', 'd', 'a', 't')) { 508 // keep parsing until we get to the data 509 continue; 510 } 511 break; 512 } 513 514 if (mInitCheck == OK) { 515 if (mHasVideo) { 516 mFileMetaData->setCString( 517 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 518 } else { 519 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 520 } 521 } else { 522 mInitCheck = err; 523 } 524 525 CHECK_NE(err, (status_t)NO_INIT); 526 527 // copy pssh data into file metadata 528 int psshsize = 0; 529 for (size_t i = 0; i < mPssh.size(); i++) { 530 psshsize += 20 + mPssh[i].datalen; 531 } 532 if (psshsize) { 533 char *buf = (char*)malloc(psshsize); 534 char *ptr = buf; 535 for (size_t i = 0; i < mPssh.size(); i++) { 536 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 537 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 538 ptr += (20 + mPssh[i].datalen); 539 } 540 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 541 free(buf); 542 } 543 return mInitCheck; 544} 545 546char* MPEG4Extractor::getDrmTrackInfo(size_t 
trackID, int *len) { 547 if (mFirstSINF == NULL) { 548 return NULL; 549 } 550 551 SINF *sinf = mFirstSINF; 552 while (sinf && (trackID != sinf->trackID)) { 553 sinf = sinf->next; 554 } 555 556 if (sinf == NULL) { 557 return NULL; 558 } 559 560 *len = sinf->len; 561 return sinf->IPMPData; 562} 563 564// Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 565static int32_t readSize(off64_t offset, 566 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 567 uint32_t size = 0; 568 uint8_t data; 569 bool moreData = true; 570 *numOfBytes = 0; 571 572 while (moreData) { 573 if (DataSource->readAt(offset, &data, 1) < 1) { 574 return -1; 575 } 576 offset ++; 577 moreData = (data >= 128) ? true : false; 578 size = (size << 7) | (data & 0x7f); // Take last 7 bits 579 (*numOfBytes) ++; 580 } 581 582 return size; 583} 584 585status_t MPEG4Extractor::parseDrmSINF( 586 off64_t * /* offset */, off64_t data_offset) { 587 uint8_t updateIdTag; 588 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 589 return ERROR_IO; 590 } 591 data_offset ++; 592 593 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 594 return ERROR_MALFORMED; 595 } 596 597 uint8_t numOfBytes; 598 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 599 if (size < 0) { 600 return ERROR_IO; 601 } 602 int32_t classSize = size; 603 data_offset += numOfBytes; 604 605 while(size >= 11 ) { 606 uint8_t descriptorTag; 607 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 608 return ERROR_IO; 609 } 610 data_offset ++; 611 612 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 613 return ERROR_MALFORMED; 614 } 615 616 uint8_t buffer[8]; 617 //ObjectDescriptorID and ObjectDescriptor url flag 618 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 619 return ERROR_IO; 620 } 621 data_offset += 2; 622 623 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 624 return ERROR_MALFORMED; 625 } 626 627 if (mDataSource->readAt(data_offset, buffer, 
8) < 8) { 628 return ERROR_IO; 629 } 630 data_offset += 8; 631 632 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 633 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 634 return ERROR_MALFORMED; 635 } 636 637 SINF *sinf = new SINF; 638 sinf->trackID = U16_AT(&buffer[3]); 639 sinf->IPMPDescriptorID = buffer[7]; 640 sinf->next = mFirstSINF; 641 mFirstSINF = sinf; 642 643 size -= (8 + 2 + 1); 644 } 645 646 if (size != 0) { 647 return ERROR_MALFORMED; 648 } 649 650 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 651 return ERROR_IO; 652 } 653 data_offset ++; 654 655 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 656 return ERROR_MALFORMED; 657 } 658 659 size = readSize(data_offset, mDataSource, &numOfBytes); 660 if (size < 0) { 661 return ERROR_IO; 662 } 663 classSize = size; 664 data_offset += numOfBytes; 665 666 while (size > 0) { 667 uint8_t tag; 668 int32_t dataLen; 669 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 670 return ERROR_IO; 671 } 672 data_offset ++; 673 674 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 675 uint8_t id; 676 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 677 if (dataLen < 0) { 678 return ERROR_IO; 679 } else if (dataLen < 4) { 680 return ERROR_MALFORMED; 681 } 682 data_offset += numOfBytes; 683 684 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 685 return ERROR_IO; 686 } 687 data_offset ++; 688 689 SINF *sinf = mFirstSINF; 690 while (sinf && (sinf->IPMPDescriptorID != id)) { 691 sinf = sinf->next; 692 } 693 if (sinf == NULL) { 694 return ERROR_MALFORMED; 695 } 696 sinf->len = dataLen - 3; 697 sinf->IPMPData = new char[sinf->len]; 698 data_offset += 2; 699 700 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) { 701 return ERROR_IO; 702 } 703 data_offset += sinf->len; 704 705 size -= (dataLen + numOfBytes + 1); 706 } 707 } 708 709 if (size != 0) { 710 return ERROR_MALFORMED; 711 } 712 713 return UNKNOWN_ERROR; // Return a dummy error. 
714} 715 716struct PathAdder { 717 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 718 : mPath(path) { 719 mPath->push(chunkType); 720 } 721 722 ~PathAdder() { 723 mPath->pop(); 724 } 725 726private: 727 Vector<uint32_t> *mPath; 728 729 PathAdder(const PathAdder &); 730 PathAdder &operator=(const PathAdder &); 731}; 732 733static bool underMetaDataPath(const Vector<uint32_t> &path) { 734 return path.size() >= 5 735 && path[0] == FOURCC('m', 'o', 'o', 'v') 736 && path[1] == FOURCC('u', 'd', 't', 'a') 737 && path[2] == FOURCC('m', 'e', 't', 'a') 738 && path[3] == FOURCC('i', 'l', 's', 't'); 739} 740 741// Given a time in seconds since Jan 1 1904, produce a human-readable string. 742static void convertTimeToDate(int64_t time_1904, String8 *s) { 743 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 744 745 char tmp[32]; 746 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 747 748 s->setTo(tmp); 749} 750 751status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 752 ALOGV("entering parseChunk %lld/%d", *offset, depth); 753 uint32_t hdr[2]; 754 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 755 return ERROR_IO; 756 } 757 uint64_t chunk_size = ntohl(hdr[0]); 758 uint32_t chunk_type = ntohl(hdr[1]); 759 off64_t data_offset = *offset + 8; 760 761 if (chunk_size == 1) { 762 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 763 return ERROR_IO; 764 } 765 chunk_size = ntoh64(chunk_size); 766 data_offset += 8; 767 768 if (chunk_size < 16) { 769 // The smallest valid chunk is 16 bytes long in this case. 770 return ERROR_MALFORMED; 771 } 772 } else if (chunk_size == 0) { 773 if (depth == 0) { 774 // atom extends to end of file 775 off64_t sourceSize; 776 if (mDataSource->getSize(&sourceSize) == OK) { 777 chunk_size = (sourceSize - *offset); 778 } else { 779 // XXX could we just pick a "sufficiently large" value here? 
780 ALOGE("atom size is 0, and data source has no size"); 781 return ERROR_MALFORMED; 782 } 783 } else { 784 // not allowed for non-toplevel atoms, skip it 785 *offset += 4; 786 return OK; 787 } 788 } else if (chunk_size < 8) { 789 // The smallest valid chunk is 8 bytes long. 790 ALOGE("invalid chunk size: %d", int(chunk_size)); 791 return ERROR_MALFORMED; 792 } 793 794 char chunk[5]; 795 MakeFourCCString(chunk_type, chunk); 796 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 797 798#if 0 799 static const char kWhitespace[] = " "; 800 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 801 printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); 802 803 char buffer[256]; 804 size_t n = chunk_size; 805 if (n > sizeof(buffer)) { 806 n = sizeof(buffer); 807 } 808 if (mDataSource->readAt(*offset, buffer, n) 809 < (ssize_t)n) { 810 return ERROR_IO; 811 } 812 813 hexdump(buffer, n); 814#endif 815 816 PathAdder autoAdder(&mPath, chunk_type); 817 818 off64_t chunk_data_size = *offset + chunk_size - data_offset; 819 820 if (chunk_type != FOURCC('c', 'p', 'r', 't') 821 && chunk_type != FOURCC('c', 'o', 'v', 'r') 822 && mPath.size() == 5 && underMetaDataPath(mPath)) { 823 off64_t stop_offset = *offset + chunk_size; 824 *offset = data_offset; 825 while (*offset < stop_offset) { 826 status_t err = parseChunk(offset, depth + 1); 827 if (err != OK) { 828 return err; 829 } 830 } 831 832 if (*offset != stop_offset) { 833 return ERROR_MALFORMED; 834 } 835 836 return OK; 837 } 838 839 switch(chunk_type) { 840 case FOURCC('m', 'o', 'o', 'v'): 841 case FOURCC('t', 'r', 'a', 'k'): 842 case FOURCC('m', 'd', 'i', 'a'): 843 case FOURCC('m', 'i', 'n', 'f'): 844 case FOURCC('d', 'i', 'n', 'f'): 845 case FOURCC('s', 't', 'b', 'l'): 846 case FOURCC('m', 'v', 'e', 'x'): 847 case FOURCC('m', 'o', 'o', 'f'): 848 case FOURCC('t', 'r', 'a', 'f'): 849 case FOURCC('m', 'f', 'r', 'a'): 850 case FOURCC('u', 'd', 't', 'a'): 851 case FOURCC('i', 'l', 's', 't'): 
852 case FOURCC('s', 'i', 'n', 'f'): 853 case FOURCC('s', 'c', 'h', 'i'): 854 case FOURCC('e', 'd', 't', 's'): 855 { 856 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 857 ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); 858 859 if (mDataSource->flags() 860 & (DataSource::kWantsPrefetching 861 | DataSource::kIsCachingDataSource)) { 862 sp<MPEG4DataSource> cachedSource = 863 new MPEG4DataSource(mDataSource); 864 865 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 866 mDataSource = cachedSource; 867 } 868 } 869 870 mLastTrack->sampleTable = new SampleTable(mDataSource); 871 } 872 873 bool isTrack = false; 874 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 875 isTrack = true; 876 877 Track *track = new Track; 878 track->next = NULL; 879 if (mLastTrack) { 880 mLastTrack->next = track; 881 } else { 882 mFirstTrack = track; 883 } 884 mLastTrack = track; 885 886 track->meta = new MetaData; 887 track->includes_expensive_metadata = false; 888 track->skipTrack = false; 889 track->timescale = 0; 890 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 891 } 892 893 off64_t stop_offset = *offset + chunk_size; 894 *offset = data_offset; 895 while (*offset < stop_offset) { 896 status_t err = parseChunk(offset, depth + 1); 897 if (err != OK) { 898 return err; 899 } 900 } 901 902 if (*offset != stop_offset) { 903 return ERROR_MALFORMED; 904 } 905 906 if (isTrack) { 907 if (mLastTrack->skipTrack) { 908 Track *cur = mFirstTrack; 909 910 if (cur == mLastTrack) { 911 delete cur; 912 mFirstTrack = mLastTrack = NULL; 913 } else { 914 while (cur && cur->next != mLastTrack) { 915 cur = cur->next; 916 } 917 cur->next = NULL; 918 delete mLastTrack; 919 mLastTrack = cur; 920 } 921 922 return OK; 923 } 924 925 status_t err = verifyTrack(mLastTrack); 926 927 if (err != OK) { 928 return err; 929 } 930 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 931 mInitCheck = OK; 932 933 if (!mIsDrm) { 934 return UNKNOWN_ERROR; // Return a dummy 
error. 935 } else { 936 return OK; 937 } 938 } 939 break; 940 } 941 942 case FOURCC('e', 'l', 's', 't'): 943 { 944 *offset += chunk_size; 945 946 // See 14496-12 8.6.6 947 uint8_t version; 948 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 949 return ERROR_IO; 950 } 951 952 uint32_t entry_count; 953 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 954 return ERROR_IO; 955 } 956 957 if (entry_count != 1) { 958 // we only support a single entry at the moment, for gapless playback 959 ALOGW("ignoring edit list with %d entries", entry_count); 960 } else if (mHeaderTimescale == 0) { 961 ALOGW("ignoring edit list because timescale is 0"); 962 } else { 963 off64_t entriesoffset = data_offset + 8; 964 uint64_t segment_duration; 965 int64_t media_time; 966 967 if (version == 1) { 968 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 969 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 970 return ERROR_IO; 971 } 972 } else if (version == 0) { 973 uint32_t sd; 974 int32_t mt; 975 if (!mDataSource->getUInt32(entriesoffset, &sd) || 976 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 977 return ERROR_IO; 978 } 979 segment_duration = sd; 980 media_time = mt; 981 } else { 982 return ERROR_IO; 983 } 984 985 uint64_t halfscale = mHeaderTimescale / 2; 986 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 987 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 988 989 int64_t duration; 990 int32_t samplerate; 991 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 992 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 993 994 int64_t delay = (media_time * samplerate + 500000) / 1000000; 995 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 996 997 int64_t paddingus = duration - (segment_duration + media_time); 998 if (paddingus < 0) { 999 // track duration from media header (which is what kKeyDuration is) might 1000 // be slightly shorter than the 
segment duration, which would make the 1001 // padding negative. Clamp to zero. 1002 paddingus = 0; 1003 } 1004 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 1005 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 1006 } 1007 } 1008 break; 1009 } 1010 1011 case FOURCC('f', 'r', 'm', 'a'): 1012 { 1013 *offset += chunk_size; 1014 1015 uint32_t original_fourcc; 1016 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 1017 return ERROR_IO; 1018 } 1019 original_fourcc = ntohl(original_fourcc); 1020 ALOGV("read original format: %d", original_fourcc); 1021 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 1022 uint32_t num_channels = 0; 1023 uint32_t sample_rate = 0; 1024 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 1025 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1026 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1027 } 1028 break; 1029 } 1030 1031 case FOURCC('t', 'e', 'n', 'c'): 1032 { 1033 *offset += chunk_size; 1034 1035 if (chunk_size < 32) { 1036 return ERROR_MALFORMED; 1037 } 1038 1039 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1040 // default IV size, 16 bytes default KeyID 1041 // (ISO 23001-7) 1042 char buf[4]; 1043 memset(buf, 0, 4); 1044 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1045 return ERROR_IO; 1046 } 1047 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1048 if (defaultAlgorithmId > 1) { 1049 // only 0 (clear) and 1 (AES-128) are valid 1050 return ERROR_MALFORMED; 1051 } 1052 1053 memset(buf, 0, 4); 1054 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1055 return ERROR_IO; 1056 } 1057 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1058 1059 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1060 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1061 // only unencrypted data must have 0 IV size 1062 return ERROR_MALFORMED; 1063 } else 
if (defaultIVSize != 0 && 1064 defaultIVSize != 8 && 1065 defaultIVSize != 16) { 1066 // only supported sizes are 0, 8 and 16 1067 return ERROR_MALFORMED; 1068 } 1069 1070 uint8_t defaultKeyId[16]; 1071 1072 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1073 return ERROR_IO; 1074 } 1075 1076 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1077 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1078 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1079 break; 1080 } 1081 1082 case FOURCC('t', 'k', 'h', 'd'): 1083 { 1084 *offset += chunk_size; 1085 1086 status_t err; 1087 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1088 return err; 1089 } 1090 1091 break; 1092 } 1093 1094 case FOURCC('p', 's', 's', 'h'): 1095 { 1096 *offset += chunk_size; 1097 1098 PsshInfo pssh; 1099 1100 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1101 return ERROR_IO; 1102 } 1103 1104 uint32_t psshdatalen = 0; 1105 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1106 return ERROR_IO; 1107 } 1108 pssh.datalen = ntohl(psshdatalen); 1109 ALOGV("pssh data size: %d", pssh.datalen); 1110 if (pssh.datalen + 20 > chunk_size) { 1111 // pssh data length exceeds size of containing box 1112 return ERROR_MALFORMED; 1113 } 1114 1115 pssh.data = new uint8_t[pssh.datalen]; 1116 ALOGV("allocated pssh @ %p", pssh.data); 1117 ssize_t requested = (ssize_t) pssh.datalen; 1118 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1119 return ERROR_IO; 1120 } 1121 mPssh.push_back(pssh); 1122 1123 break; 1124 } 1125 1126 case FOURCC('m', 'd', 'h', 'd'): 1127 { 1128 *offset += chunk_size; 1129 1130 if (chunk_data_size < 4) { 1131 return ERROR_MALFORMED; 1132 } 1133 1134 uint8_t version; 1135 if (mDataSource->readAt( 1136 data_offset, &version, sizeof(version)) 1137 < (ssize_t)sizeof(version)) { 1138 return ERROR_IO; 1139 } 1140 1141 off64_t timescale_offset; 1142 
1143 if (version == 1) { 1144 timescale_offset = data_offset + 4 + 16; 1145 } else if (version == 0) { 1146 timescale_offset = data_offset + 4 + 8; 1147 } else { 1148 return ERROR_IO; 1149 } 1150 1151 uint32_t timescale; 1152 if (mDataSource->readAt( 1153 timescale_offset, ×cale, sizeof(timescale)) 1154 < (ssize_t)sizeof(timescale)) { 1155 return ERROR_IO; 1156 } 1157 1158 mLastTrack->timescale = ntohl(timescale); 1159 1160 int64_t duration = 0; 1161 if (version == 1) { 1162 if (mDataSource->readAt( 1163 timescale_offset + 4, &duration, sizeof(duration)) 1164 < (ssize_t)sizeof(duration)) { 1165 return ERROR_IO; 1166 } 1167 duration = ntoh64(duration); 1168 } else { 1169 uint32_t duration32; 1170 if (mDataSource->readAt( 1171 timescale_offset + 4, &duration32, sizeof(duration32)) 1172 < (ssize_t)sizeof(duration32)) { 1173 return ERROR_IO; 1174 } 1175 // ffmpeg sets duration to -1, which is incorrect. 1176 if (duration32 != 0xffffffff) { 1177 duration = ntohl(duration32); 1178 } 1179 } 1180 mLastTrack->meta->setInt64( 1181 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1182 1183 uint8_t lang[2]; 1184 off64_t lang_offset; 1185 if (version == 1) { 1186 lang_offset = timescale_offset + 4 + 8; 1187 } else if (version == 0) { 1188 lang_offset = timescale_offset + 4 + 4; 1189 } else { 1190 return ERROR_IO; 1191 } 1192 1193 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1194 < (ssize_t)sizeof(lang)) { 1195 return ERROR_IO; 1196 } 1197 1198 // To get the ISO-639-2/T three character language code 1199 // 1 bit pad followed by 3 5-bits characters. Each character 1200 // is packed as the difference between its ASCII value and 0x60. 
1201 char lang_code[4]; 1202 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1203 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1204 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1205 lang_code[3] = '\0'; 1206 1207 mLastTrack->meta->setCString( 1208 kKeyMediaLanguage, lang_code); 1209 1210 break; 1211 } 1212 1213 case FOURCC('s', 't', 's', 'd'): 1214 { 1215 if (chunk_data_size < 8) { 1216 return ERROR_MALFORMED; 1217 } 1218 1219 uint8_t buffer[8]; 1220 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1221 return ERROR_MALFORMED; 1222 } 1223 1224 if (mDataSource->readAt( 1225 data_offset, buffer, 8) < 8) { 1226 return ERROR_IO; 1227 } 1228 1229 if (U32_AT(buffer) != 0) { 1230 // Should be version 0, flags 0. 1231 return ERROR_MALFORMED; 1232 } 1233 1234 uint32_t entry_count = U32_AT(&buffer[4]); 1235 1236 if (entry_count > 1) { 1237 // For 3GPP timed text, there could be multiple tx3g boxes contain 1238 // multiple text display formats. These formats will be used to 1239 // display the timed text. 1240 // For encrypted files, there may also be more than one entry. 1241 const char *mime; 1242 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1243 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1244 strcasecmp(mime, "application/octet-stream")) { 1245 // For now we only support a single type of media per track. 
1246 mLastTrack->skipTrack = true; 1247 *offset += chunk_size; 1248 break; 1249 } 1250 } 1251 off64_t stop_offset = *offset + chunk_size; 1252 *offset = data_offset + 8; 1253 for (uint32_t i = 0; i < entry_count; ++i) { 1254 status_t err = parseChunk(offset, depth + 1); 1255 if (err != OK) { 1256 return err; 1257 } 1258 } 1259 1260 if (*offset != stop_offset) { 1261 return ERROR_MALFORMED; 1262 } 1263 break; 1264 } 1265 1266 case FOURCC('m', 'p', '4', 'a'): 1267 case FOURCC('e', 'n', 'c', 'a'): 1268 case FOURCC('s', 'a', 'm', 'r'): 1269 case FOURCC('s', 'a', 'w', 'b'): 1270 { 1271 uint8_t buffer[8 + 20]; 1272 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1273 // Basic AudioSampleEntry size. 1274 return ERROR_MALFORMED; 1275 } 1276 1277 if (mDataSource->readAt( 1278 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1279 return ERROR_IO; 1280 } 1281 1282 uint16_t data_ref_index = U16_AT(&buffer[6]); 1283 uint32_t num_channels = U16_AT(&buffer[16]); 1284 1285 uint16_t sample_size = U16_AT(&buffer[18]); 1286 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1287 1288 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1289 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1290 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1291 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1292 } 1293 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1294 chunk, num_channels, sample_size, sample_rate); 1295 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1296 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1297 1298 off64_t stop_offset = *offset + chunk_size; 1299 *offset = data_offset + sizeof(buffer); 1300 while (*offset < stop_offset) { 1301 status_t err = parseChunk(offset, depth + 1); 1302 if (err != OK) { 1303 return err; 1304 } 1305 } 1306 1307 if (*offset != stop_offset) { 1308 return ERROR_MALFORMED; 1309 } 1310 break; 1311 } 1312 1313 case FOURCC('m', 'p', '4', 'v'): 
1314 case FOURCC('e', 'n', 'c', 'v'): 1315 case FOURCC('s', '2', '6', '3'): 1316 case FOURCC('H', '2', '6', '3'): 1317 case FOURCC('h', '2', '6', '3'): 1318 case FOURCC('a', 'v', 'c', '1'): 1319 case FOURCC('h', 'v', 'c', '1'): 1320 case FOURCC('h', 'e', 'v', '1'): 1321 { 1322 mHasVideo = true; 1323 1324 uint8_t buffer[78]; 1325 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1326 // Basic VideoSampleEntry size. 1327 return ERROR_MALFORMED; 1328 } 1329 1330 if (mDataSource->readAt( 1331 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1332 return ERROR_IO; 1333 } 1334 1335 uint16_t data_ref_index = U16_AT(&buffer[6]); 1336 uint16_t width = U16_AT(&buffer[6 + 18]); 1337 uint16_t height = U16_AT(&buffer[6 + 20]); 1338 1339 // The video sample is not standard-compliant if it has invalid dimension. 1340 // Use some default width and height value, and 1341 // let the decoder figure out the actual width and height (and thus 1342 // be prepared for INFO_FOMRAT_CHANGED event). 
1343 if (width == 0) width = 352; 1344 if (height == 0) height = 288; 1345 1346 // printf("*** coding='%s' width=%d height=%d\n", 1347 // chunk, width, height); 1348 1349 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1350 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1351 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1352 } 1353 mLastTrack->meta->setInt32(kKeyWidth, width); 1354 mLastTrack->meta->setInt32(kKeyHeight, height); 1355 1356 off64_t stop_offset = *offset + chunk_size; 1357 *offset = data_offset + sizeof(buffer); 1358 while (*offset < stop_offset) { 1359 status_t err = parseChunk(offset, depth + 1); 1360 if (err != OK) { 1361 return err; 1362 } 1363 } 1364 1365 if (*offset != stop_offset) { 1366 return ERROR_MALFORMED; 1367 } 1368 break; 1369 } 1370 1371 case FOURCC('s', 't', 'c', 'o'): 1372 case FOURCC('c', 'o', '6', '4'): 1373 { 1374 status_t err = 1375 mLastTrack->sampleTable->setChunkOffsetParams( 1376 chunk_type, data_offset, chunk_data_size); 1377 1378 *offset += chunk_size; 1379 1380 if (err != OK) { 1381 return err; 1382 } 1383 1384 break; 1385 } 1386 1387 case FOURCC('s', 't', 's', 'c'): 1388 { 1389 status_t err = 1390 mLastTrack->sampleTable->setSampleToChunkParams( 1391 data_offset, chunk_data_size); 1392 1393 *offset += chunk_size; 1394 1395 if (err != OK) { 1396 return err; 1397 } 1398 1399 break; 1400 } 1401 1402 case FOURCC('s', 't', 's', 'z'): 1403 case FOURCC('s', 't', 'z', '2'): 1404 { 1405 status_t err = 1406 mLastTrack->sampleTable->setSampleSizeParams( 1407 chunk_type, data_offset, chunk_data_size); 1408 1409 *offset += chunk_size; 1410 1411 if (err != OK) { 1412 return err; 1413 } 1414 1415 size_t max_size; 1416 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1417 1418 if (err != OK) { 1419 return err; 1420 } 1421 1422 if (max_size != 0) { 1423 // Assume that a given buffer only contains at most 10 chunks, 1424 // each chunk originally prefixed with a 2 byte length 
will 1425 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1426 // and thus will grow by 2 bytes per chunk. 1427 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1428 } else { 1429 // No size was specified. Pick a conservatively large size. 1430 int32_t width, height; 1431 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) || 1432 !mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1433 ALOGE("No width or height, assuming worst case 1080p"); 1434 width = 1920; 1435 height = 1080; 1436 } 1437 1438 const char *mime; 1439 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1440 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1441 // AVC requires compression ratio of at least 2, and uses 1442 // macroblocks 1443 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1444 } else { 1445 // For all other formats there is no minimum compression 1446 // ratio. Use compression ratio of 1. 1447 max_size = width * height * 3 / 2; 1448 } 1449 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1450 } 1451 1452 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1453 // mimetype) previously obtained, so don't cache them. 1454 const char *mime; 1455 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1456 // Calculate average frame rate. 
1457 if (!strncasecmp("video/", mime, 6)) { 1458 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1459 int64_t durationUs; 1460 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1461 if (durationUs > 0) { 1462 int32_t frameRate = (nSamples * 1000000LL + 1463 (durationUs >> 1)) / durationUs; 1464 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1465 } 1466 } 1467 } 1468 1469 break; 1470 } 1471 1472 case FOURCC('s', 't', 't', 's'): 1473 { 1474 *offset += chunk_size; 1475 1476 status_t err = 1477 mLastTrack->sampleTable->setTimeToSampleParams( 1478 data_offset, chunk_data_size); 1479 1480 if (err != OK) { 1481 return err; 1482 } 1483 1484 break; 1485 } 1486 1487 case FOURCC('c', 't', 't', 's'): 1488 { 1489 *offset += chunk_size; 1490 1491 status_t err = 1492 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1493 data_offset, chunk_data_size); 1494 1495 if (err != OK) { 1496 return err; 1497 } 1498 1499 break; 1500 } 1501 1502 case FOURCC('s', 't', 's', 's'): 1503 { 1504 *offset += chunk_size; 1505 1506 status_t err = 1507 mLastTrack->sampleTable->setSyncSampleParams( 1508 data_offset, chunk_data_size); 1509 1510 if (err != OK) { 1511 return err; 1512 } 1513 1514 break; 1515 } 1516 1517 // @xyz 1518 case FOURCC('\xA9', 'x', 'y', 'z'): 1519 { 1520 *offset += chunk_size; 1521 1522 // Best case the total data length inside "@xyz" box 1523 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1524 // where "\x00\x04" is the text string length with value = 4, 1525 // "\0x15\xc7" is the language code = en, and "0+0" is a 1526 // location (string) value with longitude = 0 and latitude = 0. 1527 if (chunk_data_size < 8) { 1528 return ERROR_MALFORMED; 1529 } 1530 1531 // Worst case the location string length would be 18, 1532 // for instance +90.0000-180.0000, without the trailing "/" and 1533 // the string length + language code. 
1534 char buffer[18]; 1535 1536 // Substracting 5 from the data size is because the text string length + 1537 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1538 off64_t location_length = chunk_data_size - 5; 1539 if (location_length >= (off64_t) sizeof(buffer)) { 1540 return ERROR_MALFORMED; 1541 } 1542 1543 if (mDataSource->readAt( 1544 data_offset + 4, buffer, location_length) < location_length) { 1545 return ERROR_IO; 1546 } 1547 1548 buffer[location_length] = '\0'; 1549 mFileMetaData->setCString(kKeyLocation, buffer); 1550 break; 1551 } 1552 1553 case FOURCC('e', 's', 'd', 's'): 1554 { 1555 *offset += chunk_size; 1556 1557 if (chunk_data_size < 4) { 1558 return ERROR_MALFORMED; 1559 } 1560 1561 uint8_t buffer[256]; 1562 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1563 return ERROR_BUFFER_TOO_SMALL; 1564 } 1565 1566 if (mDataSource->readAt( 1567 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1568 return ERROR_IO; 1569 } 1570 1571 if (U32_AT(buffer) != 0) { 1572 // Should be version 0, flags 0. 1573 return ERROR_MALFORMED; 1574 } 1575 1576 mLastTrack->meta->setData( 1577 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1578 1579 if (mPath.size() >= 2 1580 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1581 // Information from the ESDS must be relied on for proper 1582 // setup of sample rate and channel count for MPEG4 Audio. 1583 // The generic header appears to only contain generic 1584 // information... 
1585 1586 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1587 &buffer[4], chunk_data_size - 4); 1588 1589 if (err != OK) { 1590 return err; 1591 } 1592 } 1593 1594 break; 1595 } 1596 1597 case FOURCC('a', 'v', 'c', 'C'): 1598 { 1599 *offset += chunk_size; 1600 1601 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1602 1603 if (mDataSource->readAt( 1604 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1605 return ERROR_IO; 1606 } 1607 1608 mLastTrack->meta->setData( 1609 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1610 1611 break; 1612 } 1613 case FOURCC('h', 'v', 'c', 'C'): 1614 { 1615 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1616 1617 if (mDataSource->readAt( 1618 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1619 return ERROR_IO; 1620 } 1621 1622 mLastTrack->meta->setData( 1623 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size); 1624 1625 *offset += chunk_size; 1626 break; 1627 } 1628 1629 case FOURCC('d', '2', '6', '3'): 1630 { 1631 *offset += chunk_size; 1632 /* 1633 * d263 contains a fixed 7 bytes part: 1634 * vendor - 4 bytes 1635 * version - 1 byte 1636 * level - 1 byte 1637 * profile - 1 byte 1638 * optionally, "d263" box itself may contain a 16-byte 1639 * bit rate box (bitr) 1640 * average bit rate - 4 bytes 1641 * max bit rate - 4 bytes 1642 */ 1643 char buffer[23]; 1644 if (chunk_data_size != 7 && 1645 chunk_data_size != 23) { 1646 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1647 return ERROR_MALFORMED; 1648 } 1649 1650 if (mDataSource->readAt( 1651 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1652 return ERROR_IO; 1653 } 1654 1655 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1656 1657 break; 1658 } 1659 1660 case FOURCC('m', 'e', 't', 'a'): 1661 { 1662 uint8_t buffer[4]; 1663 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1664 *offset += chunk_size; 1665 return ERROR_MALFORMED; 1666 } 1667 1668 if (mDataSource->readAt( 1669 
data_offset, buffer, 4) < 4) { 1670 *offset += chunk_size; 1671 return ERROR_IO; 1672 } 1673 1674 if (U32_AT(buffer) != 0) { 1675 // Should be version 0, flags 0. 1676 1677 // If it's not, let's assume this is one of those 1678 // apparently malformed chunks that don't have flags 1679 // and completely different semantics than what's 1680 // in the MPEG4 specs and skip it. 1681 *offset += chunk_size; 1682 return OK; 1683 } 1684 1685 off64_t stop_offset = *offset + chunk_size; 1686 *offset = data_offset + sizeof(buffer); 1687 while (*offset < stop_offset) { 1688 status_t err = parseChunk(offset, depth + 1); 1689 if (err != OK) { 1690 return err; 1691 } 1692 } 1693 1694 if (*offset != stop_offset) { 1695 return ERROR_MALFORMED; 1696 } 1697 break; 1698 } 1699 1700 case FOURCC('m', 'e', 'a', 'n'): 1701 case FOURCC('n', 'a', 'm', 'e'): 1702 case FOURCC('d', 'a', 't', 'a'): 1703 { 1704 *offset += chunk_size; 1705 1706 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1707 status_t err = parseITunesMetaData(data_offset, chunk_data_size); 1708 1709 if (err != OK) { 1710 return err; 1711 } 1712 } 1713 1714 break; 1715 } 1716 1717 case FOURCC('m', 'v', 'h', 'd'): 1718 { 1719 *offset += chunk_size; 1720 1721 if (chunk_data_size < 24) { 1722 return ERROR_MALFORMED; 1723 } 1724 1725 uint8_t header[24]; 1726 if (mDataSource->readAt( 1727 data_offset, header, sizeof(header)) 1728 < (ssize_t)sizeof(header)) { 1729 return ERROR_IO; 1730 } 1731 1732 uint64_t creationTime; 1733 if (header[0] == 1) { 1734 creationTime = U64_AT(&header[4]); 1735 mHeaderTimescale = U32_AT(&header[20]); 1736 } else if (header[0] != 0) { 1737 return ERROR_MALFORMED; 1738 } else { 1739 creationTime = U32_AT(&header[4]); 1740 mHeaderTimescale = U32_AT(&header[12]); 1741 } 1742 1743 String8 s; 1744 convertTimeToDate(creationTime, &s); 1745 1746 mFileMetaData->setCString(kKeyDate, s.string()); 1747 1748 break; 1749 } 1750 1751 case FOURCC('m', 'd', 'a', 't'): 1752 { 1753 ALOGV("mdat chunk, drm: %d", 
mIsDrm); 1754 if (!mIsDrm) { 1755 *offset += chunk_size; 1756 break; 1757 } 1758 1759 if (chunk_size < 8) { 1760 return ERROR_MALFORMED; 1761 } 1762 1763 return parseDrmSINF(offset, data_offset); 1764 } 1765 1766 case FOURCC('h', 'd', 'l', 'r'): 1767 { 1768 *offset += chunk_size; 1769 1770 uint32_t buffer; 1771 if (mDataSource->readAt( 1772 data_offset + 8, &buffer, 4) < 4) { 1773 return ERROR_IO; 1774 } 1775 1776 uint32_t type = ntohl(buffer); 1777 // For the 3GPP file format, the handler-type within the 'hdlr' box 1778 // shall be 'text'. We also want to support 'sbtl' handler type 1779 // for a practical reason as various MPEG4 containers use it. 1780 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1781 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1782 } 1783 1784 break; 1785 } 1786 1787 case FOURCC('t', 'x', '3', 'g'): 1788 { 1789 uint32_t type; 1790 const void *data; 1791 size_t size = 0; 1792 if (!mLastTrack->meta->findData( 1793 kKeyTextFormatData, &type, &data, &size)) { 1794 size = 0; 1795 } 1796 1797 uint8_t *buffer = new uint8_t[size + chunk_size]; 1798 1799 if (size > 0) { 1800 memcpy(buffer, data, size); 1801 } 1802 1803 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1804 < chunk_size) { 1805 delete[] buffer; 1806 buffer = NULL; 1807 1808 // advance read pointer so we don't end up reading this again 1809 *offset += chunk_size; 1810 return ERROR_IO; 1811 } 1812 1813 mLastTrack->meta->setData( 1814 kKeyTextFormatData, 0, buffer, size + chunk_size); 1815 1816 delete[] buffer; 1817 1818 *offset += chunk_size; 1819 break; 1820 } 1821 1822 case FOURCC('c', 'o', 'v', 'r'): 1823 { 1824 *offset += chunk_size; 1825 1826 if (mFileMetaData != NULL) { 1827 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1828 chunk_data_size, data_offset); 1829 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1830 if (mDataSource->readAt( 1831 data_offset, buffer->data(), chunk_data_size) != 
(ssize_t)chunk_data_size) { 1832 return ERROR_IO; 1833 } 1834 const int kSkipBytesOfDataBox = 16; 1835 mFileMetaData->setData( 1836 kKeyAlbumArt, MetaData::TYPE_NONE, 1837 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1838 } 1839 1840 break; 1841 } 1842 1843 case FOURCC('t', 'i', 't', 'l'): 1844 case FOURCC('p', 'e', 'r', 'f'): 1845 case FOURCC('a', 'u', 't', 'h'): 1846 case FOURCC('g', 'n', 'r', 'e'): 1847 case FOURCC('a', 'l', 'b', 'm'): 1848 case FOURCC('y', 'r', 'r', 'c'): 1849 { 1850 *offset += chunk_size; 1851 1852 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth); 1853 1854 if (err != OK) { 1855 return err; 1856 } 1857 1858 break; 1859 } 1860 1861 case FOURCC('I', 'D', '3', '2'): 1862 { 1863 *offset += chunk_size; 1864 1865 if (chunk_data_size < 6) { 1866 return ERROR_MALFORMED; 1867 } 1868 1869 parseID3v2MetaData(data_offset + 6); 1870 1871 break; 1872 } 1873 1874 case FOURCC('-', '-', '-', '-'): 1875 { 1876 mLastCommentMean.clear(); 1877 mLastCommentName.clear(); 1878 mLastCommentData.clear(); 1879 *offset += chunk_size; 1880 break; 1881 } 1882 1883 case FOURCC('s', 'i', 'd', 'x'): 1884 { 1885 parseSegmentIndex(data_offset, chunk_data_size); 1886 *offset += chunk_size; 1887 return UNKNOWN_ERROR; // stop parsing after sidx 1888 } 1889 1890 default: 1891 { 1892 *offset += chunk_size; 1893 break; 1894 } 1895 } 1896 1897 return OK; 1898} 1899 1900status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 1901 ALOGV("MPEG4Extractor::parseSegmentIndex"); 1902 1903 if (size < 12) { 1904 return -EINVAL; 1905 } 1906 1907 uint32_t flags; 1908 if (!mDataSource->getUInt32(offset, &flags)) { 1909 return ERROR_MALFORMED; 1910 } 1911 1912 uint32_t version = flags >> 24; 1913 flags &= 0xffffff; 1914 1915 ALOGV("sidx version %d", version); 1916 1917 uint32_t referenceId; 1918 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 1919 return ERROR_MALFORMED; 1920 } 1921 1922 uint32_t timeScale; 1923 if 
(!mDataSource->getUInt32(offset + 8, &timeScale)) { 1924 return ERROR_MALFORMED; 1925 } 1926 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 1927 1928 uint64_t earliestPresentationTime; 1929 uint64_t firstOffset; 1930 1931 offset += 12; 1932 size -= 12; 1933 1934 if (version == 0) { 1935 if (size < 8) { 1936 return -EINVAL; 1937 } 1938 uint32_t tmp; 1939 if (!mDataSource->getUInt32(offset, &tmp)) { 1940 return ERROR_MALFORMED; 1941 } 1942 earliestPresentationTime = tmp; 1943 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 1944 return ERROR_MALFORMED; 1945 } 1946 firstOffset = tmp; 1947 offset += 8; 1948 size -= 8; 1949 } else { 1950 if (size < 16) { 1951 return -EINVAL; 1952 } 1953 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 1954 return ERROR_MALFORMED; 1955 } 1956 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 1957 return ERROR_MALFORMED; 1958 } 1959 offset += 16; 1960 size -= 16; 1961 } 1962 ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset); 1963 1964 if (size < 4) { 1965 return -EINVAL; 1966 } 1967 1968 uint16_t referenceCount; 1969 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) { 1970 return ERROR_MALFORMED; 1971 } 1972 offset += 4; 1973 size -= 4; 1974 ALOGV("refcount: %d", referenceCount); 1975 1976 if (size < referenceCount * 12) { 1977 return -EINVAL; 1978 } 1979 1980 uint64_t total_duration = 0; 1981 for (unsigned int i = 0; i < referenceCount; i++) { 1982 uint32_t d1, d2, d3; 1983 1984 if (!mDataSource->getUInt32(offset, &d1) || // size 1985 !mDataSource->getUInt32(offset + 4, &d2) || // duration 1986 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 1987 return ERROR_MALFORMED; 1988 } 1989 1990 if (d1 & 0x80000000) { 1991 ALOGW("sub-sidx boxes not supported yet"); 1992 } 1993 bool sap = d3 & 0x80000000; 1994 uint32_t saptype = (d3 >> 28) & 7; 1995 if (!sap || (saptype != 1 && saptype != 2)) { 1996 // type 1 and 2 are sync samples 1997 ALOGW("not a stream access point, or 
unsupported type: %08x", d3); 1998 } 1999 total_duration += d2; 2000 offset += 12; 2001 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 2002 SidxEntry se; 2003 se.mSize = d1 & 0x7fffffff; 2004 se.mDurationUs = 1000000LL * d2 / timeScale; 2005 mSidxEntries.add(se); 2006 } 2007 2008 mSidxDuration = total_duration * 1000000 / timeScale; 2009 ALOGV("duration: %lld", mSidxDuration); 2010 2011 int64_t metaDuration; 2012 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 2013 mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration); 2014 } 2015 return OK; 2016} 2017 2018 2019 2020status_t MPEG4Extractor::parseTrackHeader( 2021 off64_t data_offset, off64_t data_size) { 2022 if (data_size < 4) { 2023 return ERROR_MALFORMED; 2024 } 2025 2026 uint8_t version; 2027 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 2028 return ERROR_IO; 2029 } 2030 2031 size_t dynSize = (version == 1) ? 36 : 24; 2032 2033 uint8_t buffer[36 + 60]; 2034 2035 if (data_size != (off64_t)dynSize + 60) { 2036 return ERROR_MALFORMED; 2037 } 2038 2039 if (mDataSource->readAt( 2040 data_offset, buffer, data_size) < (ssize_t)data_size) { 2041 return ERROR_IO; 2042 } 2043 2044 uint64_t ctime, mtime, duration; 2045 int32_t id; 2046 2047 if (version == 1) { 2048 ctime = U64_AT(&buffer[4]); 2049 mtime = U64_AT(&buffer[12]); 2050 id = U32_AT(&buffer[20]); 2051 duration = U64_AT(&buffer[28]); 2052 } else if (version == 0) { 2053 ctime = U32_AT(&buffer[4]); 2054 mtime = U32_AT(&buffer[8]); 2055 id = U32_AT(&buffer[12]); 2056 duration = U32_AT(&buffer[20]); 2057 } else { 2058 return ERROR_UNSUPPORTED; 2059 } 2060 2061 mLastTrack->meta->setInt32(kKeyTrackID, id); 2062 2063 size_t matrixOffset = dynSize + 16; 2064 int32_t a00 = U32_AT(&buffer[matrixOffset]); 2065 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 2066 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 2067 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 2068 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 
2069 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 2070 2071#if 0 2072 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 2073 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 2074 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 2075 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 2076#endif 2077 2078 uint32_t rotationDegrees; 2079 2080 static const int32_t kFixedOne = 0x10000; 2081 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 2082 // Identity, no rotation 2083 rotationDegrees = 0; 2084 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 2085 rotationDegrees = 90; 2086 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 2087 rotationDegrees = 270; 2088 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 2089 rotationDegrees = 180; 2090 } else { 2091 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 2092 rotationDegrees = 0; 2093 } 2094 2095 if (rotationDegrees != 0) { 2096 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 2097 } 2098 2099 // Handle presentation display size, which could be different 2100 // from the image size indicated by kKeyWidth and kKeyHeight. 
2101 uint32_t width = U32_AT(&buffer[dynSize + 52]); 2102 uint32_t height = U32_AT(&buffer[dynSize + 56]); 2103 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 2104 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2105 2106 return OK; 2107} 2108 2109status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) { 2110 if (size < 4) { 2111 return ERROR_MALFORMED; 2112 } 2113 2114 uint8_t *buffer = new uint8_t[size + 1]; 2115 if (mDataSource->readAt( 2116 offset, buffer, size) != (ssize_t)size) { 2117 delete[] buffer; 2118 buffer = NULL; 2119 2120 return ERROR_IO; 2121 } 2122 2123 uint32_t flags = U32_AT(buffer); 2124 2125 uint32_t metadataKey = 0; 2126 char chunk[5]; 2127 MakeFourCCString(mPath[4], chunk); 2128 ALOGV("meta: %s @ %lld", chunk, offset); 2129 switch (mPath[4]) { 2130 case FOURCC(0xa9, 'a', 'l', 'b'): 2131 { 2132 metadataKey = kKeyAlbum; 2133 break; 2134 } 2135 case FOURCC(0xa9, 'A', 'R', 'T'): 2136 { 2137 metadataKey = kKeyArtist; 2138 break; 2139 } 2140 case FOURCC('a', 'A', 'R', 'T'): 2141 { 2142 metadataKey = kKeyAlbumArtist; 2143 break; 2144 } 2145 case FOURCC(0xa9, 'd', 'a', 'y'): 2146 { 2147 metadataKey = kKeyYear; 2148 break; 2149 } 2150 case FOURCC(0xa9, 'n', 'a', 'm'): 2151 { 2152 metadataKey = kKeyTitle; 2153 break; 2154 } 2155 case FOURCC(0xa9, 'w', 'r', 't'): 2156 { 2157 metadataKey = kKeyWriter; 2158 break; 2159 } 2160 case FOURCC('c', 'o', 'v', 'r'): 2161 { 2162 metadataKey = kKeyAlbumArt; 2163 break; 2164 } 2165 case FOURCC('g', 'n', 'r', 'e'): 2166 { 2167 metadataKey = kKeyGenre; 2168 break; 2169 } 2170 case FOURCC(0xa9, 'g', 'e', 'n'): 2171 { 2172 metadataKey = kKeyGenre; 2173 break; 2174 } 2175 case FOURCC('c', 'p', 'i', 'l'): 2176 { 2177 if (size == 9 && flags == 21) { 2178 char tmp[16]; 2179 sprintf(tmp, "%d", 2180 (int)buffer[size - 1]); 2181 2182 mFileMetaData->setCString(kKeyCompilation, tmp); 2183 } 2184 break; 2185 } 2186 case FOURCC('t', 'r', 'k', 'n'): 2187 { 2188 if (size == 16 && flags == 
0) { 2189 char tmp[16]; 2190 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2191 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2192 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2193 2194 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2195 } 2196 break; 2197 } 2198 case FOURCC('d', 'i', 's', 'k'): 2199 { 2200 if ((size == 14 || size == 16) && flags == 0) { 2201 char tmp[16]; 2202 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2203 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2204 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2205 2206 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2207 } 2208 break; 2209 } 2210 case FOURCC('-', '-', '-', '-'): 2211 { 2212 buffer[size] = '\0'; 2213 switch (mPath[5]) { 2214 case FOURCC('m', 'e', 'a', 'n'): 2215 mLastCommentMean.setTo((const char *)buffer + 4); 2216 break; 2217 case FOURCC('n', 'a', 'm', 'e'): 2218 mLastCommentName.setTo((const char *)buffer + 4); 2219 break; 2220 case FOURCC('d', 'a', 't', 'a'): 2221 mLastCommentData.setTo((const char *)buffer + 8); 2222 break; 2223 } 2224 2225 // Once we have a set of mean/name/data info, go ahead and process 2226 // it to see if its something we are interested in. Whether or not 2227 // were are interested in the specific tag, make sure to clear out 2228 // the set so we can be ready to process another tuple should one 2229 // show up later in the file. 
2230 if ((mLastCommentMean.length() != 0) && 2231 (mLastCommentName.length() != 0) && 2232 (mLastCommentData.length() != 0)) { 2233 2234 if (mLastCommentMean == "com.apple.iTunes" 2235 && mLastCommentName == "iTunSMPB") { 2236 int32_t delay, padding; 2237 if (sscanf(mLastCommentData, 2238 " %*x %x %x %*x", &delay, &padding) == 2) { 2239 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2240 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2241 } 2242 } 2243 2244 mLastCommentMean.clear(); 2245 mLastCommentName.clear(); 2246 mLastCommentData.clear(); 2247 } 2248 break; 2249 } 2250 2251 default: 2252 break; 2253 } 2254 2255 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) { 2256 if (metadataKey == kKeyAlbumArt) { 2257 mFileMetaData->setData( 2258 kKeyAlbumArt, MetaData::TYPE_NONE, 2259 buffer + 8, size - 8); 2260 } else if (metadataKey == kKeyGenre) { 2261 if (flags == 0) { 2262 // uint8_t genre code, iTunes genre codes are 2263 // the standard id3 codes, except they start 2264 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2265 // We use standard id3 numbering, so subtract 1. 
2266 int genrecode = (int)buffer[size - 1]; 2267 genrecode--; 2268 if (genrecode < 0) { 2269 genrecode = 255; // reserved for 'unknown genre' 2270 } 2271 char genre[10]; 2272 sprintf(genre, "%d", genrecode); 2273 2274 mFileMetaData->setCString(metadataKey, genre); 2275 } else if (flags == 1) { 2276 // custom genre string 2277 buffer[size] = '\0'; 2278 2279 mFileMetaData->setCString( 2280 metadataKey, (const char *)buffer + 8); 2281 } 2282 } else { 2283 buffer[size] = '\0'; 2284 2285 mFileMetaData->setCString( 2286 metadataKey, (const char *)buffer + 8); 2287 } 2288 } 2289 2290 delete[] buffer; 2291 buffer = NULL; 2292 2293 return OK; 2294} 2295 2296status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) { 2297 if (size < 4) { 2298 return ERROR_MALFORMED; 2299 } 2300 2301 uint8_t *buffer = new uint8_t[size]; 2302 if (mDataSource->readAt( 2303 offset, buffer, size) != (ssize_t)size) { 2304 delete[] buffer; 2305 buffer = NULL; 2306 2307 return ERROR_IO; 2308 } 2309 2310 uint32_t metadataKey = 0; 2311 switch (mPath[depth]) { 2312 case FOURCC('t', 'i', 't', 'l'): 2313 { 2314 metadataKey = kKeyTitle; 2315 break; 2316 } 2317 case FOURCC('p', 'e', 'r', 'f'): 2318 { 2319 metadataKey = kKeyArtist; 2320 break; 2321 } 2322 case FOURCC('a', 'u', 't', 'h'): 2323 { 2324 metadataKey = kKeyWriter; 2325 break; 2326 } 2327 case FOURCC('g', 'n', 'r', 'e'): 2328 { 2329 metadataKey = kKeyGenre; 2330 break; 2331 } 2332 case FOURCC('a', 'l', 'b', 'm'): 2333 { 2334 if (buffer[size - 1] != '\0') { 2335 char tmp[4]; 2336 sprintf(tmp, "%u", buffer[size - 1]); 2337 2338 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2339 } 2340 2341 metadataKey = kKeyAlbum; 2342 break; 2343 } 2344 case FOURCC('y', 'r', 'r', 'c'): 2345 { 2346 char tmp[5]; 2347 uint16_t year = U16_AT(&buffer[4]); 2348 2349 if (year < 10000) { 2350 sprintf(tmp, "%u", year); 2351 2352 mFileMetaData->setCString(kKeyYear, tmp); 2353 } 2354 break; 2355 } 2356 2357 default: 2358 break; 2359 } 2360 2361 
if (metadataKey > 0) { 2362 bool isUTF8 = true; // Common case 2363 char16_t *framedata = NULL; 2364 int len16 = 0; // Number of UTF-16 characters 2365 2366 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00 2367 if (size - 6 >= 4) { 2368 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator 2369 framedata = (char16_t *)(buffer + 6); 2370 if (0xfffe == *framedata) { 2371 // endianness marker (BOM) doesn't match host endianness 2372 for (int i = 0; i < len16; i++) { 2373 framedata[i] = bswap_16(framedata[i]); 2374 } 2375 // BOM is now swapped to 0xfeff, we will execute next block too 2376 } 2377 2378 if (0xfeff == *framedata) { 2379 // Remove the BOM 2380 framedata++; 2381 len16--; 2382 isUTF8 = false; 2383 } 2384 // else normal non-zero-length UTF-8 string 2385 // we can't handle UTF-16 without BOM as there is no other 2386 // indication of encoding. 2387 } 2388 2389 if (isUTF8) { 2390 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6); 2391 } else { 2392 // Convert from UTF-16 string to UTF-8 string. 
2393 String8 tmpUTF8str(framedata, len16); 2394 mFileMetaData->setCString(metadataKey, tmpUTF8str.string()); 2395 } 2396 } 2397 2398 delete[] buffer; 2399 buffer = NULL; 2400 2401 return OK; 2402} 2403 2404void MPEG4Extractor::parseID3v2MetaData(off64_t offset) { 2405 ID3 id3(mDataSource, true /* ignorev1 */, offset); 2406 2407 if (id3.isValid()) { 2408 struct Map { 2409 int key; 2410 const char *tag1; 2411 const char *tag2; 2412 }; 2413 static const Map kMap[] = { 2414 { kKeyAlbum, "TALB", "TAL" }, 2415 { kKeyArtist, "TPE1", "TP1" }, 2416 { kKeyAlbumArtist, "TPE2", "TP2" }, 2417 { kKeyComposer, "TCOM", "TCM" }, 2418 { kKeyGenre, "TCON", "TCO" }, 2419 { kKeyTitle, "TIT2", "TT2" }, 2420 { kKeyYear, "TYE", "TYER" }, 2421 { kKeyAuthor, "TXT", "TEXT" }, 2422 { kKeyCDTrackNumber, "TRK", "TRCK" }, 2423 { kKeyDiscNumber, "TPA", "TPOS" }, 2424 { kKeyCompilation, "TCP", "TCMP" }, 2425 }; 2426 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]); 2427 2428 for (size_t i = 0; i < kNumMapEntries; ++i) { 2429 if (!mFileMetaData->hasData(kMap[i].key)) { 2430 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1); 2431 if (it->done()) { 2432 delete it; 2433 it = new ID3::Iterator(id3, kMap[i].tag2); 2434 } 2435 2436 if (it->done()) { 2437 delete it; 2438 continue; 2439 } 2440 2441 String8 s; 2442 it->getString(&s); 2443 delete it; 2444 2445 mFileMetaData->setCString(kMap[i].key, s); 2446 } 2447 } 2448 2449 size_t dataSize; 2450 String8 mime; 2451 const void *data = id3.getAlbumArt(&dataSize, &mime); 2452 2453 if (data) { 2454 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize); 2455 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string()); 2456 } 2457 } 2458} 2459 2460sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2461 status_t err; 2462 if ((err = readMetaData()) != OK) { 2463 return NULL; 2464 } 2465 2466 Track *track = mFirstTrack; 2467 while (index > 0) { 2468 if (track == NULL) { 2469 return NULL; 2470 } 2471 2472 track = 
track->next; 2473 --index; 2474 } 2475 2476 if (track == NULL) { 2477 return NULL; 2478 } 2479 2480 ALOGV("getTrack called, pssh: %d", mPssh.size()); 2481 2482 return new MPEG4Source( 2483 track->meta, mDataSource, track->timescale, track->sampleTable, 2484 mSidxEntries, mMoofOffset); 2485} 2486 2487// static 2488status_t MPEG4Extractor::verifyTrack(Track *track) { 2489 const char *mime; 2490 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2491 2492 uint32_t type; 2493 const void *data; 2494 size_t size; 2495 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2496 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2497 || type != kTypeAVCC) { 2498 return ERROR_MALFORMED; 2499 } 2500 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) { 2501 if (!track->meta->findData(kKeyHVCC, &type, &data, &size) 2502 || type != kTypeHVCC) { 2503 return ERROR_MALFORMED; 2504 } 2505 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2506 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2507 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2508 || type != kTypeESDS) { 2509 return ERROR_MALFORMED; 2510 } 2511 } 2512 2513 if (!track->sampleTable->isValid()) { 2514 // Make sure we have all the metadata we need. 
2515 return ERROR_MALFORMED; 2516 } 2517 2518 return OK; 2519} 2520 2521typedef enum { 2522 //AOT_NONE = -1, 2523 //AOT_NULL_OBJECT = 0, 2524 //AOT_AAC_MAIN = 1, /**< Main profile */ 2525 AOT_AAC_LC = 2, /**< Low Complexity object */ 2526 //AOT_AAC_SSR = 3, 2527 //AOT_AAC_LTP = 4, 2528 AOT_SBR = 5, 2529 //AOT_AAC_SCAL = 6, 2530 //AOT_TWIN_VQ = 7, 2531 //AOT_CELP = 8, 2532 //AOT_HVXC = 9, 2533 //AOT_RSVD_10 = 10, /**< (reserved) */ 2534 //AOT_RSVD_11 = 11, /**< (reserved) */ 2535 //AOT_TTSI = 12, /**< TTSI Object */ 2536 //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ 2537 //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ 2538 //AOT_GEN_MIDI = 15, /**< General MIDI object */ 2539 //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ 2540 AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ 2541 //AOT_RSVD_18 = 18, /**< (reserved) */ 2542 //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ 2543 AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ 2544 //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ 2545 AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ 2546 AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ 2547 //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ 2548 //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ 2549 //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ 2550 //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ 2551 //AOT_RSVD_28 = 28, /**< might become SSC */ 2552 AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ 2553 //AOT_MPEGS = 30, /**< MPEG Surround */ 2554 2555 AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ 2556 2557 //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ 2558 //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ 2559 //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ 2560 //AOT_RSVD_35 = 35, /**< might become DST */ 2561 //AOT_RSVD_36 = 36, /**< 
might become ALS */ 2562 //AOT_AAC_SLS = 37, /**< AAC + SLS */ 2563 //AOT_SLS = 38, /**< SLS */ 2564 //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ 2565 2566 //AOT_USAC = 42, /**< USAC */ 2567 //AOT_SAOC = 43, /**< SAOC */ 2568 //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ 2569 2570 //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ 2571} AUDIO_OBJECT_TYPE; 2572 2573status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2574 const void *esds_data, size_t esds_size) { 2575 ESDS esds(esds_data, esds_size); 2576 2577 uint8_t objectTypeIndication; 2578 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2579 return ERROR_MALFORMED; 2580 } 2581 2582 if (objectTypeIndication == 0xe1) { 2583 // This isn't MPEG4 audio at all, it's QCELP 14k... 2584 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2585 return OK; 2586 } 2587 2588 if (objectTypeIndication == 0x6b) { 2589 // The media subtype is MP3 audio 2590 // Our software MP3 audio decoder may not be able to handle 2591 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2592 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2593 return ERROR_UNSUPPORTED; 2594 } 2595 2596 const uint8_t *csd; 2597 size_t csd_size; 2598 if (esds.getCodecSpecificInfo( 2599 (const void **)&csd, &csd_size) != OK) { 2600 return ERROR_MALFORMED; 2601 } 2602 2603#if 0 2604 printf("ESD of size %d\n", csd_size); 2605 hexdump(csd, csd_size); 2606#endif 2607 2608 if (csd_size == 0) { 2609 // There's no further information, i.e. no codec specific data 2610 // Let's assume that the information provided in the mpeg4 headers 2611 // is accurate and hope for the best. 
2612 2613 return OK; 2614 } 2615 2616 if (csd_size < 2) { 2617 return ERROR_MALFORMED; 2618 } 2619 2620 static uint32_t kSamplingRate[] = { 2621 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2622 16000, 12000, 11025, 8000, 7350 2623 }; 2624 2625 ABitReader br(csd, csd_size); 2626 uint32_t objectType = br.getBits(5); 2627 2628 if (objectType == 31) { // AAC-ELD => additional 6 bits 2629 objectType = 32 + br.getBits(6); 2630 } 2631 2632 //keep AOT type 2633 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2634 2635 uint32_t freqIndex = br.getBits(4); 2636 2637 int32_t sampleRate = 0; 2638 int32_t numChannels = 0; 2639 if (freqIndex == 15) { 2640 if (csd_size < 5) { 2641 return ERROR_MALFORMED; 2642 } 2643 sampleRate = br.getBits(24); 2644 numChannels = br.getBits(4); 2645 } else { 2646 numChannels = br.getBits(4); 2647 2648 if (freqIndex == 13 || freqIndex == 14) { 2649 return ERROR_MALFORMED; 2650 } 2651 2652 sampleRate = kSamplingRate[freqIndex]; 2653 } 2654 2655 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 2656 uint32_t extFreqIndex = br.getBits(4); 2657 int32_t extSampleRate; 2658 if (extFreqIndex == 15) { 2659 if (csd_size < 8) { 2660 return ERROR_MALFORMED; 2661 } 2662 extSampleRate = br.getBits(24); 2663 } else { 2664 if (extFreqIndex == 13 || extFreqIndex == 14) { 2665 return ERROR_MALFORMED; 2666 } 2667 extSampleRate = kSamplingRate[extFreqIndex]; 2668 } 2669 //TODO: save the extension sampling rate value in meta data => 2670 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2671 } 2672 2673 switch (numChannels) { 2674 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration 2675 case 0: 2676 case 1:// FC 2677 case 2:// FL FR 2678 case 3:// FC, FL FR 2679 case 4:// FC, FL FR, RC 2680 case 5:// FC, FL FR, SL SR 2681 case 6:// FC, FL FR, SL SR, LFE 2682 //numChannels already contains the right value 2683 break; 2684 case 11:// FC, FL FR, SL SR, RC, LFE 2685 
numChannels = 7; 2686 break; 2687 case 7: // FC, FCL FCR, FL FR, SL SR, LFE 2688 case 12:// FC, FL FR, SL SR, RL RR, LFE 2689 case 14:// FC, FL FR, SL SR, LFE, FHL FHR 2690 numChannels = 8; 2691 break; 2692 default: 2693 return ERROR_UNSUPPORTED; 2694 } 2695 2696 { 2697 if (objectType == AOT_SBR || objectType == AOT_PS) { 2698 const int32_t extensionSamplingFrequency = br.getBits(4); 2699 objectType = br.getBits(5); 2700 2701 if (objectType == AOT_ESCAPE) { 2702 objectType = 32 + br.getBits(6); 2703 } 2704 } 2705 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || 2706 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || 2707 objectType == AOT_ER_BSAC) { 2708 const int32_t frameLengthFlag = br.getBits(1); 2709 2710 const int32_t dependsOnCoreCoder = br.getBits(1); 2711 2712 if (dependsOnCoreCoder ) { 2713 const int32_t coreCoderDelay = br.getBits(14); 2714 } 2715 2716 const int32_t extensionFlag = br.getBits(1); 2717 2718 if (numChannels == 0 ) { 2719 int32_t channelsEffectiveNum = 0; 2720 int32_t channelsNum = 0; 2721 const int32_t ElementInstanceTag = br.getBits(4); 2722 const int32_t Profile = br.getBits(2); 2723 const int32_t SamplingFrequencyIndex = br.getBits(4); 2724 const int32_t NumFrontChannelElements = br.getBits(4); 2725 const int32_t NumSideChannelElements = br.getBits(4); 2726 const int32_t NumBackChannelElements = br.getBits(4); 2727 const int32_t NumLfeChannelElements = br.getBits(2); 2728 const int32_t NumAssocDataElements = br.getBits(3); 2729 const int32_t NumValidCcElements = br.getBits(4); 2730 2731 const int32_t MonoMixdownPresent = br.getBits(1); 2732 if (MonoMixdownPresent != 0) { 2733 const int32_t MonoMixdownElementNumber = br.getBits(4); 2734 } 2735 2736 const int32_t StereoMixdownPresent = br.getBits(1); 2737 if (StereoMixdownPresent != 0) { 2738 const int32_t StereoMixdownElementNumber = br.getBits(4); 2739 } 2740 2741 const int32_t MatrixMixdownIndexPresent = br.getBits(1); 2742 if (MatrixMixdownIndexPresent != 0) 
{ 2743 const int32_t MatrixMixdownIndex = br.getBits(2); 2744 const int32_t PseudoSurroundEnable = br.getBits(1); 2745 } 2746 2747 int i; 2748 for (i=0; i < NumFrontChannelElements; i++) { 2749 const int32_t FrontElementIsCpe = br.getBits(1); 2750 const int32_t FrontElementTagSelect = br.getBits(4); 2751 channelsNum += FrontElementIsCpe ? 2 : 1; 2752 } 2753 2754 for (i=0; i < NumSideChannelElements; i++) { 2755 const int32_t SideElementIsCpe = br.getBits(1); 2756 const int32_t SideElementTagSelect = br.getBits(4); 2757 channelsNum += SideElementIsCpe ? 2 : 1; 2758 } 2759 2760 for (i=0; i < NumBackChannelElements; i++) { 2761 const int32_t BackElementIsCpe = br.getBits(1); 2762 const int32_t BackElementTagSelect = br.getBits(4); 2763 channelsNum += BackElementIsCpe ? 2 : 1; 2764 } 2765 channelsEffectiveNum = channelsNum; 2766 2767 for (i=0; i < NumLfeChannelElements; i++) { 2768 const int32_t LfeElementTagSelect = br.getBits(4); 2769 channelsNum += 1; 2770 } 2771 ALOGV("mpeg4 audio channelsNum = %d", channelsNum); 2772 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); 2773 numChannels = channelsNum; 2774 } 2775 } 2776 } 2777 2778 if (numChannels == 0) { 2779 return ERROR_UNSUPPORTED; 2780 } 2781 2782 int32_t prevSampleRate; 2783 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2784 2785 if (prevSampleRate != sampleRate) { 2786 ALOGV("mpeg4 audio sample rate different from previous setting. " 2787 "was: %d, now: %d", prevSampleRate, sampleRate); 2788 } 2789 2790 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2791 2792 int32_t prevChannelCount; 2793 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2794 2795 if (prevChannelCount != numChannels) { 2796 ALOGV("mpeg4 audio channel count different from previous setting. 
" 2797 "was: %d, now: %d", prevChannelCount, numChannels); 2798 } 2799 2800 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2801 2802 return OK; 2803} 2804 2805//////////////////////////////////////////////////////////////////////////////// 2806 2807MPEG4Source::MPEG4Source( 2808 const sp<MetaData> &format, 2809 const sp<DataSource> &dataSource, 2810 int32_t timeScale, 2811 const sp<SampleTable> &sampleTable, 2812 Vector<SidxEntry> &sidx, 2813 off64_t firstMoofOffset) 2814 : mFormat(format), 2815 mDataSource(dataSource), 2816 mTimescale(timeScale), 2817 mSampleTable(sampleTable), 2818 mCurrentSampleIndex(0), 2819 mCurrentFragmentIndex(0), 2820 mSegments(sidx), 2821 mFirstMoofOffset(firstMoofOffset), 2822 mCurrentMoofOffset(firstMoofOffset), 2823 mCurrentTime(0), 2824 mCurrentSampleInfoAllocSize(0), 2825 mCurrentSampleInfoSizes(NULL), 2826 mCurrentSampleInfoOffsetsAllocSize(0), 2827 mCurrentSampleInfoOffsets(NULL), 2828 mIsAVC(false), 2829 mIsHEVC(false), 2830 mNALLengthSize(0), 2831 mStarted(false), 2832 mGroup(NULL), 2833 mBuffer(NULL), 2834 mWantsNALFragments(false), 2835 mSrcBuffer(NULL) { 2836 2837 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 2838 mDefaultIVSize = 0; 2839 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 2840 uint32_t keytype; 2841 const void *key; 2842 size_t keysize; 2843 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 2844 CHECK(keysize <= 16); 2845 memset(mCryptoKey, 0, 16); 2846 memcpy(mCryptoKey, key, keysize); 2847 } 2848 2849 const char *mime; 2850 bool success = mFormat->findCString(kKeyMIMEType, &mime); 2851 CHECK(success); 2852 2853 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 2854 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC); 2855 2856 if (mIsAVC) { 2857 uint32_t type; 2858 const void *data; 2859 size_t size; 2860 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 2861 2862 const uint8_t *ptr = (const uint8_t *)data; 2863 2864 CHECK(size >= 7); 2865 
CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2866 2867 // The number of bytes used to encode the length of a NAL unit. 2868 mNALLengthSize = 1 + (ptr[4] & 3); 2869 } else if (mIsHEVC) { 2870 uint32_t type; 2871 const void *data; 2872 size_t size; 2873 CHECK(format->findData(kKeyHVCC, &type, &data, &size)); 2874 2875 const uint8_t *ptr = (const uint8_t *)data; 2876 2877 CHECK(size >= 7); 2878 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2879 2880 mNALLengthSize = 1 + (ptr[14 + 7] & 3); 2881 } 2882 2883 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 2884 2885 if (mFirstMoofOffset != 0) { 2886 off64_t offset = mFirstMoofOffset; 2887 parseChunk(&offset); 2888 } 2889} 2890 2891MPEG4Source::~MPEG4Source() { 2892 if (mStarted) { 2893 stop(); 2894 } 2895 free(mCurrentSampleInfoSizes); 2896 free(mCurrentSampleInfoOffsets); 2897} 2898 2899status_t MPEG4Source::start(MetaData *params) { 2900 Mutex::Autolock autoLock(mLock); 2901 2902 CHECK(!mStarted); 2903 2904 int32_t val; 2905 if (params && params->findInt32(kKeyWantsNALFragments, &val) 2906 && val != 0) { 2907 mWantsNALFragments = true; 2908 } else { 2909 mWantsNALFragments = false; 2910 } 2911 2912 mGroup = new MediaBufferGroup; 2913 2914 int32_t max_size; 2915 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 2916 2917 mGroup->add_buffer(new MediaBuffer(max_size)); 2918 2919 mSrcBuffer = new uint8_t[max_size]; 2920 2921 mStarted = true; 2922 2923 return OK; 2924} 2925 2926status_t MPEG4Source::stop() { 2927 Mutex::Autolock autoLock(mLock); 2928 2929 CHECK(mStarted); 2930 2931 if (mBuffer != NULL) { 2932 mBuffer->release(); 2933 mBuffer = NULL; 2934 } 2935 2936 delete[] mSrcBuffer; 2937 mSrcBuffer = NULL; 2938 2939 delete mGroup; 2940 mGroup = NULL; 2941 2942 mStarted = false; 2943 mCurrentSampleIndex = 0; 2944 2945 return OK; 2946} 2947 2948status_t MPEG4Source::parseChunk(off64_t *offset) { 2949 uint32_t hdr[2]; 2950 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2951 return 
ERROR_IO; 2952 } 2953 uint64_t chunk_size = ntohl(hdr[0]); 2954 uint32_t chunk_type = ntohl(hdr[1]); 2955 off64_t data_offset = *offset + 8; 2956 2957 if (chunk_size == 1) { 2958 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 2959 return ERROR_IO; 2960 } 2961 chunk_size = ntoh64(chunk_size); 2962 data_offset += 8; 2963 2964 if (chunk_size < 16) { 2965 // The smallest valid chunk is 16 bytes long in this case. 2966 return ERROR_MALFORMED; 2967 } 2968 } else if (chunk_size < 8) { 2969 // The smallest valid chunk is 8 bytes long. 2970 return ERROR_MALFORMED; 2971 } 2972 2973 char chunk[5]; 2974 MakeFourCCString(chunk_type, chunk); 2975 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 2976 2977 off64_t chunk_data_size = *offset + chunk_size - data_offset; 2978 2979 switch(chunk_type) { 2980 2981 case FOURCC('t', 'r', 'a', 'f'): 2982 case FOURCC('m', 'o', 'o', 'f'): { 2983 off64_t stop_offset = *offset + chunk_size; 2984 *offset = data_offset; 2985 while (*offset < stop_offset) { 2986 status_t err = parseChunk(offset); 2987 if (err != OK) { 2988 return err; 2989 } 2990 } 2991 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 2992 // *offset points to the box following this moof. Find the next moof from there. 
2993 2994 while (true) { 2995 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2996 return ERROR_END_OF_STREAM; 2997 } 2998 chunk_size = ntohl(hdr[0]); 2999 chunk_type = ntohl(hdr[1]); 3000 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 3001 mNextMoofOffset = *offset; 3002 break; 3003 } 3004 *offset += chunk_size; 3005 } 3006 } 3007 break; 3008 } 3009 3010 case FOURCC('t', 'f', 'h', 'd'): { 3011 status_t err; 3012 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 3013 return err; 3014 } 3015 *offset += chunk_size; 3016 break; 3017 } 3018 3019 case FOURCC('t', 'r', 'u', 'n'): { 3020 status_t err; 3021 if (mLastParsedTrackId == mTrackId) { 3022 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 3023 return err; 3024 } 3025 } 3026 3027 *offset += chunk_size; 3028 break; 3029 } 3030 3031 case FOURCC('s', 'a', 'i', 'z'): { 3032 status_t err; 3033 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 3034 return err; 3035 } 3036 *offset += chunk_size; 3037 break; 3038 } 3039 case FOURCC('s', 'a', 'i', 'o'): { 3040 status_t err; 3041 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 3042 return err; 3043 } 3044 *offset += chunk_size; 3045 break; 3046 } 3047 3048 case FOURCC('m', 'd', 'a', 't'): { 3049 // parse DRM info if present 3050 ALOGV("MPEG4Source::parseChunk mdat"); 3051 // if saiz/saoi was previously observed, do something with the sampleinfos 3052 *offset += chunk_size; 3053 break; 3054 } 3055 3056 default: { 3057 *offset += chunk_size; 3058 break; 3059 } 3060 } 3061 return OK; 3062} 3063 3064status_t MPEG4Source::parseSampleAuxiliaryInformationSizes( 3065 off64_t offset, off64_t /* size */) { 3066 ALOGV("parseSampleAuxiliaryInformationSizes"); 3067 // 14496-12 8.7.12 3068 uint8_t version; 3069 if (mDataSource->readAt( 3070 offset, &version, sizeof(version)) 3071 < (ssize_t)sizeof(version)) { 3072 return ERROR_IO; 3073 } 3074 3075 if (version 
!= 0) { 3076 return ERROR_UNSUPPORTED; 3077 } 3078 offset++; 3079 3080 uint32_t flags; 3081 if (!mDataSource->getUInt24(offset, &flags)) { 3082 return ERROR_IO; 3083 } 3084 offset += 3; 3085 3086 if (flags & 1) { 3087 uint32_t tmp; 3088 if (!mDataSource->getUInt32(offset, &tmp)) { 3089 return ERROR_MALFORMED; 3090 } 3091 mCurrentAuxInfoType = tmp; 3092 offset += 4; 3093 if (!mDataSource->getUInt32(offset, &tmp)) { 3094 return ERROR_MALFORMED; 3095 } 3096 mCurrentAuxInfoTypeParameter = tmp; 3097 offset += 4; 3098 } 3099 3100 uint8_t defsize; 3101 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 3102 return ERROR_MALFORMED; 3103 } 3104 mCurrentDefaultSampleInfoSize = defsize; 3105 offset++; 3106 3107 uint32_t smplcnt; 3108 if (!mDataSource->getUInt32(offset, &smplcnt)) { 3109 return ERROR_MALFORMED; 3110 } 3111 mCurrentSampleInfoCount = smplcnt; 3112 offset += 4; 3113 3114 if (mCurrentDefaultSampleInfoSize != 0) { 3115 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 3116 return OK; 3117 } 3118 if (smplcnt > mCurrentSampleInfoAllocSize) { 3119 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 3120 mCurrentSampleInfoAllocSize = smplcnt; 3121 } 3122 3123 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 3124 return OK; 3125} 3126 3127status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets( 3128 off64_t offset, off64_t /* size */) { 3129 ALOGV("parseSampleAuxiliaryInformationOffsets"); 3130 // 14496-12 8.7.13 3131 uint8_t version; 3132 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 3133 return ERROR_IO; 3134 } 3135 offset++; 3136 3137 uint32_t flags; 3138 if (!mDataSource->getUInt24(offset, &flags)) { 3139 return ERROR_IO; 3140 } 3141 offset += 3; 3142 3143 uint32_t entrycount; 3144 if (!mDataSource->getUInt32(offset, &entrycount)) { 3145 return ERROR_IO; 3146 } 3147 offset += 4; 3148 3149 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 3150 
mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 3151 mCurrentSampleInfoOffsetsAllocSize = entrycount; 3152 } 3153 mCurrentSampleInfoOffsetCount = entrycount; 3154 3155 for (size_t i = 0; i < entrycount; i++) { 3156 if (version == 0) { 3157 uint32_t tmp; 3158 if (!mDataSource->getUInt32(offset, &tmp)) { 3159 return ERROR_IO; 3160 } 3161 mCurrentSampleInfoOffsets[i] = tmp; 3162 offset += 4; 3163 } else { 3164 uint64_t tmp; 3165 if (!mDataSource->getUInt64(offset, &tmp)) { 3166 return ERROR_IO; 3167 } 3168 mCurrentSampleInfoOffsets[i] = tmp; 3169 offset += 8; 3170 } 3171 } 3172 3173 // parse clear/encrypted data 3174 3175 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 3176 3177 drmoffset += mCurrentMoofOffset; 3178 int ivlength; 3179 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 3180 3181 // read CencSampleAuxiliaryDataFormats 3182 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 3183 Sample *smpl = &mCurrentSamples.editItemAt(i); 3184 3185 memset(smpl->iv, 0, 16); 3186 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 3187 return ERROR_IO; 3188 } 3189 3190 drmoffset += ivlength; 3191 3192 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 3193 if (smplinfosize == 0) { 3194 smplinfosize = mCurrentSampleInfoSizes[i]; 3195 } 3196 if (smplinfosize > ivlength) { 3197 uint16_t numsubsamples; 3198 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 3199 return ERROR_IO; 3200 } 3201 drmoffset += 2; 3202 for (size_t j = 0; j < numsubsamples; j++) { 3203 uint16_t numclear; 3204 uint32_t numencrypted; 3205 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 3206 return ERROR_IO; 3207 } 3208 drmoffset += 2; 3209 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 3210 return ERROR_IO; 3211 } 3212 drmoffset += 4; 3213 smpl->clearsizes.add(numclear); 3214 smpl->encryptedsizes.add(numencrypted); 3215 } 3216 } else { 3217 smpl->clearsizes.add(0); 3218 
smpl->encryptedsizes.add(smpl->size); 3219 } 3220 } 3221 3222 3223 return OK; 3224} 3225 3226status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 3227 3228 if (size < 8) { 3229 return -EINVAL; 3230 } 3231 3232 uint32_t flags; 3233 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 3234 return ERROR_MALFORMED; 3235 } 3236 3237 if (flags & 0xff000000) { 3238 return -EINVAL; 3239 } 3240 3241 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 3242 return ERROR_MALFORMED; 3243 } 3244 3245 if (mLastParsedTrackId != mTrackId) { 3246 // this is not the right track, skip it 3247 return OK; 3248 } 3249 3250 mTrackFragmentHeaderInfo.mFlags = flags; 3251 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 3252 offset += 8; 3253 size -= 8; 3254 3255 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 3256 3257 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 3258 if (size < 8) { 3259 return -EINVAL; 3260 } 3261 3262 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) { 3263 return ERROR_MALFORMED; 3264 } 3265 offset += 8; 3266 size -= 8; 3267 } 3268 3269 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 3270 if (size < 4) { 3271 return -EINVAL; 3272 } 3273 3274 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 3275 return ERROR_MALFORMED; 3276 } 3277 offset += 4; 3278 size -= 4; 3279 } 3280 3281 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3282 if (size < 4) { 3283 return -EINVAL; 3284 } 3285 3286 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 3287 return ERROR_MALFORMED; 3288 } 3289 offset += 4; 3290 size -= 4; 3291 } 3292 3293 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3294 if (size < 4) { 3295 return -EINVAL; 3296 } 3297 3298 if (!mDataSource->getUInt32(offset, 
&mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 3299 return ERROR_MALFORMED; 3300 } 3301 offset += 4; 3302 size -= 4; 3303 } 3304 3305 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3306 if (size < 4) { 3307 return -EINVAL; 3308 } 3309 3310 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 3311 return ERROR_MALFORMED; 3312 } 3313 offset += 4; 3314 size -= 4; 3315 } 3316 3317 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 3318 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 3319 } 3320 3321 mTrackFragmentHeaderInfo.mDataOffset = 0; 3322 return OK; 3323} 3324 3325status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 3326 3327 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 3328 if (size < 8) { 3329 return -EINVAL; 3330 } 3331 3332 enum { 3333 kDataOffsetPresent = 0x01, 3334 kFirstSampleFlagsPresent = 0x04, 3335 kSampleDurationPresent = 0x100, 3336 kSampleSizePresent = 0x200, 3337 kSampleFlagsPresent = 0x400, 3338 kSampleCompositionTimeOffsetPresent = 0x800, 3339 }; 3340 3341 uint32_t flags; 3342 if (!mDataSource->getUInt32(offset, &flags)) { 3343 return ERROR_MALFORMED; 3344 } 3345 ALOGV("fragment run flags: %08x", flags); 3346 3347 if (flags & 0xff000000) { 3348 return -EINVAL; 3349 } 3350 3351 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 3352 // These two shall not be used together. 
3353 return -EINVAL; 3354 } 3355 3356 uint32_t sampleCount; 3357 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 3358 return ERROR_MALFORMED; 3359 } 3360 offset += 8; 3361 size -= 8; 3362 3363 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 3364 3365 uint32_t firstSampleFlags = 0; 3366 3367 if (flags & kDataOffsetPresent) { 3368 if (size < 4) { 3369 return -EINVAL; 3370 } 3371 3372 int32_t dataOffsetDelta; 3373 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 3374 return ERROR_MALFORMED; 3375 } 3376 3377 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 3378 3379 offset += 4; 3380 size -= 4; 3381 } 3382 3383 if (flags & kFirstSampleFlagsPresent) { 3384 if (size < 4) { 3385 return -EINVAL; 3386 } 3387 3388 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 3389 return ERROR_MALFORMED; 3390 } 3391 offset += 4; 3392 size -= 4; 3393 } 3394 3395 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 3396 sampleCtsOffset = 0; 3397 3398 size_t bytesPerSample = 0; 3399 if (flags & kSampleDurationPresent) { 3400 bytesPerSample += 4; 3401 } else if (mTrackFragmentHeaderInfo.mFlags 3402 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 3403 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3404 } else { 3405 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 3406 } 3407 3408 if (flags & kSampleSizePresent) { 3409 bytesPerSample += 4; 3410 } else if (mTrackFragmentHeaderInfo.mFlags 3411 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 3412 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3413 } else { 3414 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 3415 } 3416 3417 if (flags & kSampleFlagsPresent) { 3418 bytesPerSample += 4; 3419 } else if (mTrackFragmentHeaderInfo.mFlags 3420 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 3421 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3422 } else { 3423 sampleFlags = 
mTrackFragmentHeaderInfo.mDefaultSampleFlags; 3424 } 3425 3426 if (flags & kSampleCompositionTimeOffsetPresent) { 3427 bytesPerSample += 4; 3428 } else { 3429 sampleCtsOffset = 0; 3430 } 3431 3432 if (size < sampleCount * bytesPerSample) { 3433 return -EINVAL; 3434 } 3435 3436 Sample tmp; 3437 for (uint32_t i = 0; i < sampleCount; ++i) { 3438 if (flags & kSampleDurationPresent) { 3439 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 3440 return ERROR_MALFORMED; 3441 } 3442 offset += 4; 3443 } 3444 3445 if (flags & kSampleSizePresent) { 3446 if (!mDataSource->getUInt32(offset, &sampleSize)) { 3447 return ERROR_MALFORMED; 3448 } 3449 offset += 4; 3450 } 3451 3452 if (flags & kSampleFlagsPresent) { 3453 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 3454 return ERROR_MALFORMED; 3455 } 3456 offset += 4; 3457 } 3458 3459 if (flags & kSampleCompositionTimeOffsetPresent) { 3460 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3461 return ERROR_MALFORMED; 3462 } 3463 offset += 4; 3464 } 3465 3466 ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, " 3467 " flags 0x%08x", i + 1, 3468 dataOffset, sampleSize, sampleDuration, 3469 (flags & kFirstSampleFlagsPresent) && i == 0 3470 ? 
firstSampleFlags : sampleFlags); 3471 tmp.offset = dataOffset; 3472 tmp.size = sampleSize; 3473 tmp.duration = sampleDuration; 3474 mCurrentSamples.add(tmp); 3475 3476 dataOffset += sampleSize; 3477 } 3478 3479 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3480 3481 return OK; 3482} 3483 3484sp<MetaData> MPEG4Source::getFormat() { 3485 Mutex::Autolock autoLock(mLock); 3486 3487 return mFormat; 3488} 3489 3490size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3491 switch (mNALLengthSize) { 3492 case 1: 3493 return *data; 3494 case 2: 3495 return U16_AT(data); 3496 case 3: 3497 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3498 case 4: 3499 return U32_AT(data); 3500 } 3501 3502 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3503 // a 2-bit integer. 3504 CHECK(!"Should not be here."); 3505 3506 return 0; 3507} 3508 3509status_t MPEG4Source::read( 3510 MediaBuffer **out, const ReadOptions *options) { 3511 Mutex::Autolock autoLock(mLock); 3512 3513 CHECK(mStarted); 3514 3515 if (mFirstMoofOffset > 0) { 3516 return fragmentedRead(out, options); 3517 } 3518 3519 *out = NULL; 3520 3521 int64_t targetSampleTimeUs = -1; 3522 3523 int64_t seekTimeUs; 3524 ReadOptions::SeekMode mode; 3525 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3526 uint32_t findFlags = 0; 3527 switch (mode) { 3528 case ReadOptions::SEEK_PREVIOUS_SYNC: 3529 findFlags = SampleTable::kFlagBefore; 3530 break; 3531 case ReadOptions::SEEK_NEXT_SYNC: 3532 findFlags = SampleTable::kFlagAfter; 3533 break; 3534 case ReadOptions::SEEK_CLOSEST_SYNC: 3535 case ReadOptions::SEEK_CLOSEST: 3536 findFlags = SampleTable::kFlagClosest; 3537 break; 3538 default: 3539 CHECK(!"Should not be here."); 3540 break; 3541 } 3542 3543 uint32_t sampleIndex; 3544 status_t err = mSampleTable->findSampleAtTime( 3545 seekTimeUs * mTimescale / 1000000, 3546 &sampleIndex, findFlags); 3547 3548 if (mode == ReadOptions::SEEK_CLOSEST) { 3549 // We found the closest sample already, now we 
want the sync 3550 // sample preceding it (or the sample itself of course), even 3551 // if the subsequent sync sample is closer. 3552 findFlags = SampleTable::kFlagBefore; 3553 } 3554 3555 uint32_t syncSampleIndex; 3556 if (err == OK) { 3557 err = mSampleTable->findSyncSampleNear( 3558 sampleIndex, &syncSampleIndex, findFlags); 3559 } 3560 3561 uint32_t sampleTime; 3562 if (err == OK) { 3563 err = mSampleTable->getMetaDataForSample( 3564 sampleIndex, NULL, NULL, &sampleTime); 3565 } 3566 3567 if (err != OK) { 3568 if (err == ERROR_OUT_OF_RANGE) { 3569 // An attempt to seek past the end of the stream would 3570 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3571 // this all the way to the MediaPlayer would cause abnormal 3572 // termination. Legacy behaviour appears to be to behave as if 3573 // we had seeked to the end of stream, ending normally. 3574 err = ERROR_END_OF_STREAM; 3575 } 3576 ALOGV("end of stream"); 3577 return err; 3578 } 3579 3580 if (mode == ReadOptions::SEEK_CLOSEST) { 3581 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3582 } 3583 3584#if 0 3585 uint32_t syncSampleTime; 3586 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3587 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3588 3589 ALOGI("seek to time %lld us => sample at time %lld us, " 3590 "sync sample at time %lld us", 3591 seekTimeUs, 3592 sampleTime * 1000000ll / mTimescale, 3593 syncSampleTime * 1000000ll / mTimescale); 3594#endif 3595 3596 mCurrentSampleIndex = syncSampleIndex; 3597 if (mBuffer != NULL) { 3598 mBuffer->release(); 3599 mBuffer = NULL; 3600 } 3601 3602 // fall through 3603 } 3604 3605 off64_t offset; 3606 size_t size; 3607 uint32_t cts, stts; 3608 bool isSyncSample; 3609 bool newBuffer = false; 3610 if (mBuffer == NULL) { 3611 newBuffer = true; 3612 3613 status_t err = 3614 mSampleTable->getMetaDataForSample( 3615 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts); 3616 3617 if (err != OK) { 3618 return err; 3619 } 3620 3621 err 
= mGroup->acquire_buffer(&mBuffer); 3622 3623 if (err != OK) { 3624 CHECK(mBuffer == NULL); 3625 return err; 3626 } 3627 } 3628 3629 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) { 3630 if (newBuffer) { 3631 ssize_t num_bytes_read = 3632 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3633 3634 if (num_bytes_read < (ssize_t)size) { 3635 mBuffer->release(); 3636 mBuffer = NULL; 3637 3638 return ERROR_IO; 3639 } 3640 3641 CHECK(mBuffer != NULL); 3642 mBuffer->set_range(0, size); 3643 mBuffer->meta_data()->clear(); 3644 mBuffer->meta_data()->setInt64( 3645 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3646 mBuffer->meta_data()->setInt64( 3647 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3648 3649 if (targetSampleTimeUs >= 0) { 3650 mBuffer->meta_data()->setInt64( 3651 kKeyTargetTime, targetSampleTimeUs); 3652 } 3653 3654 if (isSyncSample) { 3655 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3656 } 3657 3658 ++mCurrentSampleIndex; 3659 } 3660 3661 if (!mIsAVC && !mIsHEVC) { 3662 *out = mBuffer; 3663 mBuffer = NULL; 3664 3665 return OK; 3666 } 3667 3668 // Each NAL unit is split up into its constituent fragments and 3669 // each one of them returned in its own buffer. 
3670 3671 CHECK(mBuffer->range_length() >= mNALLengthSize); 3672 3673 const uint8_t *src = 3674 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3675 3676 size_t nal_size = parseNALSize(src); 3677 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3678 ALOGE("incomplete NAL unit."); 3679 3680 mBuffer->release(); 3681 mBuffer = NULL; 3682 3683 return ERROR_MALFORMED; 3684 } 3685 3686 MediaBuffer *clone = mBuffer->clone(); 3687 CHECK(clone != NULL); 3688 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3689 3690 CHECK(mBuffer != NULL); 3691 mBuffer->set_range( 3692 mBuffer->range_offset() + mNALLengthSize + nal_size, 3693 mBuffer->range_length() - mNALLengthSize - nal_size); 3694 3695 if (mBuffer->range_length() == 0) { 3696 mBuffer->release(); 3697 mBuffer = NULL; 3698 } 3699 3700 *out = clone; 3701 3702 return OK; 3703 } else { 3704 // Whole NAL units are returned but each fragment is prefixed by 3705 // the start code (0x00 00 00 01). 3706 ssize_t num_bytes_read = 0; 3707 int32_t drm = 0; 3708 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3709 if (usesDRM) { 3710 num_bytes_read = 3711 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3712 } else { 3713 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3714 } 3715 3716 if (num_bytes_read < (ssize_t)size) { 3717 mBuffer->release(); 3718 mBuffer = NULL; 3719 3720 return ERROR_IO; 3721 } 3722 3723 if (usesDRM) { 3724 CHECK(mBuffer != NULL); 3725 mBuffer->set_range(0, size); 3726 3727 } else { 3728 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3729 size_t srcOffset = 0; 3730 size_t dstOffset = 0; 3731 3732 while (srcOffset < size) { 3733 bool isMalFormed = (srcOffset + mNALLengthSize > size); 3734 size_t nalLength = 0; 3735 if (!isMalFormed) { 3736 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3737 srcOffset += mNALLengthSize; 3738 isMalFormed = srcOffset + nalLength > size; 3739 } 3740 3741 if (isMalFormed) { 3742 ALOGE("Video 
is malformed"); 3743 mBuffer->release(); 3744 mBuffer = NULL; 3745 return ERROR_MALFORMED; 3746 } 3747 3748 if (nalLength == 0) { 3749 continue; 3750 } 3751 3752 CHECK(dstOffset + 4 <= mBuffer->size()); 3753 3754 dstData[dstOffset++] = 0; 3755 dstData[dstOffset++] = 0; 3756 dstData[dstOffset++] = 0; 3757 dstData[dstOffset++] = 1; 3758 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3759 srcOffset += nalLength; 3760 dstOffset += nalLength; 3761 } 3762 CHECK_EQ(srcOffset, size); 3763 CHECK(mBuffer != NULL); 3764 mBuffer->set_range(0, dstOffset); 3765 } 3766 3767 mBuffer->meta_data()->clear(); 3768 mBuffer->meta_data()->setInt64( 3769 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3770 mBuffer->meta_data()->setInt64( 3771 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale); 3772 3773 if (targetSampleTimeUs >= 0) { 3774 mBuffer->meta_data()->setInt64( 3775 kKeyTargetTime, targetSampleTimeUs); 3776 } 3777 3778 if (isSyncSample) { 3779 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3780 } 3781 3782 ++mCurrentSampleIndex; 3783 3784 *out = mBuffer; 3785 mBuffer = NULL; 3786 3787 return OK; 3788 } 3789} 3790 3791status_t MPEG4Source::fragmentedRead( 3792 MediaBuffer **out, const ReadOptions *options) { 3793 3794 ALOGV("MPEG4Source::fragmentedRead"); 3795 3796 CHECK(mStarted); 3797 3798 *out = NULL; 3799 3800 int64_t targetSampleTimeUs = -1; 3801 3802 int64_t seekTimeUs; 3803 ReadOptions::SeekMode mode; 3804 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3805 3806 int numSidxEntries = mSegments.size(); 3807 if (numSidxEntries != 0) { 3808 int64_t totalTime = 0; 3809 off64_t totalOffset = mFirstMoofOffset; 3810 for (int i = 0; i < numSidxEntries; i++) { 3811 const SidxEntry *se = &mSegments[i]; 3812 if (totalTime + se->mDurationUs > seekTimeUs) { 3813 // The requested time is somewhere in this segment 3814 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) || 3815 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 3816 (seekTimeUs 
- totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3817 // requested next sync, or closest sync and it was closer to the end of 3818 // this segment 3819 totalTime += se->mDurationUs; 3820 totalOffset += se->mSize; 3821 } 3822 break; 3823 } 3824 totalTime += se->mDurationUs; 3825 totalOffset += se->mSize; 3826 } 3827 mCurrentMoofOffset = totalOffset; 3828 mCurrentSamples.clear(); 3829 mCurrentSampleIndex = 0; 3830 parseChunk(&totalOffset); 3831 mCurrentTime = totalTime * mTimescale / 1000000ll; 3832 } else { 3833 // without sidx boxes, we can only seek to 0 3834 mCurrentMoofOffset = mFirstMoofOffset; 3835 mCurrentSamples.clear(); 3836 mCurrentSampleIndex = 0; 3837 off64_t tmp = mCurrentMoofOffset; 3838 parseChunk(&tmp); 3839 mCurrentTime = 0; 3840 } 3841 3842 if (mBuffer != NULL) { 3843 mBuffer->release(); 3844 mBuffer = NULL; 3845 } 3846 3847 // fall through 3848 } 3849 3850 off64_t offset = 0; 3851 size_t size = 0; 3852 uint32_t cts = 0; 3853 bool isSyncSample = false; 3854 bool newBuffer = false; 3855 if (mBuffer == NULL) { 3856 newBuffer = true; 3857 3858 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3859 // move to next fragment if there is one 3860 if (mNextMoofOffset <= mCurrentMoofOffset) { 3861 return ERROR_END_OF_STREAM; 3862 } 3863 off64_t nextMoof = mNextMoofOffset; 3864 mCurrentMoofOffset = nextMoof; 3865 mCurrentSamples.clear(); 3866 mCurrentSampleIndex = 0; 3867 parseChunk(&nextMoof); 3868 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3869 return ERROR_END_OF_STREAM; 3870 } 3871 } 3872 3873 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3874 offset = smpl->offset; 3875 size = smpl->size; 3876 cts = mCurrentTime; 3877 mCurrentTime += smpl->duration; 3878 isSyncSample = (mCurrentSampleIndex == 0); // XXX 3879 3880 status_t err = mGroup->acquire_buffer(&mBuffer); 3881 3882 if (err != OK) { 3883 CHECK(mBuffer == NULL); 3884 ALOGV("acquire_buffer returned %d", err); 3885 return err; 3886 } 3887 } 3888 3889 const 
Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3890 const sp<MetaData> bufmeta = mBuffer->meta_data(); 3891 bufmeta->clear(); 3892 if (smpl->encryptedsizes.size()) { 3893 // store clear/encrypted lengths in metadata 3894 bufmeta->setData(kKeyPlainSizes, 0, 3895 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 3896 bufmeta->setData(kKeyEncryptedSizes, 0, 3897 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 3898 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 3899 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 3900 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 3901 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 3902 } 3903 3904 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) { 3905 if (newBuffer) { 3906 ssize_t num_bytes_read = 3907 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3908 3909 if (num_bytes_read < (ssize_t)size) { 3910 mBuffer->release(); 3911 mBuffer = NULL; 3912 3913 ALOGV("i/o error"); 3914 return ERROR_IO; 3915 } 3916 3917 CHECK(mBuffer != NULL); 3918 mBuffer->set_range(0, size); 3919 mBuffer->meta_data()->setInt64( 3920 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3921 mBuffer->meta_data()->setInt64( 3922 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 3923 3924 if (targetSampleTimeUs >= 0) { 3925 mBuffer->meta_data()->setInt64( 3926 kKeyTargetTime, targetSampleTimeUs); 3927 } 3928 3929 if (isSyncSample) { 3930 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3931 } 3932 3933 ++mCurrentSampleIndex; 3934 } 3935 3936 if (!mIsAVC && !mIsHEVC) { 3937 *out = mBuffer; 3938 mBuffer = NULL; 3939 3940 return OK; 3941 } 3942 3943 // Each NAL unit is split up into its constituent fragments and 3944 // each one of them returned in its own buffer. 
3945 3946 CHECK(mBuffer->range_length() >= mNALLengthSize); 3947 3948 const uint8_t *src = 3949 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3950 3951 size_t nal_size = parseNALSize(src); 3952 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3953 ALOGE("incomplete NAL unit."); 3954 3955 mBuffer->release(); 3956 mBuffer = NULL; 3957 3958 return ERROR_MALFORMED; 3959 } 3960 3961 MediaBuffer *clone = mBuffer->clone(); 3962 CHECK(clone != NULL); 3963 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3964 3965 CHECK(mBuffer != NULL); 3966 mBuffer->set_range( 3967 mBuffer->range_offset() + mNALLengthSize + nal_size, 3968 mBuffer->range_length() - mNALLengthSize - nal_size); 3969 3970 if (mBuffer->range_length() == 0) { 3971 mBuffer->release(); 3972 mBuffer = NULL; 3973 } 3974 3975 *out = clone; 3976 3977 return OK; 3978 } else { 3979 ALOGV("whole NAL"); 3980 // Whole NAL units are returned but each fragment is prefixed by 3981 // the start code (0x00 00 00 01). 
3982 ssize_t num_bytes_read = 0; 3983 int32_t drm = 0; 3984 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3985 if (usesDRM) { 3986 num_bytes_read = 3987 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3988 } else { 3989 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3990 } 3991 3992 if (num_bytes_read < (ssize_t)size) { 3993 mBuffer->release(); 3994 mBuffer = NULL; 3995 3996 ALOGV("i/o error"); 3997 return ERROR_IO; 3998 } 3999 4000 if (usesDRM) { 4001 CHECK(mBuffer != NULL); 4002 mBuffer->set_range(0, size); 4003 4004 } else { 4005 uint8_t *dstData = (uint8_t *)mBuffer->data(); 4006 size_t srcOffset = 0; 4007 size_t dstOffset = 0; 4008 4009 while (srcOffset < size) { 4010 bool isMalFormed = (srcOffset + mNALLengthSize > size); 4011 size_t nalLength = 0; 4012 if (!isMalFormed) { 4013 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 4014 srcOffset += mNALLengthSize; 4015 isMalFormed = srcOffset + nalLength > size; 4016 } 4017 4018 if (isMalFormed) { 4019 ALOGE("Video is malformed"); 4020 mBuffer->release(); 4021 mBuffer = NULL; 4022 return ERROR_MALFORMED; 4023 } 4024 4025 if (nalLength == 0) { 4026 continue; 4027 } 4028 4029 CHECK(dstOffset + 4 <= mBuffer->size()); 4030 4031 dstData[dstOffset++] = 0; 4032 dstData[dstOffset++] = 0; 4033 dstData[dstOffset++] = 0; 4034 dstData[dstOffset++] = 1; 4035 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 4036 srcOffset += nalLength; 4037 dstOffset += nalLength; 4038 } 4039 CHECK_EQ(srcOffset, size); 4040 CHECK(mBuffer != NULL); 4041 mBuffer->set_range(0, dstOffset); 4042 } 4043 4044 mBuffer->meta_data()->setInt64( 4045 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 4046 mBuffer->meta_data()->setInt64( 4047 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale); 4048 4049 if (targetSampleTimeUs >= 0) { 4050 mBuffer->meta_data()->setInt64( 4051 kKeyTargetTime, targetSampleTimeUs); 4052 } 4053 4054 if (isSyncSample) { 4055 
mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 4056 } 4057 4058 ++mCurrentSampleIndex; 4059 4060 *out = mBuffer; 4061 mBuffer = NULL; 4062 4063 return OK; 4064 } 4065} 4066 4067MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 4068 const char *mimePrefix) { 4069 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 4070 const char *mime; 4071 if (track->meta != NULL 4072 && track->meta->findCString(kKeyMIMEType, &mime) 4073 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 4074 return track; 4075 } 4076 } 4077 4078 return NULL; 4079} 4080 4081static bool LegacySniffMPEG4( 4082 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 4083 uint8_t header[8]; 4084 4085 ssize_t n = source->readAt(4, header, sizeof(header)); 4086 if (n < (ssize_t)sizeof(header)) { 4087 return false; 4088 } 4089 4090 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 4091 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 4092 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 4093 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 4094 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 4095 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 4096 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4097 *confidence = 0.4; 4098 4099 return true; 4100 } 4101 4102 return false; 4103} 4104 4105static bool isCompatibleBrand(uint32_t fourcc) { 4106 static const uint32_t kCompatibleBrands[] = { 4107 FOURCC('i', 's', 'o', 'm'), 4108 FOURCC('i', 's', 'o', '2'), 4109 FOURCC('a', 'v', 'c', '1'), 4110 FOURCC('h', 'v', 'c', '1'), 4111 FOURCC('h', 'e', 'v', '1'), 4112 FOURCC('3', 'g', 'p', '4'), 4113 FOURCC('m', 'p', '4', '1'), 4114 FOURCC('m', 'p', '4', '2'), 4115 4116 // Won't promise that the following file types can be played. 4117 // Just give these file types a chance. 
4118 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 4119 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 4120 4121 FOURCC('3', 'g', '2', 'a'), // 3GPP2 4122 FOURCC('3', 'g', '2', 'b'), 4123 }; 4124 4125 for (size_t i = 0; 4126 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 4127 ++i) { 4128 if (kCompatibleBrands[i] == fourcc) { 4129 return true; 4130 } 4131 } 4132 4133 return false; 4134} 4135 4136// Attempt to actually parse the 'ftyp' atom and determine if a suitable 4137// compatible brand is present. 4138// Also try to identify where this file's metadata ends 4139// (end of the 'moov' atom) and report it to the caller as part of 4140// the metadata. 4141static bool BetterSniffMPEG4( 4142 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4143 sp<AMessage> *meta) { 4144 // We scan up to 128 bytes to identify this file as an MP4. 4145 static const off64_t kMaxScanOffset = 128ll; 4146 4147 off64_t offset = 0ll; 4148 bool foundGoodFileType = false; 4149 off64_t moovAtomEndOffset = -1ll; 4150 bool done = false; 4151 4152 while (!done && offset < kMaxScanOffset) { 4153 uint32_t hdr[2]; 4154 if (source->readAt(offset, hdr, 8) < 8) { 4155 return false; 4156 } 4157 4158 uint64_t chunkSize = ntohl(hdr[0]); 4159 uint32_t chunkType = ntohl(hdr[1]); 4160 off64_t chunkDataOffset = offset + 8; 4161 4162 if (chunkSize == 1) { 4163 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 4164 return false; 4165 } 4166 4167 chunkSize = ntoh64(chunkSize); 4168 chunkDataOffset += 8; 4169 4170 if (chunkSize < 16) { 4171 // The smallest valid chunk is 16 bytes long in this case. 4172 return false; 4173 } 4174 } else if (chunkSize < 8) { 4175 // The smallest valid chunk is 8 bytes long. 
4176 return false; 4177 } 4178 4179 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 4180 4181 char chunkstring[5]; 4182 MakeFourCCString(chunkType, chunkstring); 4183 ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset); 4184 switch (chunkType) { 4185 case FOURCC('f', 't', 'y', 'p'): 4186 { 4187 if (chunkDataSize < 8) { 4188 return false; 4189 } 4190 4191 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 4192 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 4193 if (i == 1) { 4194 // Skip this index, it refers to the minorVersion, 4195 // not a brand. 4196 continue; 4197 } 4198 4199 uint32_t brand; 4200 if (source->readAt( 4201 chunkDataOffset + 4 * i, &brand, 4) < 4) { 4202 return false; 4203 } 4204 4205 brand = ntohl(brand); 4206 4207 if (isCompatibleBrand(brand)) { 4208 foundGoodFileType = true; 4209 break; 4210 } 4211 } 4212 4213 if (!foundGoodFileType) { 4214 return false; 4215 } 4216 4217 break; 4218 } 4219 4220 case FOURCC('m', 'o', 'o', 'v'): 4221 { 4222 moovAtomEndOffset = offset + chunkSize; 4223 4224 done = true; 4225 break; 4226 } 4227 4228 default: 4229 break; 4230 } 4231 4232 offset += chunkSize; 4233 } 4234 4235 if (!foundGoodFileType) { 4236 return false; 4237 } 4238 4239 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 4240 *confidence = 0.4f; 4241 4242 if (moovAtomEndOffset >= 0) { 4243 *meta = new AMessage; 4244 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 4245 4246 ALOGV("found metadata size: %lld", moovAtomEndOffset); 4247 } 4248 4249 return true; 4250} 4251 4252bool SniffMPEG4( 4253 const sp<DataSource> &source, String8 *mimeType, float *confidence, 4254 sp<AMessage> *meta) { 4255 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 4256 return true; 4257 } 4258 4259 if (LegacySniffMPEG4(source, mimeType, confidence)) { 4260 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 4261 return true; 4262 } 4263 4264 return false; 4265} 4266 4267} // namespace android 4268