/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"
#include <utils/Log.h>

#include "include/MPEG4Extractor.h"
#include "include/SampleTable.h"
#include "include/ESDS.h"

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/MediaBuffer.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MediaSource.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>

namespace android {

// MediaSource subclass declared locally in this file. It is constructed from
// a format (MetaData), a DataSource, a timescale, a SampleTable, a vector of
// sidx entries and the offset of the first 'moof' box. Only the declaration
// is visible here; the member definitions live elsewhere in the file.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // "sidx" is taken by non-const reference and stored as a reference (see
    // mSegments below), so the referenced vector has to outlive this object.
    MPEG4Source(const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // Same signature as read(). NOTE(review): presumably the read path used
    // for fragmented ('moof'-based) files -- confirm against the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Protected destructor: instances cannot be deleted directly by outside
    // code. NOTE(review): presumably lifetime is managed through sp<> -- confirm.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference member, not a copy; must be bound in the constructor's
    // initializer list.
    Vector<SidxEntry> &mSegments;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for the sample auxiliary information currently being parsed.
    // NOTE(review): the two raw pointers below look like heap arrays sized by
    // the matching *AllocSize fields; allocation and release are not visible
    // in this declaration -- confirm ownership in the definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    // Private parsing helpers; their definitions are outside this declaration.
    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Fields collected from a track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit values tested against mFlags.
        // NOTE(review): these look like the tf_flags of the 'tfhd' box in
        // ISO/IEC 14496-12 -- confirm against the spec.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: offset, size and duration plus a 16-byte IV and two
    // parallel size lists (clear / encrypted).
    // NOTE(review): presumably filled while parsing a fragment -- confirm.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Copy constructor and copy assignment are declared private, so outside
    // code cannot copy an MPEG4Source.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};

// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each MPEG4DataSource caches the sampletable metadata for a single track.
156 157struct MPEG4DataSource : public DataSource { 158 MPEG4DataSource(const sp<DataSource> &source); 159 160 virtual status_t initCheck() const; 161 virtual ssize_t readAt(off64_t offset, void *data, size_t size); 162 virtual status_t getSize(off64_t *size); 163 virtual uint32_t flags(); 164 165 status_t setCachedRange(off64_t offset, size_t size); 166 167protected: 168 virtual ~MPEG4DataSource(); 169 170private: 171 Mutex mLock; 172 173 sp<DataSource> mSource; 174 off64_t mCachedOffset; 175 size_t mCachedSize; 176 uint8_t *mCache; 177 178 void clearCache(); 179 180 MPEG4DataSource(const MPEG4DataSource &); 181 MPEG4DataSource &operator=(const MPEG4DataSource &); 182}; 183 184MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source) 185 : mSource(source), 186 mCachedOffset(0), 187 mCachedSize(0), 188 mCache(NULL) { 189} 190 191MPEG4DataSource::~MPEG4DataSource() { 192 clearCache(); 193} 194 195void MPEG4DataSource::clearCache() { 196 if (mCache) { 197 free(mCache); 198 mCache = NULL; 199 } 200 201 mCachedOffset = 0; 202 mCachedSize = 0; 203} 204 205status_t MPEG4DataSource::initCheck() const { 206 return mSource->initCheck(); 207} 208 209ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) { 210 Mutex::Autolock autoLock(mLock); 211 212 if (offset >= mCachedOffset 213 && offset + size <= mCachedOffset + mCachedSize) { 214 memcpy(data, &mCache[offset - mCachedOffset], size); 215 return size; 216 } 217 218 return mSource->readAt(offset, data, size); 219} 220 221status_t MPEG4DataSource::getSize(off64_t *size) { 222 return mSource->getSize(size); 223} 224 225uint32_t MPEG4DataSource::flags() { 226 return mSource->flags(); 227} 228 229status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) { 230 Mutex::Autolock autoLock(mLock); 231 232 clearCache(); 233 234 mCache = (uint8_t *)malloc(size); 235 236 if (mCache == NULL) { 237 return -ENOMEM; 238 } 239 240 mCachedOffset = offset; 241 mCachedSize = size; 242 243 ssize_t err = 
mSource->readAt(mCachedOffset, mCache, mCachedSize); 244 245 if (err < (ssize_t)size) { 246 clearCache(); 247 248 return ERROR_IO; 249 } 250 251 return OK; 252} 253 254//////////////////////////////////////////////////////////////////////////////// 255 256static void hexdump(const void *_data, size_t size) { 257 const uint8_t *data = (const uint8_t *)_data; 258 size_t offset = 0; 259 while (offset < size) { 260 printf("0x%04x ", offset); 261 262 size_t n = size - offset; 263 if (n > 16) { 264 n = 16; 265 } 266 267 for (size_t i = 0; i < 16; ++i) { 268 if (i == 8) { 269 printf(" "); 270 } 271 272 if (offset + i < size) { 273 printf("%02x ", data[offset + i]); 274 } else { 275 printf(" "); 276 } 277 } 278 279 printf(" "); 280 281 for (size_t i = 0; i < n; ++i) { 282 if (isprint(data[offset + i])) { 283 printf("%c", data[offset + i]); 284 } else { 285 printf("."); 286 } 287 } 288 289 printf("\n"); 290 291 offset += 16; 292 } 293} 294 295static const char *FourCC2MIME(uint32_t fourcc) { 296 switch (fourcc) { 297 case FOURCC('m', 'p', '4', 'a'): 298 return MEDIA_MIMETYPE_AUDIO_AAC; 299 300 case FOURCC('s', 'a', 'm', 'r'): 301 return MEDIA_MIMETYPE_AUDIO_AMR_NB; 302 303 case FOURCC('s', 'a', 'w', 'b'): 304 return MEDIA_MIMETYPE_AUDIO_AMR_WB; 305 306 case FOURCC('m', 'p', '4', 'v'): 307 return MEDIA_MIMETYPE_VIDEO_MPEG4; 308 309 case FOURCC('s', '2', '6', '3'): 310 case FOURCC('h', '2', '6', '3'): 311 case FOURCC('H', '2', '6', '3'): 312 return MEDIA_MIMETYPE_VIDEO_H263; 313 314 case FOURCC('a', 'v', 'c', '1'): 315 return MEDIA_MIMETYPE_VIDEO_AVC; 316 317 default: 318 CHECK(!"should not be here."); 319 return NULL; 320 } 321} 322 323static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) { 324 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) { 325 // AMR NB audio is always mono, 8kHz 326 *channels = 1; 327 *rate = 8000; 328 return true; 329 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) { 330 // 
AMR WB audio is always mono, 16kHz 331 *channels = 1; 332 *rate = 16000; 333 return true; 334 } 335 return false; 336} 337 338MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source) 339 : mSidxDuration(0), 340 mMoofOffset(0), 341 mDataSource(source), 342 mInitCheck(NO_INIT), 343 mHasVideo(false), 344 mHeaderTimescale(0), 345 mFirstTrack(NULL), 346 mLastTrack(NULL), 347 mFileMetaData(new MetaData), 348 mFirstSINF(NULL), 349 mIsDrm(false) { 350} 351 352MPEG4Extractor::~MPEG4Extractor() { 353 Track *track = mFirstTrack; 354 while (track) { 355 Track *next = track->next; 356 357 delete track; 358 track = next; 359 } 360 mFirstTrack = mLastTrack = NULL; 361 362 SINF *sinf = mFirstSINF; 363 while (sinf) { 364 SINF *next = sinf->next; 365 delete sinf->IPMPData; 366 delete sinf; 367 sinf = next; 368 } 369 mFirstSINF = NULL; 370 371 for (size_t i = 0; i < mPssh.size(); i++) { 372 delete [] mPssh[i].data; 373 } 374} 375 376uint32_t MPEG4Extractor::flags() const { 377 return CAN_PAUSE | 378 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ? 
379 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); 380} 381 382sp<MetaData> MPEG4Extractor::getMetaData() { 383 status_t err; 384 if ((err = readMetaData()) != OK) { 385 return new MetaData; 386 } 387 388 return mFileMetaData; 389} 390 391size_t MPEG4Extractor::countTracks() { 392 status_t err; 393 if ((err = readMetaData()) != OK) { 394 ALOGV("MPEG4Extractor::countTracks: no tracks"); 395 return 0; 396 } 397 398 size_t n = 0; 399 Track *track = mFirstTrack; 400 while (track) { 401 ++n; 402 track = track->next; 403 } 404 405 ALOGV("MPEG4Extractor::countTracks: %d tracks", n); 406 return n; 407} 408 409sp<MetaData> MPEG4Extractor::getTrackMetaData( 410 size_t index, uint32_t flags) { 411 status_t err; 412 if ((err = readMetaData()) != OK) { 413 return NULL; 414 } 415 416 Track *track = mFirstTrack; 417 while (index > 0) { 418 if (track == NULL) { 419 return NULL; 420 } 421 422 track = track->next; 423 --index; 424 } 425 426 if (track == NULL) { 427 return NULL; 428 } 429 430 if ((flags & kIncludeExtensiveMetaData) 431 && !track->includes_expensive_metadata) { 432 track->includes_expensive_metadata = true; 433 434 const char *mime; 435 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 436 if (!strncasecmp("video/", mime, 6)) { 437 if (mMoofOffset > 0) { 438 int64_t duration; 439 if (track->meta->findInt64(kKeyDuration, &duration)) { 440 // nothing fancy, just pick a frame near 1/4th of the duration 441 track->meta->setInt64( 442 kKeyThumbnailTime, duration / 4); 443 } 444 } else { 445 uint32_t sampleIndex; 446 uint32_t sampleTime; 447 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK 448 && track->sampleTable->getMetaDataForSample( 449 sampleIndex, NULL /* offset */, NULL /* size */, 450 &sampleTime) == OK) { 451 track->meta->setInt64( 452 kKeyThumbnailTime, 453 ((int64_t)sampleTime * 1000000) / track->timescale); 454 } 455 } 456 } 457 } 458 459 return track->meta; 460} 461 462static void MakeFourCCString(uint32_t x, char *s) { 463 s[0] = x 
>> 24; 464 s[1] = (x >> 16) & 0xff; 465 s[2] = (x >> 8) & 0xff; 466 s[3] = x & 0xff; 467 s[4] = '\0'; 468} 469 470status_t MPEG4Extractor::readMetaData() { 471 if (mInitCheck != NO_INIT) { 472 return mInitCheck; 473 } 474 475 off64_t offset = 0; 476 status_t err; 477 while (true) { 478 err = parseChunk(&offset, 0); 479 if (err == OK) { 480 continue; 481 } 482 483 uint32_t hdr[2]; 484 if (mDataSource->readAt(offset, hdr, 8) < 8) { 485 break; 486 } 487 uint32_t chunk_type = ntohl(hdr[1]); 488 if (chunk_type == FOURCC('s', 'i', 'd', 'x')) { 489 // parse the sidx box too 490 continue; 491 } else if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 492 // store the offset of the first segment 493 mMoofOffset = offset; 494 } 495 break; 496 } 497 498 if (mInitCheck == OK) { 499 if (mHasVideo) { 500 mFileMetaData->setCString( 501 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); 502 } else { 503 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); 504 } 505 506 mInitCheck = OK; 507 } else { 508 mInitCheck = err; 509 } 510 511 CHECK_NE(err, (status_t)NO_INIT); 512 513 // copy pssh data into file metadata 514 int psshsize = 0; 515 for (size_t i = 0; i < mPssh.size(); i++) { 516 psshsize += 20 + mPssh[i].datalen; 517 } 518 if (psshsize) { 519 char *buf = (char*)malloc(psshsize); 520 char *ptr = buf; 521 for (size_t i = 0; i < mPssh.size(); i++) { 522 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length 523 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen); 524 ptr += (20 + mPssh[i].datalen); 525 } 526 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize); 527 free(buf); 528 } 529 return mInitCheck; 530} 531 532char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { 533 if (mFirstSINF == NULL) { 534 return NULL; 535 } 536 537 SINF *sinf = mFirstSINF; 538 while (sinf && (trackID != sinf->trackID)) { 539 sinf = sinf->next; 540 } 541 542 if (sinf == NULL) { 543 return NULL; 544 } 545 546 *len = sinf->len; 547 return sinf->IPMPData; 548} 549 550// Reads an encoded integer 7 bits 
at a time until it encounters the high bit clear. 551static int32_t readSize(off64_t offset, 552 const sp<DataSource> DataSource, uint8_t *numOfBytes) { 553 uint32_t size = 0; 554 uint8_t data; 555 bool moreData = true; 556 *numOfBytes = 0; 557 558 while (moreData) { 559 if (DataSource->readAt(offset, &data, 1) < 1) { 560 return -1; 561 } 562 offset ++; 563 moreData = (data >= 128) ? true : false; 564 size = (size << 7) | (data & 0x7f); // Take last 7 bits 565 (*numOfBytes) ++; 566 } 567 568 return size; 569} 570 571status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) { 572 uint8_t updateIdTag; 573 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 574 return ERROR_IO; 575 } 576 data_offset ++; 577 578 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 579 return ERROR_MALFORMED; 580 } 581 582 uint8_t numOfBytes; 583 int32_t size = readSize(data_offset, mDataSource, &numOfBytes); 584 if (size < 0) { 585 return ERROR_IO; 586 } 587 int32_t classSize = size; 588 data_offset += numOfBytes; 589 590 while(size >= 11 ) { 591 uint8_t descriptorTag; 592 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) { 593 return ERROR_IO; 594 } 595 data_offset ++; 596 597 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) { 598 return ERROR_MALFORMED; 599 } 600 601 uint8_t buffer[8]; 602 //ObjectDescriptorID and ObjectDescriptor url flag 603 if (mDataSource->readAt(data_offset, buffer, 2) < 2) { 604 return ERROR_IO; 605 } 606 data_offset += 2; 607 608 if ((buffer[1] >> 5) & 0x0001) { //url flag is set 609 return ERROR_MALFORMED; 610 } 611 612 if (mDataSource->readAt(data_offset, buffer, 8) < 8) { 613 return ERROR_IO; 614 } 615 data_offset += 8; 616 617 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1]) 618 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) { 619 return ERROR_MALFORMED; 620 } 621 622 SINF *sinf = new SINF; 623 sinf->trackID = U16_AT(&buffer[3]); 624 sinf->IPMPDescriptorID = buffer[7]; 625 sinf->next = mFirstSINF; 
626 mFirstSINF = sinf; 627 628 size -= (8 + 2 + 1); 629 } 630 631 if (size != 0) { 632 return ERROR_MALFORMED; 633 } 634 635 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) { 636 return ERROR_IO; 637 } 638 data_offset ++; 639 640 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) { 641 return ERROR_MALFORMED; 642 } 643 644 size = readSize(data_offset, mDataSource, &numOfBytes); 645 if (size < 0) { 646 return ERROR_IO; 647 } 648 classSize = size; 649 data_offset += numOfBytes; 650 651 while (size > 0) { 652 uint8_t tag; 653 int32_t dataLen; 654 if (mDataSource->readAt(data_offset, &tag, 1) < 1) { 655 return ERROR_IO; 656 } 657 data_offset ++; 658 659 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) { 660 uint8_t id; 661 dataLen = readSize(data_offset, mDataSource, &numOfBytes); 662 if (dataLen < 0) { 663 return ERROR_IO; 664 } else if (dataLen < 4) { 665 return ERROR_MALFORMED; 666 } 667 data_offset += numOfBytes; 668 669 if (mDataSource->readAt(data_offset, &id, 1) < 1) { 670 return ERROR_IO; 671 } 672 data_offset ++; 673 674 SINF *sinf = mFirstSINF; 675 while (sinf && (sinf->IPMPDescriptorID != id)) { 676 sinf = sinf->next; 677 } 678 if (sinf == NULL) { 679 return ERROR_MALFORMED; 680 } 681 sinf->len = dataLen - 3; 682 sinf->IPMPData = new char[sinf->len]; 683 684 if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) { 685 return ERROR_IO; 686 } 687 data_offset += sinf->len; 688 689 size -= (dataLen + numOfBytes + 1); 690 } 691 } 692 693 if (size != 0) { 694 return ERROR_MALFORMED; 695 } 696 697 return UNKNOWN_ERROR; // Return a dummy error. 
698} 699 700struct PathAdder { 701 PathAdder(Vector<uint32_t> *path, uint32_t chunkType) 702 : mPath(path) { 703 mPath->push(chunkType); 704 } 705 706 ~PathAdder() { 707 mPath->pop(); 708 } 709 710private: 711 Vector<uint32_t> *mPath; 712 713 PathAdder(const PathAdder &); 714 PathAdder &operator=(const PathAdder &); 715}; 716 717static bool underMetaDataPath(const Vector<uint32_t> &path) { 718 return path.size() >= 5 719 && path[0] == FOURCC('m', 'o', 'o', 'v') 720 && path[1] == FOURCC('u', 'd', 't', 'a') 721 && path[2] == FOURCC('m', 'e', 't', 'a') 722 && path[3] == FOURCC('i', 'l', 's', 't'); 723} 724 725// Given a time in seconds since Jan 1 1904, produce a human-readable string. 726static void convertTimeToDate(int64_t time_1904, String8 *s) { 727 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600); 728 729 char tmp[32]; 730 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970)); 731 732 s->setTo(tmp); 733} 734 735status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) { 736 ALOGV("entering parseChunk %lld/%d", *offset, depth); 737 uint32_t hdr[2]; 738 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 739 return ERROR_IO; 740 } 741 uint64_t chunk_size = ntohl(hdr[0]); 742 uint32_t chunk_type = ntohl(hdr[1]); 743 off64_t data_offset = *offset + 8; 744 745 if (chunk_size == 1) { 746 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 747 return ERROR_IO; 748 } 749 chunk_size = ntoh64(chunk_size); 750 data_offset += 8; 751 752 if (chunk_size < 16) { 753 // The smallest valid chunk is 16 bytes long in this case. 754 return ERROR_MALFORMED; 755 } 756 } else if (chunk_size < 8) { 757 // The smallest valid chunk is 8 bytes long. 
758 return ERROR_MALFORMED; 759 } 760 761 char chunk[5]; 762 MakeFourCCString(chunk_type, chunk); 763 ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth); 764 765#if 0 766 static const char kWhitespace[] = " "; 767 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth]; 768 printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size); 769 770 char buffer[256]; 771 size_t n = chunk_size; 772 if (n > sizeof(buffer)) { 773 n = sizeof(buffer); 774 } 775 if (mDataSource->readAt(*offset, buffer, n) 776 < (ssize_t)n) { 777 return ERROR_IO; 778 } 779 780 hexdump(buffer, n); 781#endif 782 783 PathAdder autoAdder(&mPath, chunk_type); 784 785 off64_t chunk_data_size = *offset + chunk_size - data_offset; 786 787 if (chunk_type != FOURCC('c', 'p', 'r', 't') 788 && chunk_type != FOURCC('c', 'o', 'v', 'r') 789 && mPath.size() == 5 && underMetaDataPath(mPath)) { 790 off64_t stop_offset = *offset + chunk_size; 791 *offset = data_offset; 792 while (*offset < stop_offset) { 793 status_t err = parseChunk(offset, depth + 1); 794 if (err != OK) { 795 return err; 796 } 797 } 798 799 if (*offset != stop_offset) { 800 return ERROR_MALFORMED; 801 } 802 803 return OK; 804 } 805 806 switch(chunk_type) { 807 case FOURCC('m', 'o', 'o', 'v'): 808 case FOURCC('t', 'r', 'a', 'k'): 809 case FOURCC('m', 'd', 'i', 'a'): 810 case FOURCC('m', 'i', 'n', 'f'): 811 case FOURCC('d', 'i', 'n', 'f'): 812 case FOURCC('s', 't', 'b', 'l'): 813 case FOURCC('m', 'v', 'e', 'x'): 814 case FOURCC('m', 'o', 'o', 'f'): 815 case FOURCC('t', 'r', 'a', 'f'): 816 case FOURCC('m', 'f', 'r', 'a'): 817 case FOURCC('u', 'd', 't', 'a'): 818 case FOURCC('i', 'l', 's', 't'): 819 case FOURCC('s', 'i', 'n', 'f'): 820 case FOURCC('s', 'c', 'h', 'i'): 821 case FOURCC('e', 'd', 't', 's'): 822 { 823 if (chunk_type == FOURCC('s', 't', 'b', 'l')) { 824 ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size); 825 826 if (mDataSource->flags() 827 & (DataSource::kWantsPrefetching 828 | 
DataSource::kIsCachingDataSource)) { 829 sp<MPEG4DataSource> cachedSource = 830 new MPEG4DataSource(mDataSource); 831 832 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) { 833 mDataSource = cachedSource; 834 } 835 } 836 837 mLastTrack->sampleTable = new SampleTable(mDataSource); 838 } 839 840 bool isTrack = false; 841 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) { 842 isTrack = true; 843 844 Track *track = new Track; 845 track->next = NULL; 846 if (mLastTrack) { 847 mLastTrack->next = track; 848 } else { 849 mFirstTrack = track; 850 } 851 mLastTrack = track; 852 853 track->meta = new MetaData; 854 track->includes_expensive_metadata = false; 855 track->skipTrack = false; 856 track->timescale = 0; 857 track->meta->setCString(kKeyMIMEType, "application/octet-stream"); 858 } 859 860 off64_t stop_offset = *offset + chunk_size; 861 *offset = data_offset; 862 while (*offset < stop_offset) { 863 status_t err = parseChunk(offset, depth + 1); 864 if (err != OK) { 865 return err; 866 } 867 } 868 869 if (*offset != stop_offset) { 870 return ERROR_MALFORMED; 871 } 872 873 if (isTrack) { 874 if (mLastTrack->skipTrack) { 875 Track *cur = mFirstTrack; 876 877 if (cur == mLastTrack) { 878 delete cur; 879 mFirstTrack = mLastTrack = NULL; 880 } else { 881 while (cur && cur->next != mLastTrack) { 882 cur = cur->next; 883 } 884 cur->next = NULL; 885 delete mLastTrack; 886 mLastTrack = cur; 887 } 888 889 return OK; 890 } 891 892 status_t err = verifyTrack(mLastTrack); 893 894 if (err != OK) { 895 return err; 896 } 897 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) { 898 mInitCheck = OK; 899 900 if (!mIsDrm) { 901 return UNKNOWN_ERROR; // Return a dummy error. 
902 } else { 903 return OK; 904 } 905 } 906 break; 907 } 908 909 case FOURCC('e', 'l', 's', 't'): 910 { 911 // See 14496-12 8.6.6 912 uint8_t version; 913 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 914 return ERROR_IO; 915 } 916 917 uint32_t entry_count; 918 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) { 919 return ERROR_IO; 920 } 921 922 if (entry_count != 1) { 923 // we only support a single entry at the moment, for gapless playback 924 ALOGW("ignoring edit list with %d entries", entry_count); 925 } else if (mHeaderTimescale == 0) { 926 ALOGW("ignoring edit list because timescale is 0"); 927 } else { 928 off64_t entriesoffset = data_offset + 8; 929 uint64_t segment_duration; 930 int64_t media_time; 931 932 if (version == 1) { 933 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) || 934 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) { 935 return ERROR_IO; 936 } 937 } else if (version == 0) { 938 uint32_t sd; 939 int32_t mt; 940 if (!mDataSource->getUInt32(entriesoffset, &sd) || 941 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) { 942 return ERROR_IO; 943 } 944 segment_duration = sd; 945 media_time = mt; 946 } else { 947 return ERROR_IO; 948 } 949 950 uint64_t halfscale = mHeaderTimescale / 2; 951 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale; 952 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale; 953 954 int64_t duration; 955 int32_t samplerate; 956 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) && 957 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) { 958 959 int64_t delay = (media_time * samplerate + 500000) / 1000000; 960 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 961 962 int64_t paddingus = duration - (segment_duration + media_time); 963 if (paddingus < 0) { 964 // track duration from media header (which is what kKeyDuration is) might 965 // be slightly shorter than the segment duration, which would make the 
966 // padding negative. Clamp to zero. 967 paddingus = 0; 968 } 969 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000; 970 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples); 971 } 972 } 973 *offset += chunk_size; 974 break; 975 } 976 977 case FOURCC('f', 'r', 'm', 'a'): 978 { 979 uint32_t original_fourcc; 980 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) { 981 return ERROR_IO; 982 } 983 original_fourcc = ntohl(original_fourcc); 984 ALOGV("read original format: %d", original_fourcc); 985 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc)); 986 uint32_t num_channels = 0; 987 uint32_t sample_rate = 0; 988 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) { 989 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 990 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 991 } 992 *offset += chunk_size; 993 break; 994 } 995 996 case FOURCC('t', 'e', 'n', 'c'): 997 { 998 if (chunk_size < 32) { 999 return ERROR_MALFORMED; 1000 } 1001 1002 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte 1003 // default IV size, 16 bytes default KeyID 1004 // (ISO 23001-7) 1005 char buf[4]; 1006 memset(buf, 0, 4); 1007 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) { 1008 return ERROR_IO; 1009 } 1010 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf)); 1011 if (defaultAlgorithmId > 1) { 1012 // only 0 (clear) and 1 (AES-128) are valid 1013 return ERROR_MALFORMED; 1014 } 1015 1016 memset(buf, 0, 4); 1017 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) { 1018 return ERROR_IO; 1019 } 1020 uint32_t defaultIVSize = ntohl(*((int32_t*)buf)); 1021 1022 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) || 1023 (defaultAlgorithmId != 0 && defaultIVSize == 0)) { 1024 // only unencrypted data must have 0 IV size 1025 return ERROR_MALFORMED; 1026 } else if (defaultIVSize != 0 && 1027 defaultIVSize != 8 && 1028 defaultIVSize != 16) { 
1029 // only supported sizes are 0, 8 and 16 1030 return ERROR_MALFORMED; 1031 } 1032 1033 uint8_t defaultKeyId[16]; 1034 1035 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) { 1036 return ERROR_IO; 1037 } 1038 1039 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId); 1040 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize); 1041 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16); 1042 *offset += chunk_size; 1043 break; 1044 } 1045 1046 case FOURCC('t', 'k', 'h', 'd'): 1047 { 1048 status_t err; 1049 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) { 1050 return err; 1051 } 1052 1053 *offset += chunk_size; 1054 break; 1055 } 1056 1057 case FOURCC('p', 's', 's', 'h'): 1058 { 1059 PsshInfo pssh; 1060 1061 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) { 1062 return ERROR_IO; 1063 } 1064 1065 uint32_t psshdatalen = 0; 1066 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) { 1067 return ERROR_IO; 1068 } 1069 pssh.datalen = ntohl(psshdatalen); 1070 ALOGV("pssh data size: %d", pssh.datalen); 1071 if (pssh.datalen + 20 > chunk_size) { 1072 // pssh data length exceeds size of containing box 1073 return ERROR_MALFORMED; 1074 } 1075 1076 pssh.data = new uint8_t[pssh.datalen]; 1077 ALOGV("allocated pssh @ %p", pssh.data); 1078 ssize_t requested = (ssize_t) pssh.datalen; 1079 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) { 1080 return ERROR_IO; 1081 } 1082 mPssh.push_back(pssh); 1083 1084 *offset += chunk_size; 1085 break; 1086 } 1087 1088 case FOURCC('m', 'd', 'h', 'd'): 1089 { 1090 if (chunk_data_size < 4) { 1091 return ERROR_MALFORMED; 1092 } 1093 1094 uint8_t version; 1095 if (mDataSource->readAt( 1096 data_offset, &version, sizeof(version)) 1097 < (ssize_t)sizeof(version)) { 1098 return ERROR_IO; 1099 } 1100 1101 off64_t timescale_offset; 1102 1103 if (version == 1) { 1104 timescale_offset = data_offset + 4 + 16; 1105 } else if (version 
== 0) { 1106 timescale_offset = data_offset + 4 + 8; 1107 } else { 1108 return ERROR_IO; 1109 } 1110 1111 uint32_t timescale; 1112 if (mDataSource->readAt( 1113 timescale_offset, ×cale, sizeof(timescale)) 1114 < (ssize_t)sizeof(timescale)) { 1115 return ERROR_IO; 1116 } 1117 1118 mLastTrack->timescale = ntohl(timescale); 1119 1120 int64_t duration = 0; 1121 if (version == 1) { 1122 if (mDataSource->readAt( 1123 timescale_offset + 4, &duration, sizeof(duration)) 1124 < (ssize_t)sizeof(duration)) { 1125 return ERROR_IO; 1126 } 1127 duration = ntoh64(duration); 1128 } else { 1129 uint32_t duration32; 1130 if (mDataSource->readAt( 1131 timescale_offset + 4, &duration32, sizeof(duration32)) 1132 < (ssize_t)sizeof(duration32)) { 1133 return ERROR_IO; 1134 } 1135 // ffmpeg sets duration to -1, which is incorrect. 1136 if (duration32 != 0xffffffff) { 1137 duration = ntohl(duration32); 1138 } 1139 } 1140 mLastTrack->meta->setInt64( 1141 kKeyDuration, (duration * 1000000) / mLastTrack->timescale); 1142 1143 uint8_t lang[2]; 1144 off64_t lang_offset; 1145 if (version == 1) { 1146 lang_offset = timescale_offset + 4 + 8; 1147 } else if (version == 0) { 1148 lang_offset = timescale_offset + 4 + 4; 1149 } else { 1150 return ERROR_IO; 1151 } 1152 1153 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang)) 1154 < (ssize_t)sizeof(lang)) { 1155 return ERROR_IO; 1156 } 1157 1158 // To get the ISO-639-2/T three character language code 1159 // 1 bit pad followed by 3 5-bits characters. Each character 1160 // is packed as the difference between its ASCII value and 0x60. 
1161 char lang_code[4]; 1162 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60; 1163 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60; 1164 lang_code[2] = (lang[1] & 0x1f) + 0x60; 1165 lang_code[3] = '\0'; 1166 1167 mLastTrack->meta->setCString( 1168 kKeyMediaLanguage, lang_code); 1169 1170 *offset += chunk_size; 1171 break; 1172 } 1173 1174 case FOURCC('s', 't', 's', 'd'): 1175 { 1176 if (chunk_data_size < 8) { 1177 return ERROR_MALFORMED; 1178 } 1179 1180 uint8_t buffer[8]; 1181 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1182 return ERROR_MALFORMED; 1183 } 1184 1185 if (mDataSource->readAt( 1186 data_offset, buffer, 8) < 8) { 1187 return ERROR_IO; 1188 } 1189 1190 if (U32_AT(buffer) != 0) { 1191 // Should be version 0, flags 0. 1192 return ERROR_MALFORMED; 1193 } 1194 1195 uint32_t entry_count = U32_AT(&buffer[4]); 1196 1197 if (entry_count > 1) { 1198 // For 3GPP timed text, there could be multiple tx3g boxes contain 1199 // multiple text display formats. These formats will be used to 1200 // display the timed text. 1201 // For encrypted files, there may also be more than one entry. 1202 const char *mime; 1203 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1204 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) && 1205 strcasecmp(mime, "application/octet-stream")) { 1206 // For now we only support a single type of media per track. 
1207 mLastTrack->skipTrack = true; 1208 *offset += chunk_size; 1209 break; 1210 } 1211 } 1212 off64_t stop_offset = *offset + chunk_size; 1213 *offset = data_offset + 8; 1214 for (uint32_t i = 0; i < entry_count; ++i) { 1215 status_t err = parseChunk(offset, depth + 1); 1216 if (err != OK) { 1217 return err; 1218 } 1219 } 1220 1221 if (*offset != stop_offset) { 1222 return ERROR_MALFORMED; 1223 } 1224 break; 1225 } 1226 1227 case FOURCC('m', 'p', '4', 'a'): 1228 case FOURCC('e', 'n', 'c', 'a'): 1229 case FOURCC('s', 'a', 'm', 'r'): 1230 case FOURCC('s', 'a', 'w', 'b'): 1231 { 1232 uint8_t buffer[8 + 20]; 1233 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1234 // Basic AudioSampleEntry size. 1235 return ERROR_MALFORMED; 1236 } 1237 1238 if (mDataSource->readAt( 1239 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1240 return ERROR_IO; 1241 } 1242 1243 uint16_t data_ref_index = U16_AT(&buffer[6]); 1244 uint32_t num_channels = U16_AT(&buffer[16]); 1245 1246 uint16_t sample_size = U16_AT(&buffer[18]); 1247 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16; 1248 1249 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) { 1250 // if the chunk type is enca, we'll get the type from the sinf/frma box later 1251 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1252 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate); 1253 } 1254 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n", 1255 chunk, num_channels, sample_size, sample_rate); 1256 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels); 1257 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate); 1258 1259 off64_t stop_offset = *offset + chunk_size; 1260 *offset = data_offset + sizeof(buffer); 1261 while (*offset < stop_offset) { 1262 status_t err = parseChunk(offset, depth + 1); 1263 if (err != OK) { 1264 return err; 1265 } 1266 } 1267 1268 if (*offset != stop_offset) { 1269 return ERROR_MALFORMED; 1270 } 1271 break; 1272 } 1273 1274 case FOURCC('m', 'p', '4', 'v'): 
1275 case FOURCC('e', 'n', 'c', 'v'): 1276 case FOURCC('s', '2', '6', '3'): 1277 case FOURCC('H', '2', '6', '3'): 1278 case FOURCC('h', '2', '6', '3'): 1279 case FOURCC('a', 'v', 'c', '1'): 1280 { 1281 mHasVideo = true; 1282 1283 uint8_t buffer[78]; 1284 if (chunk_data_size < (ssize_t)sizeof(buffer)) { 1285 // Basic VideoSampleEntry size. 1286 return ERROR_MALFORMED; 1287 } 1288 1289 if (mDataSource->readAt( 1290 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) { 1291 return ERROR_IO; 1292 } 1293 1294 uint16_t data_ref_index = U16_AT(&buffer[6]); 1295 uint16_t width = U16_AT(&buffer[6 + 18]); 1296 uint16_t height = U16_AT(&buffer[6 + 20]); 1297 1298 // The video sample is not standard-compliant if it has invalid dimension. 1299 // Use some default width and height value, and 1300 // let the decoder figure out the actual width and height (and thus 1301 // be prepared for INFO_FOMRAT_CHANGED event). 1302 if (width == 0) width = 352; 1303 if (height == 0) height = 288; 1304 1305 // printf("*** coding='%s' width=%d height=%d\n", 1306 // chunk, width, height); 1307 1308 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) { 1309 // if the chunk type is encv, we'll get the type from the sinf/frma box later 1310 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type)); 1311 } 1312 mLastTrack->meta->setInt32(kKeyWidth, width); 1313 mLastTrack->meta->setInt32(kKeyHeight, height); 1314 1315 off64_t stop_offset = *offset + chunk_size; 1316 *offset = data_offset + sizeof(buffer); 1317 while (*offset < stop_offset) { 1318 status_t err = parseChunk(offset, depth + 1); 1319 if (err != OK) { 1320 return err; 1321 } 1322 } 1323 1324 if (*offset != stop_offset) { 1325 return ERROR_MALFORMED; 1326 } 1327 break; 1328 } 1329 1330 case FOURCC('s', 't', 'c', 'o'): 1331 case FOURCC('c', 'o', '6', '4'): 1332 { 1333 status_t err = 1334 mLastTrack->sampleTable->setChunkOffsetParams( 1335 chunk_type, data_offset, chunk_data_size); 1336 1337 if (err != OK) { 1338 return err; 
1339 } 1340 1341 *offset += chunk_size; 1342 break; 1343 } 1344 1345 case FOURCC('s', 't', 's', 'c'): 1346 { 1347 status_t err = 1348 mLastTrack->sampleTable->setSampleToChunkParams( 1349 data_offset, chunk_data_size); 1350 1351 if (err != OK) { 1352 return err; 1353 } 1354 1355 *offset += chunk_size; 1356 break; 1357 } 1358 1359 case FOURCC('s', 't', 's', 'z'): 1360 case FOURCC('s', 't', 'z', '2'): 1361 { 1362 status_t err = 1363 mLastTrack->sampleTable->setSampleSizeParams( 1364 chunk_type, data_offset, chunk_data_size); 1365 1366 if (err != OK) { 1367 return err; 1368 } 1369 1370 size_t max_size; 1371 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size); 1372 1373 if (err != OK) { 1374 return err; 1375 } 1376 1377 if (max_size != 0) { 1378 // Assume that a given buffer only contains at most 10 chunks, 1379 // each chunk originally prefixed with a 2 byte length will 1380 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion, 1381 // and thus will grow by 2 bytes per chunk. 1382 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2); 1383 } else { 1384 // No size was specified. Pick a conservatively large size. 1385 int32_t width, height; 1386 if (!mLastTrack->meta->findInt32(kKeyWidth, &width) || 1387 !mLastTrack->meta->findInt32(kKeyHeight, &height)) { 1388 ALOGE("No width or height, assuming worst case 1080p"); 1389 width = 1920; 1390 height = 1080; 1391 } 1392 1393 const char *mime; 1394 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1395 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 1396 // AVC requires compression ratio of at least 2, and uses 1397 // macroblocks 1398 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192; 1399 } else { 1400 // For all other formats there is no minimum compression 1401 // ratio. Use compression ratio of 1. 
1402 max_size = width * height * 3 / 2; 1403 } 1404 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size); 1405 } 1406 *offset += chunk_size; 1407 1408 // NOTE: setting another piece of metadata invalidates any pointers (such as the 1409 // mimetype) previously obtained, so don't cache them. 1410 const char *mime; 1411 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime)); 1412 // Calculate average frame rate. 1413 if (!strncasecmp("video/", mime, 6)) { 1414 size_t nSamples = mLastTrack->sampleTable->countSamples(); 1415 int64_t durationUs; 1416 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) { 1417 if (durationUs > 0) { 1418 int32_t frameRate = (nSamples * 1000000LL + 1419 (durationUs >> 1)) / durationUs; 1420 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate); 1421 } 1422 } 1423 } 1424 1425 break; 1426 } 1427 1428 case FOURCC('s', 't', 't', 's'): 1429 { 1430 status_t err = 1431 mLastTrack->sampleTable->setTimeToSampleParams( 1432 data_offset, chunk_data_size); 1433 1434 if (err != OK) { 1435 return err; 1436 } 1437 1438 *offset += chunk_size; 1439 break; 1440 } 1441 1442 case FOURCC('c', 't', 't', 's'): 1443 { 1444 status_t err = 1445 mLastTrack->sampleTable->setCompositionTimeToSampleParams( 1446 data_offset, chunk_data_size); 1447 1448 if (err != OK) { 1449 return err; 1450 } 1451 1452 *offset += chunk_size; 1453 break; 1454 } 1455 1456 case FOURCC('s', 't', 's', 's'): 1457 { 1458 status_t err = 1459 mLastTrack->sampleTable->setSyncSampleParams( 1460 data_offset, chunk_data_size); 1461 1462 if (err != OK) { 1463 return err; 1464 } 1465 1466 *offset += chunk_size; 1467 break; 1468 } 1469 1470 // @xyz 1471 case FOURCC('\xA9', 'x', 'y', 'z'): 1472 { 1473 // Best case the total data length inside "@xyz" box 1474 // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/", 1475 // where "\x00\x04" is the text string length with value = 4, 1476 // "\0x15\xc7" is the language code = en, and "0+0" is a 1477 // location (string) value with 
longitude = 0 and latitude = 0. 1478 if (chunk_data_size < 8) { 1479 return ERROR_MALFORMED; 1480 } 1481 1482 // Worst case the location string length would be 18, 1483 // for instance +90.0000-180.0000, without the trailing "/" and 1484 // the string length + language code. 1485 char buffer[18]; 1486 1487 // Substracting 5 from the data size is because the text string length + 1488 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte. 1489 off64_t location_length = chunk_data_size - 5; 1490 if (location_length >= (off64_t) sizeof(buffer)) { 1491 return ERROR_MALFORMED; 1492 } 1493 1494 if (mDataSource->readAt( 1495 data_offset + 4, buffer, location_length) < location_length) { 1496 return ERROR_IO; 1497 } 1498 1499 buffer[location_length] = '\0'; 1500 mFileMetaData->setCString(kKeyLocation, buffer); 1501 *offset += chunk_size; 1502 break; 1503 } 1504 1505 case FOURCC('e', 's', 'd', 's'): 1506 { 1507 if (chunk_data_size < 4) { 1508 return ERROR_MALFORMED; 1509 } 1510 1511 uint8_t buffer[256]; 1512 if (chunk_data_size > (off64_t)sizeof(buffer)) { 1513 return ERROR_BUFFER_TOO_SMALL; 1514 } 1515 1516 if (mDataSource->readAt( 1517 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1518 return ERROR_IO; 1519 } 1520 1521 if (U32_AT(buffer) != 0) { 1522 // Should be version 0, flags 0. 1523 return ERROR_MALFORMED; 1524 } 1525 1526 mLastTrack->meta->setData( 1527 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4); 1528 1529 if (mPath.size() >= 2 1530 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) { 1531 // Information from the ESDS must be relied on for proper 1532 // setup of sample rate and channel count for MPEG4 Audio. 1533 // The generic header appears to only contain generic 1534 // information... 
1535 1536 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio( 1537 &buffer[4], chunk_data_size - 4); 1538 1539 if (err != OK) { 1540 return err; 1541 } 1542 } 1543 1544 *offset += chunk_size; 1545 break; 1546 } 1547 1548 case FOURCC('a', 'v', 'c', 'C'): 1549 { 1550 sp<ABuffer> buffer = new ABuffer(chunk_data_size); 1551 1552 if (mDataSource->readAt( 1553 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) { 1554 return ERROR_IO; 1555 } 1556 1557 mLastTrack->meta->setData( 1558 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size); 1559 1560 *offset += chunk_size; 1561 break; 1562 } 1563 1564 case FOURCC('d', '2', '6', '3'): 1565 { 1566 /* 1567 * d263 contains a fixed 7 bytes part: 1568 * vendor - 4 bytes 1569 * version - 1 byte 1570 * level - 1 byte 1571 * profile - 1 byte 1572 * optionally, "d263" box itself may contain a 16-byte 1573 * bit rate box (bitr) 1574 * average bit rate - 4 bytes 1575 * max bit rate - 4 bytes 1576 */ 1577 char buffer[23]; 1578 if (chunk_data_size != 7 && 1579 chunk_data_size != 23) { 1580 ALOGE("Incorrect D263 box size %lld", chunk_data_size); 1581 return ERROR_MALFORMED; 1582 } 1583 1584 if (mDataSource->readAt( 1585 data_offset, buffer, chunk_data_size) < chunk_data_size) { 1586 return ERROR_IO; 1587 } 1588 1589 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size); 1590 1591 *offset += chunk_size; 1592 break; 1593 } 1594 1595 case FOURCC('m', 'e', 't', 'a'): 1596 { 1597 uint8_t buffer[4]; 1598 if (chunk_data_size < (off64_t)sizeof(buffer)) { 1599 return ERROR_MALFORMED; 1600 } 1601 1602 if (mDataSource->readAt( 1603 data_offset, buffer, 4) < 4) { 1604 return ERROR_IO; 1605 } 1606 1607 if (U32_AT(buffer) != 0) { 1608 // Should be version 0, flags 0. 1609 1610 // If it's not, let's assume this is one of those 1611 // apparently malformed chunks that don't have flags 1612 // and completely different semantics than what's 1613 // in the MPEG4 specs and skip it. 
1614 *offset += chunk_size; 1615 return OK; 1616 } 1617 1618 off64_t stop_offset = *offset + chunk_size; 1619 *offset = data_offset + sizeof(buffer); 1620 while (*offset < stop_offset) { 1621 status_t err = parseChunk(offset, depth + 1); 1622 if (err != OK) { 1623 return err; 1624 } 1625 } 1626 1627 if (*offset != stop_offset) { 1628 return ERROR_MALFORMED; 1629 } 1630 break; 1631 } 1632 1633 case FOURCC('m', 'e', 'a', 'n'): 1634 case FOURCC('n', 'a', 'm', 'e'): 1635 case FOURCC('d', 'a', 't', 'a'): 1636 { 1637 if (mPath.size() == 6 && underMetaDataPath(mPath)) { 1638 status_t err = parseMetaData(data_offset, chunk_data_size); 1639 1640 if (err != OK) { 1641 return err; 1642 } 1643 } 1644 1645 *offset += chunk_size; 1646 break; 1647 } 1648 1649 case FOURCC('m', 'v', 'h', 'd'): 1650 { 1651 if (chunk_data_size < 24) { 1652 return ERROR_MALFORMED; 1653 } 1654 1655 uint8_t header[24]; 1656 if (mDataSource->readAt( 1657 data_offset, header, sizeof(header)) 1658 < (ssize_t)sizeof(header)) { 1659 return ERROR_IO; 1660 } 1661 1662 uint64_t creationTime; 1663 if (header[0] == 1) { 1664 creationTime = U64_AT(&header[4]); 1665 mHeaderTimescale = U32_AT(&header[20]); 1666 } else if (header[0] != 0) { 1667 return ERROR_MALFORMED; 1668 } else { 1669 creationTime = U32_AT(&header[4]); 1670 mHeaderTimescale = U32_AT(&header[12]); 1671 } 1672 1673 String8 s; 1674 convertTimeToDate(creationTime, &s); 1675 1676 mFileMetaData->setCString(kKeyDate, s.string()); 1677 1678 *offset += chunk_size; 1679 break; 1680 } 1681 1682 case FOURCC('m', 'd', 'a', 't'): 1683 { 1684 ALOGV("mdat chunk, drm: %d", mIsDrm); 1685 if (!mIsDrm) { 1686 *offset += chunk_size; 1687 break; 1688 } 1689 1690 if (chunk_size < 8) { 1691 return ERROR_MALFORMED; 1692 } 1693 1694 return parseDrmSINF(offset, data_offset); 1695 } 1696 1697 case FOURCC('h', 'd', 'l', 'r'): 1698 { 1699 uint32_t buffer; 1700 if (mDataSource->readAt( 1701 data_offset + 8, &buffer, 4) < 4) { 1702 return ERROR_IO; 1703 } 1704 1705 uint32_t type 
= ntohl(buffer); 1706 // For the 3GPP file format, the handler-type within the 'hdlr' box 1707 // shall be 'text'. We also want to support 'sbtl' handler type 1708 // for a practical reason as various MPEG4 containers use it. 1709 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) { 1710 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP); 1711 } 1712 1713 *offset += chunk_size; 1714 break; 1715 } 1716 1717 case FOURCC('t', 'x', '3', 'g'): 1718 { 1719 uint32_t type; 1720 const void *data; 1721 size_t size = 0; 1722 if (!mLastTrack->meta->findData( 1723 kKeyTextFormatData, &type, &data, &size)) { 1724 size = 0; 1725 } 1726 1727 uint8_t *buffer = new uint8_t[size + chunk_size]; 1728 1729 if (size > 0) { 1730 memcpy(buffer, data, size); 1731 } 1732 1733 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size)) 1734 < chunk_size) { 1735 delete[] buffer; 1736 buffer = NULL; 1737 1738 return ERROR_IO; 1739 } 1740 1741 mLastTrack->meta->setData( 1742 kKeyTextFormatData, 0, buffer, size + chunk_size); 1743 1744 delete[] buffer; 1745 1746 *offset += chunk_size; 1747 break; 1748 } 1749 1750 case FOURCC('c', 'o', 'v', 'r'): 1751 { 1752 if (mFileMetaData != NULL) { 1753 ALOGV("chunk_data_size = %lld and data_offset = %lld", 1754 chunk_data_size, data_offset); 1755 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1); 1756 if (mDataSource->readAt( 1757 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) { 1758 return ERROR_IO; 1759 } 1760 const int kSkipBytesOfDataBox = 16; 1761 mFileMetaData->setData( 1762 kKeyAlbumArt, MetaData::TYPE_NONE, 1763 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox); 1764 } 1765 1766 *offset += chunk_size; 1767 break; 1768 } 1769 1770 case FOURCC('-', '-', '-', '-'): 1771 { 1772 mLastCommentMean.clear(); 1773 mLastCommentName.clear(); 1774 mLastCommentData.clear(); 1775 *offset += chunk_size; 1776 break; 1777 } 1778 1779 case FOURCC('s', 'i', 
'd', 'x'): 1780 { 1781 parseSegmentIndex(data_offset, chunk_data_size); 1782 *offset += chunk_size; 1783 return UNKNOWN_ERROR; // stop parsing after sidx 1784 } 1785 1786 default: 1787 { 1788 *offset += chunk_size; 1789 break; 1790 } 1791 } 1792 1793 return OK; 1794} 1795 1796status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) { 1797 ALOGV("MPEG4Extractor::parseSegmentIndex"); 1798 1799 if (size < 12) { 1800 return -EINVAL; 1801 } 1802 1803 uint32_t flags; 1804 if (!mDataSource->getUInt32(offset, &flags)) { 1805 return ERROR_MALFORMED; 1806 } 1807 1808 uint32_t version = flags >> 24; 1809 flags &= 0xffffff; 1810 1811 ALOGV("sidx version %d", version); 1812 1813 uint32_t referenceId; 1814 if (!mDataSource->getUInt32(offset + 4, &referenceId)) { 1815 return ERROR_MALFORMED; 1816 } 1817 1818 uint32_t timeScale; 1819 if (!mDataSource->getUInt32(offset + 8, &timeScale)) { 1820 return ERROR_MALFORMED; 1821 } 1822 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); 1823 1824 uint64_t earliestPresentationTime; 1825 uint64_t firstOffset; 1826 1827 offset += 12; 1828 size -= 12; 1829 1830 if (version == 0) { 1831 if (size < 8) { 1832 return -EINVAL; 1833 } 1834 uint32_t tmp; 1835 if (!mDataSource->getUInt32(offset, &tmp)) { 1836 return ERROR_MALFORMED; 1837 } 1838 earliestPresentationTime = tmp; 1839 if (!mDataSource->getUInt32(offset + 4, &tmp)) { 1840 return ERROR_MALFORMED; 1841 } 1842 firstOffset = tmp; 1843 offset += 8; 1844 size -= 8; 1845 } else { 1846 if (size < 16) { 1847 return -EINVAL; 1848 } 1849 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) { 1850 return ERROR_MALFORMED; 1851 } 1852 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) { 1853 return ERROR_MALFORMED; 1854 } 1855 offset += 16; 1856 size -= 16; 1857 } 1858 ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset); 1859 1860 if (size < 4) { 1861 return -EINVAL; 1862 } 1863 1864 uint16_t referenceCount; 1865 if 
(!mDataSource->getUInt16(offset + 2, &referenceCount)) { 1866 return ERROR_MALFORMED; 1867 } 1868 offset += 4; 1869 size -= 4; 1870 ALOGV("refcount: %d", referenceCount); 1871 1872 if (size < referenceCount * 12) { 1873 return -EINVAL; 1874 } 1875 1876 uint64_t total_duration = 0; 1877 for (unsigned int i = 0; i < referenceCount; i++) { 1878 uint32_t d1, d2, d3; 1879 1880 if (!mDataSource->getUInt32(offset, &d1) || // size 1881 !mDataSource->getUInt32(offset + 4, &d2) || // duration 1882 !mDataSource->getUInt32(offset + 8, &d3)) { // flags 1883 return ERROR_MALFORMED; 1884 } 1885 1886 if (d1 & 0x80000000) { 1887 ALOGW("sub-sidx boxes not supported yet"); 1888 } 1889 bool sap = d3 & 0x80000000; 1890 bool saptype = d3 >> 28; 1891 if (!sap || saptype > 2) { 1892 ALOGW("not a stream access point, or unsupported type"); 1893 } 1894 total_duration += d2; 1895 offset += 12; 1896 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); 1897 SidxEntry se; 1898 se.mSize = d1 & 0x7fffffff; 1899 se.mDurationUs = 1000000LL * d2 / timeScale; 1900 mSidxEntries.add(se); 1901 } 1902 1903 mSidxDuration = total_duration * 1000000 / timeScale; 1904 ALOGV("duration: %lld", mSidxDuration); 1905 1906 int64_t metaDuration; 1907 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) { 1908 mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration); 1909 } 1910 return OK; 1911} 1912 1913 1914 1915status_t MPEG4Extractor::parseTrackHeader( 1916 off64_t data_offset, off64_t data_size) { 1917 if (data_size < 4) { 1918 return ERROR_MALFORMED; 1919 } 1920 1921 uint8_t version; 1922 if (mDataSource->readAt(data_offset, &version, 1) < 1) { 1923 return ERROR_IO; 1924 } 1925 1926 size_t dynSize = (version == 1) ? 
36 : 24; 1927 1928 uint8_t buffer[36 + 60]; 1929 1930 if (data_size != (off64_t)dynSize + 60) { 1931 return ERROR_MALFORMED; 1932 } 1933 1934 if (mDataSource->readAt( 1935 data_offset, buffer, data_size) < (ssize_t)data_size) { 1936 return ERROR_IO; 1937 } 1938 1939 uint64_t ctime, mtime, duration; 1940 int32_t id; 1941 1942 if (version == 1) { 1943 ctime = U64_AT(&buffer[4]); 1944 mtime = U64_AT(&buffer[12]); 1945 id = U32_AT(&buffer[20]); 1946 duration = U64_AT(&buffer[28]); 1947 } else if (version == 0) { 1948 ctime = U32_AT(&buffer[4]); 1949 mtime = U32_AT(&buffer[8]); 1950 id = U32_AT(&buffer[12]); 1951 duration = U32_AT(&buffer[20]); 1952 } else { 1953 return ERROR_UNSUPPORTED; 1954 } 1955 1956 mLastTrack->meta->setInt32(kKeyTrackID, id); 1957 1958 size_t matrixOffset = dynSize + 16; 1959 int32_t a00 = U32_AT(&buffer[matrixOffset]); 1960 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]); 1961 int32_t dx = U32_AT(&buffer[matrixOffset + 8]); 1962 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]); 1963 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]); 1964 int32_t dy = U32_AT(&buffer[matrixOffset + 20]); 1965 1966#if 0 1967 ALOGI("x' = %.2f * x + %.2f * y + %.2f", 1968 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f); 1969 ALOGI("y' = %.2f * x + %.2f * y + %.2f", 1970 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f); 1971#endif 1972 1973 uint32_t rotationDegrees; 1974 1975 static const int32_t kFixedOne = 0x10000; 1976 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) { 1977 // Identity, no rotation 1978 rotationDegrees = 0; 1979 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) { 1980 rotationDegrees = 90; 1981 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) { 1982 rotationDegrees = 270; 1983 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) { 1984 rotationDegrees = 180; 1985 } else { 1986 ALOGW("We only support 0,90,180,270 degree rotation matrices"); 1987 rotationDegrees = 0; 
1988 } 1989 1990 if (rotationDegrees != 0) { 1991 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees); 1992 } 1993 1994 // Handle presentation display size, which could be different 1995 // from the image size indicated by kKeyWidth and kKeyHeight. 1996 uint32_t width = U32_AT(&buffer[dynSize + 52]); 1997 uint32_t height = U32_AT(&buffer[dynSize + 56]); 1998 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16); 1999 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16); 2000 2001 return OK; 2002} 2003 2004status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) { 2005 if (size < 4) { 2006 return ERROR_MALFORMED; 2007 } 2008 2009 uint8_t *buffer = new uint8_t[size + 1]; 2010 if (mDataSource->readAt( 2011 offset, buffer, size) != (ssize_t)size) { 2012 delete[] buffer; 2013 buffer = NULL; 2014 2015 return ERROR_IO; 2016 } 2017 2018 uint32_t flags = U32_AT(buffer); 2019 2020 uint32_t metadataKey = 0; 2021 char chunk[5]; 2022 MakeFourCCString(mPath[4], chunk); 2023 ALOGV("meta: %s @ %lld", chunk, offset); 2024 switch (mPath[4]) { 2025 case FOURCC(0xa9, 'a', 'l', 'b'): 2026 { 2027 metadataKey = kKeyAlbum; 2028 break; 2029 } 2030 case FOURCC(0xa9, 'A', 'R', 'T'): 2031 { 2032 metadataKey = kKeyArtist; 2033 break; 2034 } 2035 case FOURCC('a', 'A', 'R', 'T'): 2036 { 2037 metadataKey = kKeyAlbumArtist; 2038 break; 2039 } 2040 case FOURCC(0xa9, 'd', 'a', 'y'): 2041 { 2042 metadataKey = kKeyYear; 2043 break; 2044 } 2045 case FOURCC(0xa9, 'n', 'a', 'm'): 2046 { 2047 metadataKey = kKeyTitle; 2048 break; 2049 } 2050 case FOURCC(0xa9, 'w', 'r', 't'): 2051 { 2052 metadataKey = kKeyWriter; 2053 break; 2054 } 2055 case FOURCC('c', 'o', 'v', 'r'): 2056 { 2057 metadataKey = kKeyAlbumArt; 2058 break; 2059 } 2060 case FOURCC('g', 'n', 'r', 'e'): 2061 { 2062 metadataKey = kKeyGenre; 2063 break; 2064 } 2065 case FOURCC(0xa9, 'g', 'e', 'n'): 2066 { 2067 metadataKey = kKeyGenre; 2068 break; 2069 } 2070 case FOURCC('c', 'p', 'i', 'l'): 2071 { 2072 if (size == 9 && 
flags == 21) { 2073 char tmp[16]; 2074 sprintf(tmp, "%d", 2075 (int)buffer[size - 1]); 2076 2077 mFileMetaData->setCString(kKeyCompilation, tmp); 2078 } 2079 break; 2080 } 2081 case FOURCC('t', 'r', 'k', 'n'): 2082 { 2083 if (size == 16 && flags == 0) { 2084 char tmp[16]; 2085 uint16_t* pTrack = (uint16_t*)&buffer[10]; 2086 uint16_t* pTotalTracks = (uint16_t*)&buffer[12]; 2087 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks)); 2088 2089 mFileMetaData->setCString(kKeyCDTrackNumber, tmp); 2090 } 2091 break; 2092 } 2093 case FOURCC('d', 'i', 's', 'k'): 2094 { 2095 if ((size == 14 || size == 16) && flags == 0) { 2096 char tmp[16]; 2097 uint16_t* pDisc = (uint16_t*)&buffer[10]; 2098 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12]; 2099 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs)); 2100 2101 mFileMetaData->setCString(kKeyDiscNumber, tmp); 2102 } 2103 break; 2104 } 2105 case FOURCC('-', '-', '-', '-'): 2106 { 2107 buffer[size] = '\0'; 2108 switch (mPath[5]) { 2109 case FOURCC('m', 'e', 'a', 'n'): 2110 mLastCommentMean.setTo((const char *)buffer + 4); 2111 break; 2112 case FOURCC('n', 'a', 'm', 'e'): 2113 mLastCommentName.setTo((const char *)buffer + 4); 2114 break; 2115 case FOURCC('d', 'a', 't', 'a'): 2116 mLastCommentData.setTo((const char *)buffer + 8); 2117 break; 2118 } 2119 2120 // Once we have a set of mean/name/data info, go ahead and process 2121 // it to see if its something we are interested in. Whether or not 2122 // were are interested in the specific tag, make sure to clear out 2123 // the set so we can be ready to process another tuple should one 2124 // show up later in the file. 
2125 if ((mLastCommentMean.length() != 0) && 2126 (mLastCommentName.length() != 0) && 2127 (mLastCommentData.length() != 0)) { 2128 2129 if (mLastCommentMean == "com.apple.iTunes" 2130 && mLastCommentName == "iTunSMPB") { 2131 int32_t delay, padding; 2132 if (sscanf(mLastCommentData, 2133 " %*x %x %x %*x", &delay, &padding) == 2) { 2134 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay); 2135 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding); 2136 } 2137 } 2138 2139 mLastCommentMean.clear(); 2140 mLastCommentName.clear(); 2141 mLastCommentData.clear(); 2142 } 2143 break; 2144 } 2145 2146 default: 2147 break; 2148 } 2149 2150 if (size >= 8 && metadataKey) { 2151 if (metadataKey == kKeyAlbumArt) { 2152 mFileMetaData->setData( 2153 kKeyAlbumArt, MetaData::TYPE_NONE, 2154 buffer + 8, size - 8); 2155 } else if (metadataKey == kKeyGenre) { 2156 if (flags == 0) { 2157 // uint8_t genre code, iTunes genre codes are 2158 // the standard id3 codes, except they start 2159 // at 1 instead of 0 (e.g. Pop is 14, not 13) 2160 // We use standard id3 numbering, so subtract 1. 
2161 int genrecode = (int)buffer[size - 1]; 2162 genrecode--; 2163 if (genrecode < 0) { 2164 genrecode = 255; // reserved for 'unknown genre' 2165 } 2166 char genre[10]; 2167 sprintf(genre, "%d", genrecode); 2168 2169 mFileMetaData->setCString(metadataKey, genre); 2170 } else if (flags == 1) { 2171 // custom genre string 2172 buffer[size] = '\0'; 2173 2174 mFileMetaData->setCString( 2175 metadataKey, (const char *)buffer + 8); 2176 } 2177 } else { 2178 buffer[size] = '\0'; 2179 2180 mFileMetaData->setCString( 2181 metadataKey, (const char *)buffer + 8); 2182 } 2183 } 2184 2185 delete[] buffer; 2186 buffer = NULL; 2187 2188 return OK; 2189} 2190 2191sp<MediaSource> MPEG4Extractor::getTrack(size_t index) { 2192 status_t err; 2193 if ((err = readMetaData()) != OK) { 2194 return NULL; 2195 } 2196 2197 Track *track = mFirstTrack; 2198 while (index > 0) { 2199 if (track == NULL) { 2200 return NULL; 2201 } 2202 2203 track = track->next; 2204 --index; 2205 } 2206 2207 if (track == NULL) { 2208 return NULL; 2209 } 2210 2211 ALOGV("getTrack called, pssh: %d", mPssh.size()); 2212 2213 return new MPEG4Source( 2214 track->meta, mDataSource, track->timescale, track->sampleTable, 2215 mSidxEntries, mMoofOffset); 2216} 2217 2218// static 2219status_t MPEG4Extractor::verifyTrack(Track *track) { 2220 const char *mime; 2221 CHECK(track->meta->findCString(kKeyMIMEType, &mime)); 2222 2223 uint32_t type; 2224 const void *data; 2225 size_t size; 2226 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) { 2227 if (!track->meta->findData(kKeyAVCC, &type, &data, &size) 2228 || type != kTypeAVCC) { 2229 return ERROR_MALFORMED; 2230 } 2231 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) 2232 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) { 2233 if (!track->meta->findData(kKeyESDS, &type, &data, &size) 2234 || type != kTypeESDS) { 2235 return ERROR_MALFORMED; 2236 } 2237 } 2238 2239 if (!track->sampleTable->isValid()) { 2240 // Make sure we have all the metadata we need. 
2241 return ERROR_MALFORMED; 2242 } 2243 2244 return OK; 2245} 2246 2247status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( 2248 const void *esds_data, size_t esds_size) { 2249 ESDS esds(esds_data, esds_size); 2250 2251 uint8_t objectTypeIndication; 2252 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) { 2253 return ERROR_MALFORMED; 2254 } 2255 2256 if (objectTypeIndication == 0xe1) { 2257 // This isn't MPEG4 audio at all, it's QCELP 14k... 2258 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP); 2259 return OK; 2260 } 2261 2262 if (objectTypeIndication == 0x6b) { 2263 // The media subtype is MP3 audio 2264 // Our software MP3 audio decoder may not be able to handle 2265 // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED 2266 ALOGE("MP3 track in MP4/3GPP file is not supported"); 2267 return ERROR_UNSUPPORTED; 2268 } 2269 2270 const uint8_t *csd; 2271 size_t csd_size; 2272 if (esds.getCodecSpecificInfo( 2273 (const void **)&csd, &csd_size) != OK) { 2274 return ERROR_MALFORMED; 2275 } 2276 2277#if 0 2278 printf("ESD of size %d\n", csd_size); 2279 hexdump(csd, csd_size); 2280#endif 2281 2282 if (csd_size == 0) { 2283 // There's no further information, i.e. no codec specific data 2284 // Let's assume that the information provided in the mpeg4 headers 2285 // is accurate and hope for the best. 
2286 2287 return OK; 2288 } 2289 2290 if (csd_size < 2) { 2291 return ERROR_MALFORMED; 2292 } 2293 2294 static uint32_t kSamplingRate[] = { 2295 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 2296 16000, 12000, 11025, 8000, 7350 2297 }; 2298 2299 ABitReader br(csd, csd_size); 2300 uint32_t objectType = br.getBits(5); 2301 2302 if (objectType == 31) { // AAC-ELD => additional 6 bits 2303 objectType = 32 + br.getBits(6); 2304 } 2305 2306 //keep AOT type 2307 mLastTrack->meta->setInt32(kKeyAACAOT, objectType); 2308 2309 uint32_t freqIndex = br.getBits(4); 2310 2311 int32_t sampleRate = 0; 2312 int32_t numChannels = 0; 2313 if (freqIndex == 15) { 2314 if (csd_size < 5) { 2315 return ERROR_MALFORMED; 2316 } 2317 sampleRate = br.getBits(24); 2318 numChannels = br.getBits(4); 2319 } else { 2320 numChannels = br.getBits(4); 2321 2322 if (freqIndex == 13 || freqIndex == 14) { 2323 return ERROR_MALFORMED; 2324 } 2325 2326 sampleRate = kSamplingRate[freqIndex]; 2327 } 2328 2329 if (objectType == 5 || objectType == 29) { // SBR specific config per 14496-3 table 1.13 2330 uint32_t extFreqIndex = br.getBits(4); 2331 int32_t extSampleRate; 2332 if (extFreqIndex == 15) { 2333 if (csd_size < 8) { 2334 return ERROR_MALFORMED; 2335 } 2336 extSampleRate = br.getBits(24); 2337 } else { 2338 if (extFreqIndex == 13 || extFreqIndex == 14) { 2339 return ERROR_MALFORMED; 2340 } 2341 extSampleRate = kSamplingRate[extFreqIndex]; 2342 } 2343 //TODO: save the extension sampling rate value in meta data => 2344 // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); 2345 } 2346 2347 if (numChannels == 0) { 2348 return ERROR_UNSUPPORTED; 2349 } 2350 2351 int32_t prevSampleRate; 2352 CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate)); 2353 2354 if (prevSampleRate != sampleRate) { 2355 ALOGV("mpeg4 audio sample rate different from previous setting. 
" 2356 "was: %d, now: %d", prevSampleRate, sampleRate); 2357 } 2358 2359 mLastTrack->meta->setInt32(kKeySampleRate, sampleRate); 2360 2361 int32_t prevChannelCount; 2362 CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount)); 2363 2364 if (prevChannelCount != numChannels) { 2365 ALOGV("mpeg4 audio channel count different from previous setting. " 2366 "was: %d, now: %d", prevChannelCount, numChannels); 2367 } 2368 2369 mLastTrack->meta->setInt32(kKeyChannelCount, numChannels); 2370 2371 return OK; 2372} 2373 2374//////////////////////////////////////////////////////////////////////////////// 2375 2376MPEG4Source::MPEG4Source( 2377 const sp<MetaData> &format, 2378 const sp<DataSource> &dataSource, 2379 int32_t timeScale, 2380 const sp<SampleTable> &sampleTable, 2381 Vector<SidxEntry> &sidx, 2382 off64_t firstMoofOffset) 2383 : mFormat(format), 2384 mDataSource(dataSource), 2385 mTimescale(timeScale), 2386 mSampleTable(sampleTable), 2387 mCurrentSampleIndex(0), 2388 mCurrentFragmentIndex(0), 2389 mSegments(sidx), 2390 mFirstMoofOffset(firstMoofOffset), 2391 mCurrentMoofOffset(firstMoofOffset), 2392 mCurrentTime(0), 2393 mCurrentSampleInfoAllocSize(0), 2394 mCurrentSampleInfoSizes(NULL), 2395 mCurrentSampleInfoOffsetsAllocSize(0), 2396 mCurrentSampleInfoOffsets(NULL), 2397 mIsAVC(false), 2398 mNALLengthSize(0), 2399 mStarted(false), 2400 mGroup(NULL), 2401 mBuffer(NULL), 2402 mWantsNALFragments(false), 2403 mSrcBuffer(NULL) { 2404 2405 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode); 2406 mDefaultIVSize = 0; 2407 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize); 2408 uint32_t keytype; 2409 const void *key; 2410 size_t keysize; 2411 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) { 2412 CHECK(keysize <= 16); 2413 memset(mCryptoKey, 0, 16); 2414 memcpy(mCryptoKey, key, keysize); 2415 } 2416 2417 const char *mime; 2418 bool success = mFormat->findCString(kKeyMIMEType, &mime); 2419 CHECK(success); 2420 2421 mIsAVC = 
!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC); 2422 2423 if (mIsAVC) { 2424 uint32_t type; 2425 const void *data; 2426 size_t size; 2427 CHECK(format->findData(kKeyAVCC, &type, &data, &size)); 2428 2429 const uint8_t *ptr = (const uint8_t *)data; 2430 2431 CHECK(size >= 7); 2432 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1 2433 2434 // The number of bytes used to encode the length of a NAL unit. 2435 mNALLengthSize = 1 + (ptr[4] & 3); 2436 } 2437 2438 CHECK(format->findInt32(kKeyTrackID, &mTrackId)); 2439 2440 if (mFirstMoofOffset != 0) { 2441 off64_t offset = mFirstMoofOffset; 2442 parseChunk(&offset); 2443 } 2444} 2445 2446MPEG4Source::~MPEG4Source() { 2447 if (mStarted) { 2448 stop(); 2449 } 2450 free(mCurrentSampleInfoSizes); 2451 free(mCurrentSampleInfoOffsets); 2452} 2453 2454status_t MPEG4Source::start(MetaData *params) { 2455 Mutex::Autolock autoLock(mLock); 2456 2457 CHECK(!mStarted); 2458 2459 int32_t val; 2460 if (params && params->findInt32(kKeyWantsNALFragments, &val) 2461 && val != 0) { 2462 mWantsNALFragments = true; 2463 } else { 2464 mWantsNALFragments = false; 2465 } 2466 2467 mGroup = new MediaBufferGroup; 2468 2469 int32_t max_size; 2470 CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); 2471 2472 mGroup->add_buffer(new MediaBuffer(max_size)); 2473 2474 mSrcBuffer = new uint8_t[max_size]; 2475 2476 mStarted = true; 2477 2478 return OK; 2479} 2480 2481status_t MPEG4Source::stop() { 2482 Mutex::Autolock autoLock(mLock); 2483 2484 CHECK(mStarted); 2485 2486 if (mBuffer != NULL) { 2487 mBuffer->release(); 2488 mBuffer = NULL; 2489 } 2490 2491 delete[] mSrcBuffer; 2492 mSrcBuffer = NULL; 2493 2494 delete mGroup; 2495 mGroup = NULL; 2496 2497 mStarted = false; 2498 mCurrentSampleIndex = 0; 2499 2500 return OK; 2501} 2502 2503status_t MPEG4Source::parseChunk(off64_t *offset) { 2504 uint32_t hdr[2]; 2505 if (mDataSource->readAt(*offset, hdr, 8) < 8) { 2506 return ERROR_IO; 2507 } 2508 uint64_t chunk_size = ntohl(hdr[0]); 2509 uint32_t 
chunk_type = ntohl(hdr[1]); 2510 off64_t data_offset = *offset + 8; 2511 2512 if (chunk_size == 1) { 2513 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) { 2514 return ERROR_IO; 2515 } 2516 chunk_size = ntoh64(chunk_size); 2517 data_offset += 8; 2518 2519 if (chunk_size < 16) { 2520 // The smallest valid chunk is 16 bytes long in this case. 2521 return ERROR_MALFORMED; 2522 } 2523 } else if (chunk_size < 8) { 2524 // The smallest valid chunk is 8 bytes long. 2525 return ERROR_MALFORMED; 2526 } 2527 2528 char chunk[5]; 2529 MakeFourCCString(chunk_type, chunk); 2530 ALOGV("MPEG4Source chunk %s @ %llx", chunk, *offset); 2531 2532 off64_t chunk_data_size = *offset + chunk_size - data_offset; 2533 2534 switch(chunk_type) { 2535 2536 case FOURCC('t', 'r', 'a', 'f'): 2537 case FOURCC('m', 'o', 'o', 'f'): { 2538 off64_t stop_offset = *offset + chunk_size; 2539 *offset = data_offset; 2540 while (*offset < stop_offset) { 2541 status_t err = parseChunk(offset); 2542 if (err != OK) { 2543 return err; 2544 } 2545 } 2546 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) { 2547 // *offset points to the mdat box following this moof 2548 parseChunk(offset); // doesn't actually parse it, just updates offset 2549 mNextMoofOffset = *offset; 2550 } 2551 break; 2552 } 2553 2554 case FOURCC('t', 'f', 'h', 'd'): { 2555 status_t err; 2556 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) { 2557 return err; 2558 } 2559 *offset += chunk_size; 2560 break; 2561 } 2562 2563 case FOURCC('t', 'r', 'u', 'n'): { 2564 status_t err; 2565 if (mLastParsedTrackId == mTrackId) { 2566 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) { 2567 return err; 2568 } 2569 } 2570 2571 *offset += chunk_size; 2572 break; 2573 } 2574 2575 case FOURCC('s', 'a', 'i', 'z'): { 2576 status_t err; 2577 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) { 2578 return err; 2579 } 2580 *offset += chunk_size; 2581 break; 2582 } 2583 case 
FOURCC('s', 'a', 'i', 'o'): { 2584 status_t err; 2585 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) { 2586 return err; 2587 } 2588 *offset += chunk_size; 2589 break; 2590 } 2591 2592 case FOURCC('m', 'd', 'a', 't'): { 2593 // parse DRM info if present 2594 ALOGV("MPEG4Source::parseChunk mdat"); 2595 // if saiz/saoi was previously observed, do something with the sampleinfos 2596 *offset += chunk_size; 2597 break; 2598 } 2599 2600 default: { 2601 *offset += chunk_size; 2602 break; 2603 } 2604 } 2605 return OK; 2606} 2607 2608status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size) { 2609 ALOGV("parseSampleAuxiliaryInformationSizes"); 2610 // 14496-12 8.7.12 2611 uint8_t version; 2612 if (mDataSource->readAt( 2613 offset, &version, sizeof(version)) 2614 < (ssize_t)sizeof(version)) { 2615 return ERROR_IO; 2616 } 2617 2618 if (version != 0) { 2619 return ERROR_UNSUPPORTED; 2620 } 2621 offset++; 2622 2623 uint32_t flags; 2624 if (!mDataSource->getUInt24(offset, &flags)) { 2625 return ERROR_IO; 2626 } 2627 offset += 3; 2628 2629 if (flags & 1) { 2630 uint32_t tmp; 2631 if (!mDataSource->getUInt32(offset, &tmp)) { 2632 return ERROR_MALFORMED; 2633 } 2634 mCurrentAuxInfoType = tmp; 2635 offset += 4; 2636 if (!mDataSource->getUInt32(offset, &tmp)) { 2637 return ERROR_MALFORMED; 2638 } 2639 mCurrentAuxInfoTypeParameter = tmp; 2640 offset += 4; 2641 } 2642 2643 uint8_t defsize; 2644 if (mDataSource->readAt(offset, &defsize, 1) != 1) { 2645 return ERROR_MALFORMED; 2646 } 2647 mCurrentDefaultSampleInfoSize = defsize; 2648 offset++; 2649 2650 uint32_t smplcnt; 2651 if (!mDataSource->getUInt32(offset, &smplcnt)) { 2652 return ERROR_MALFORMED; 2653 } 2654 mCurrentSampleInfoCount = smplcnt; 2655 offset += 4; 2656 2657 if (mCurrentDefaultSampleInfoSize != 0) { 2658 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize); 2659 return OK; 2660 } 2661 if (smplcnt > 
mCurrentSampleInfoAllocSize) { 2662 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt); 2663 mCurrentSampleInfoAllocSize = smplcnt; 2664 } 2665 2666 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt); 2667 return OK; 2668} 2669 2670status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size) { 2671 ALOGV("parseSampleAuxiliaryInformationOffsets"); 2672 // 14496-12 8.7.13 2673 uint8_t version; 2674 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) { 2675 return ERROR_IO; 2676 } 2677 offset++; 2678 2679 uint32_t flags; 2680 if (!mDataSource->getUInt24(offset, &flags)) { 2681 return ERROR_IO; 2682 } 2683 offset += 3; 2684 2685 uint32_t entrycount; 2686 if (!mDataSource->getUInt32(offset, &entrycount)) { 2687 return ERROR_IO; 2688 } 2689 offset += 4; 2690 2691 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) { 2692 mCurrentSampleInfoOffsets = (uint64_t*) realloc(mCurrentSampleInfoOffsets, entrycount * 8); 2693 mCurrentSampleInfoOffsetsAllocSize = entrycount; 2694 } 2695 mCurrentSampleInfoOffsetCount = entrycount; 2696 2697 for (size_t i = 0; i < entrycount; i++) { 2698 if (version == 0) { 2699 uint32_t tmp; 2700 if (!mDataSource->getUInt32(offset, &tmp)) { 2701 return ERROR_IO; 2702 } 2703 mCurrentSampleInfoOffsets[i] = tmp; 2704 offset += 4; 2705 } else { 2706 uint64_t tmp; 2707 if (!mDataSource->getUInt64(offset, &tmp)) { 2708 return ERROR_IO; 2709 } 2710 mCurrentSampleInfoOffsets[i] = tmp; 2711 offset += 8; 2712 } 2713 } 2714 2715 // parse clear/encrypted data 2716 2717 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof 2718 2719 drmoffset += mCurrentMoofOffset; 2720 int ivlength; 2721 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength)); 2722 2723 // read CencSampleAuxiliaryDataFormats 2724 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) { 2725 Sample *smpl = &mCurrentSamples.editItemAt(i); 2726 2727 memset(smpl->iv, 0, 16); 2728 if 
(mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) { 2729 return ERROR_IO; 2730 } 2731 2732 drmoffset += ivlength; 2733 2734 int32_t smplinfosize = mCurrentDefaultSampleInfoSize; 2735 if (smplinfosize == 0) { 2736 smplinfosize = mCurrentSampleInfoSizes[i]; 2737 } 2738 if (smplinfosize > ivlength) { 2739 uint16_t numsubsamples; 2740 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) { 2741 return ERROR_IO; 2742 } 2743 drmoffset += 2; 2744 for (size_t j = 0; j < numsubsamples; j++) { 2745 uint16_t numclear; 2746 uint32_t numencrypted; 2747 if (!mDataSource->getUInt16(drmoffset, &numclear)) { 2748 return ERROR_IO; 2749 } 2750 drmoffset += 2; 2751 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) { 2752 return ERROR_IO; 2753 } 2754 drmoffset += 4; 2755 smpl->clearsizes.add(numclear); 2756 smpl->encryptedsizes.add(numencrypted); 2757 } 2758 } else { 2759 smpl->clearsizes.add(0); 2760 smpl->encryptedsizes.add(smpl->size); 2761 } 2762 } 2763 2764 2765 return OK; 2766} 2767 2768status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) { 2769 2770 if (size < 8) { 2771 return -EINVAL; 2772 } 2773 2774 uint32_t flags; 2775 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags 2776 return ERROR_MALFORMED; 2777 } 2778 2779 if (flags & 0xff000000) { 2780 return -EINVAL; 2781 } 2782 2783 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) { 2784 return ERROR_MALFORMED; 2785 } 2786 2787 if (mLastParsedTrackId != mTrackId) { 2788 // this is not the right track, skip it 2789 return OK; 2790 } 2791 2792 mTrackFragmentHeaderInfo.mFlags = flags; 2793 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId; 2794 offset += 8; 2795 size -= 8; 2796 2797 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID); 2798 2799 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) { 2800 if (size < 8) { 2801 return -EINVAL; 2802 } 2803 2804 if (!mDataSource->getUInt64(offset, 
&mTrackFragmentHeaderInfo.mBaseDataOffset)) { 2805 return ERROR_MALFORMED; 2806 } 2807 offset += 8; 2808 size -= 8; 2809 } 2810 2811 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) { 2812 if (size < 4) { 2813 return -EINVAL; 2814 } 2815 2816 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) { 2817 return ERROR_MALFORMED; 2818 } 2819 offset += 4; 2820 size -= 4; 2821 } 2822 2823 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 2824 if (size < 4) { 2825 return -EINVAL; 2826 } 2827 2828 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) { 2829 return ERROR_MALFORMED; 2830 } 2831 offset += 4; 2832 size -= 4; 2833 } 2834 2835 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 2836 if (size < 4) { 2837 return -EINVAL; 2838 } 2839 2840 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) { 2841 return ERROR_MALFORMED; 2842 } 2843 offset += 4; 2844 size -= 4; 2845 } 2846 2847 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 2848 if (size < 4) { 2849 return -EINVAL; 2850 } 2851 2852 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) { 2853 return ERROR_MALFORMED; 2854 } 2855 offset += 4; 2856 size -= 4; 2857 } 2858 2859 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) { 2860 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset; 2861 } 2862 2863 mTrackFragmentHeaderInfo.mDataOffset = 0; 2864 return OK; 2865} 2866 2867status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) { 2868 2869 ALOGV("MPEG4Extractor::parseTrackFragmentRun"); 2870 if (size < 8) { 2871 return -EINVAL; 2872 } 2873 2874 enum { 2875 kDataOffsetPresent = 0x01, 2876 kFirstSampleFlagsPresent = 0x04, 2877 kSampleDurationPresent = 0x100, 2878 kSampleSizePresent = 0x200, 2879 kSampleFlagsPresent = 0x400, 2880 kSampleCompositionTimeOffsetPresent = 0x800, 2881 }; 
2882 2883 uint32_t flags; 2884 if (!mDataSource->getUInt32(offset, &flags)) { 2885 return ERROR_MALFORMED; 2886 } 2887 ALOGV("fragment run flags: %08x", flags); 2888 2889 if (flags & 0xff000000) { 2890 return -EINVAL; 2891 } 2892 2893 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) { 2894 // These two shall not be used together. 2895 return -EINVAL; 2896 } 2897 2898 uint32_t sampleCount; 2899 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) { 2900 return ERROR_MALFORMED; 2901 } 2902 offset += 8; 2903 size -= 8; 2904 2905 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset; 2906 2907 uint32_t firstSampleFlags = 0; 2908 2909 if (flags & kDataOffsetPresent) { 2910 if (size < 4) { 2911 return -EINVAL; 2912 } 2913 2914 int32_t dataOffsetDelta; 2915 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) { 2916 return ERROR_MALFORMED; 2917 } 2918 2919 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta; 2920 2921 offset += 4; 2922 size -= 4; 2923 } 2924 2925 if (flags & kFirstSampleFlagsPresent) { 2926 if (size < 4) { 2927 return -EINVAL; 2928 } 2929 2930 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) { 2931 return ERROR_MALFORMED; 2932 } 2933 offset += 4; 2934 size -= 4; 2935 } 2936 2937 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0, 2938 sampleCtsOffset = 0; 2939 2940 size_t bytesPerSample = 0; 2941 if (flags & kSampleDurationPresent) { 2942 bytesPerSample += 4; 2943 } else if (mTrackFragmentHeaderInfo.mFlags 2944 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) { 2945 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 2946 } else { 2947 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration; 2948 } 2949 2950 if (flags & kSampleSizePresent) { 2951 bytesPerSample += 4; 2952 } else if (mTrackFragmentHeaderInfo.mFlags 2953 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) { 2954 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 2955 } 
else { 2956 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize; 2957 } 2958 2959 if (flags & kSampleFlagsPresent) { 2960 bytesPerSample += 4; 2961 } else if (mTrackFragmentHeaderInfo.mFlags 2962 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) { 2963 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 2964 } else { 2965 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags; 2966 } 2967 2968 if (flags & kSampleCompositionTimeOffsetPresent) { 2969 bytesPerSample += 4; 2970 } else { 2971 sampleCtsOffset = 0; 2972 } 2973 2974 if (size < sampleCount * bytesPerSample) { 2975 return -EINVAL; 2976 } 2977 2978 Sample tmp; 2979 for (uint32_t i = 0; i < sampleCount; ++i) { 2980 if (flags & kSampleDurationPresent) { 2981 if (!mDataSource->getUInt32(offset, &sampleDuration)) { 2982 return ERROR_MALFORMED; 2983 } 2984 offset += 4; 2985 } 2986 2987 if (flags & kSampleSizePresent) { 2988 if (!mDataSource->getUInt32(offset, &sampleSize)) { 2989 return ERROR_MALFORMED; 2990 } 2991 offset += 4; 2992 } 2993 2994 if (flags & kSampleFlagsPresent) { 2995 if (!mDataSource->getUInt32(offset, &sampleFlags)) { 2996 return ERROR_MALFORMED; 2997 } 2998 offset += 4; 2999 } 3000 3001 if (flags & kSampleCompositionTimeOffsetPresent) { 3002 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) { 3003 return ERROR_MALFORMED; 3004 } 3005 offset += 4; 3006 } 3007 3008 ALOGV("adding sample %d at offset 0x%08llx, size %u, duration %u, " 3009 " flags 0x%08x", i + 1, 3010 dataOffset, sampleSize, sampleDuration, 3011 (flags & kFirstSampleFlagsPresent) && i == 0 3012 ? 
firstSampleFlags : sampleFlags); 3013 tmp.offset = dataOffset; 3014 tmp.size = sampleSize; 3015 tmp.duration = sampleDuration; 3016 mCurrentSamples.add(tmp); 3017 3018 dataOffset += sampleSize; 3019 } 3020 3021 mTrackFragmentHeaderInfo.mDataOffset = dataOffset; 3022 3023 return OK; 3024} 3025 3026sp<MetaData> MPEG4Source::getFormat() { 3027 Mutex::Autolock autoLock(mLock); 3028 3029 return mFormat; 3030} 3031 3032size_t MPEG4Source::parseNALSize(const uint8_t *data) const { 3033 switch (mNALLengthSize) { 3034 case 1: 3035 return *data; 3036 case 2: 3037 return U16_AT(data); 3038 case 3: 3039 return ((size_t)data[0] << 16) | U16_AT(&data[1]); 3040 case 4: 3041 return U32_AT(data); 3042 } 3043 3044 // This cannot happen, mNALLengthSize springs to life by adding 1 to 3045 // a 2-bit integer. 3046 CHECK(!"Should not be here."); 3047 3048 return 0; 3049} 3050 3051status_t MPEG4Source::read( 3052 MediaBuffer **out, const ReadOptions *options) { 3053 Mutex::Autolock autoLock(mLock); 3054 3055 CHECK(mStarted); 3056 3057 if (mFirstMoofOffset > 0) { 3058 return fragmentedRead(out, options); 3059 } 3060 3061 *out = NULL; 3062 3063 int64_t targetSampleTimeUs = -1; 3064 3065 int64_t seekTimeUs; 3066 ReadOptions::SeekMode mode; 3067 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3068 uint32_t findFlags = 0; 3069 switch (mode) { 3070 case ReadOptions::SEEK_PREVIOUS_SYNC: 3071 findFlags = SampleTable::kFlagBefore; 3072 break; 3073 case ReadOptions::SEEK_NEXT_SYNC: 3074 findFlags = SampleTable::kFlagAfter; 3075 break; 3076 case ReadOptions::SEEK_CLOSEST_SYNC: 3077 case ReadOptions::SEEK_CLOSEST: 3078 findFlags = SampleTable::kFlagClosest; 3079 break; 3080 default: 3081 CHECK(!"Should not be here."); 3082 break; 3083 } 3084 3085 uint32_t sampleIndex; 3086 status_t err = mSampleTable->findSampleAtTime( 3087 seekTimeUs * mTimescale / 1000000, 3088 &sampleIndex, findFlags); 3089 3090 if (mode == ReadOptions::SEEK_CLOSEST) { 3091 // We found the closest sample already, now we 
want the sync 3092 // sample preceding it (or the sample itself of course), even 3093 // if the subsequent sync sample is closer. 3094 findFlags = SampleTable::kFlagBefore; 3095 } 3096 3097 uint32_t syncSampleIndex; 3098 if (err == OK) { 3099 err = mSampleTable->findSyncSampleNear( 3100 sampleIndex, &syncSampleIndex, findFlags); 3101 } 3102 3103 uint32_t sampleTime; 3104 if (err == OK) { 3105 err = mSampleTable->getMetaDataForSample( 3106 sampleIndex, NULL, NULL, &sampleTime); 3107 } 3108 3109 if (err != OK) { 3110 if (err == ERROR_OUT_OF_RANGE) { 3111 // An attempt to seek past the end of the stream would 3112 // normally cause this ERROR_OUT_OF_RANGE error. Propagating 3113 // this all the way to the MediaPlayer would cause abnormal 3114 // termination. Legacy behaviour appears to be to behave as if 3115 // we had seeked to the end of stream, ending normally. 3116 err = ERROR_END_OF_STREAM; 3117 } 3118 ALOGV("end of stream"); 3119 return err; 3120 } 3121 3122 if (mode == ReadOptions::SEEK_CLOSEST) { 3123 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale; 3124 } 3125 3126#if 0 3127 uint32_t syncSampleTime; 3128 CHECK_EQ(OK, mSampleTable->getMetaDataForSample( 3129 syncSampleIndex, NULL, NULL, &syncSampleTime)); 3130 3131 ALOGI("seek to time %lld us => sample at time %lld us, " 3132 "sync sample at time %lld us", 3133 seekTimeUs, 3134 sampleTime * 1000000ll / mTimescale, 3135 syncSampleTime * 1000000ll / mTimescale); 3136#endif 3137 3138 mCurrentSampleIndex = syncSampleIndex; 3139 if (mBuffer != NULL) { 3140 mBuffer->release(); 3141 mBuffer = NULL; 3142 } 3143 3144 // fall through 3145 } 3146 3147 off64_t offset; 3148 size_t size; 3149 uint32_t cts; 3150 bool isSyncSample; 3151 bool newBuffer = false; 3152 if (mBuffer == NULL) { 3153 newBuffer = true; 3154 3155 status_t err = 3156 mSampleTable->getMetaDataForSample( 3157 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample); 3158 3159 if (err != OK) { 3160 return err; 3161 } 3162 3163 err = 
mGroup->acquire_buffer(&mBuffer); 3164 3165 if (err != OK) { 3166 CHECK(mBuffer == NULL); 3167 return err; 3168 } 3169 } 3170 3171 if (!mIsAVC || mWantsNALFragments) { 3172 if (newBuffer) { 3173 ssize_t num_bytes_read = 3174 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3175 3176 if (num_bytes_read < (ssize_t)size) { 3177 mBuffer->release(); 3178 mBuffer = NULL; 3179 3180 return ERROR_IO; 3181 } 3182 3183 CHECK(mBuffer != NULL); 3184 mBuffer->set_range(0, size); 3185 mBuffer->meta_data()->clear(); 3186 mBuffer->meta_data()->setInt64( 3187 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3188 3189 if (targetSampleTimeUs >= 0) { 3190 mBuffer->meta_data()->setInt64( 3191 kKeyTargetTime, targetSampleTimeUs); 3192 } 3193 3194 if (isSyncSample) { 3195 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3196 } 3197 3198 ++mCurrentSampleIndex; 3199 } 3200 3201 if (!mIsAVC) { 3202 *out = mBuffer; 3203 mBuffer = NULL; 3204 3205 return OK; 3206 } 3207 3208 // Each NAL unit is split up into its constituent fragments and 3209 // each one of them returned in its own buffer. 
3210 3211 CHECK(mBuffer->range_length() >= mNALLengthSize); 3212 3213 const uint8_t *src = 3214 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3215 3216 size_t nal_size = parseNALSize(src); 3217 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3218 ALOGE("incomplete NAL unit."); 3219 3220 mBuffer->release(); 3221 mBuffer = NULL; 3222 3223 return ERROR_MALFORMED; 3224 } 3225 3226 MediaBuffer *clone = mBuffer->clone(); 3227 CHECK(clone != NULL); 3228 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3229 3230 CHECK(mBuffer != NULL); 3231 mBuffer->set_range( 3232 mBuffer->range_offset() + mNALLengthSize + nal_size, 3233 mBuffer->range_length() - mNALLengthSize - nal_size); 3234 3235 if (mBuffer->range_length() == 0) { 3236 mBuffer->release(); 3237 mBuffer = NULL; 3238 } 3239 3240 *out = clone; 3241 3242 return OK; 3243 } else { 3244 // Whole NAL units are returned but each fragment is prefixed by 3245 // the start code (0x00 00 00 01). 3246 ssize_t num_bytes_read = 0; 3247 int32_t drm = 0; 3248 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3249 if (usesDRM) { 3250 num_bytes_read = 3251 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3252 } else { 3253 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3254 } 3255 3256 if (num_bytes_read < (ssize_t)size) { 3257 mBuffer->release(); 3258 mBuffer = NULL; 3259 3260 return ERROR_IO; 3261 } 3262 3263 if (usesDRM) { 3264 CHECK(mBuffer != NULL); 3265 mBuffer->set_range(0, size); 3266 3267 } else { 3268 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3269 size_t srcOffset = 0; 3270 size_t dstOffset = 0; 3271 3272 while (srcOffset < size) { 3273 bool isMalFormed = (srcOffset + mNALLengthSize > size); 3274 size_t nalLength = 0; 3275 if (!isMalFormed) { 3276 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3277 srcOffset += mNALLengthSize; 3278 isMalFormed = srcOffset + nalLength > size; 3279 } 3280 3281 if (isMalFormed) { 3282 ALOGE("Video 
is malformed"); 3283 mBuffer->release(); 3284 mBuffer = NULL; 3285 return ERROR_MALFORMED; 3286 } 3287 3288 if (nalLength == 0) { 3289 continue; 3290 } 3291 3292 CHECK(dstOffset + 4 <= mBuffer->size()); 3293 3294 dstData[dstOffset++] = 0; 3295 dstData[dstOffset++] = 0; 3296 dstData[dstOffset++] = 0; 3297 dstData[dstOffset++] = 1; 3298 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3299 srcOffset += nalLength; 3300 dstOffset += nalLength; 3301 } 3302 CHECK_EQ(srcOffset, size); 3303 CHECK(mBuffer != NULL); 3304 mBuffer->set_range(0, dstOffset); 3305 } 3306 3307 mBuffer->meta_data()->clear(); 3308 mBuffer->meta_data()->setInt64( 3309 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3310 3311 if (targetSampleTimeUs >= 0) { 3312 mBuffer->meta_data()->setInt64( 3313 kKeyTargetTime, targetSampleTimeUs); 3314 } 3315 3316 if (isSyncSample) { 3317 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3318 } 3319 3320 ++mCurrentSampleIndex; 3321 3322 *out = mBuffer; 3323 mBuffer = NULL; 3324 3325 return OK; 3326 } 3327} 3328 3329status_t MPEG4Source::fragmentedRead( 3330 MediaBuffer **out, const ReadOptions *options) { 3331 3332 ALOGV("MPEG4Source::fragmentedRead"); 3333 3334 CHECK(mStarted); 3335 3336 *out = NULL; 3337 3338 int64_t targetSampleTimeUs = -1; 3339 3340 int64_t seekTimeUs; 3341 ReadOptions::SeekMode mode; 3342 if (options && options->getSeekTo(&seekTimeUs, &mode)) { 3343 3344 int numSidxEntries = mSegments.size(); 3345 if (numSidxEntries != 0) { 3346 int64_t totalTime = 0; 3347 off64_t totalOffset = mFirstMoofOffset; 3348 for (int i = 0; i < numSidxEntries; i++) { 3349 const SidxEntry *se = &mSegments[i]; 3350 if (totalTime + se->mDurationUs > seekTimeUs) { 3351 // The requested time is somewhere in this segment 3352 if ((mode == ReadOptions::SEEK_NEXT_SYNC) || 3353 (mode == ReadOptions::SEEK_CLOSEST_SYNC && 3354 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) { 3355 // requested next sync, or closest sync and it was 
closer to the end of 3356 // this segment 3357 totalTime += se->mDurationUs; 3358 totalOffset += se->mSize; 3359 } 3360 break; 3361 } 3362 totalTime += se->mDurationUs; 3363 totalOffset += se->mSize; 3364 } 3365 mCurrentMoofOffset = totalOffset; 3366 mCurrentSamples.clear(); 3367 mCurrentSampleIndex = 0; 3368 parseChunk(&totalOffset); 3369 mCurrentTime = totalTime * mTimescale / 1000000ll; 3370 } 3371 3372 if (mBuffer != NULL) { 3373 mBuffer->release(); 3374 mBuffer = NULL; 3375 } 3376 3377 // fall through 3378 } 3379 3380 off64_t offset = 0; 3381 size_t size; 3382 uint32_t cts = 0; 3383 bool isSyncSample = false; 3384 bool newBuffer = false; 3385 if (mBuffer == NULL) { 3386 newBuffer = true; 3387 3388 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3389 // move to next fragment 3390 Sample lastSample = mCurrentSamples[mCurrentSamples.size() - 1]; 3391 off64_t nextMoof = mNextMoofOffset; // lastSample.offset + lastSample.size; 3392 mCurrentMoofOffset = nextMoof; 3393 mCurrentSamples.clear(); 3394 mCurrentSampleIndex = 0; 3395 parseChunk(&nextMoof); 3396 if (mCurrentSampleIndex >= mCurrentSamples.size()) { 3397 return ERROR_END_OF_STREAM; 3398 } 3399 } 3400 3401 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3402 offset = smpl->offset; 3403 size = smpl->size; 3404 cts = mCurrentTime; 3405 mCurrentTime += smpl->duration; 3406 isSyncSample = (mCurrentSampleIndex == 0); // XXX 3407 3408 status_t err = mGroup->acquire_buffer(&mBuffer); 3409 3410 if (err != OK) { 3411 CHECK(mBuffer == NULL); 3412 ALOGV("acquire_buffer returned %d", err); 3413 return err; 3414 } 3415 } 3416 3417 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex]; 3418 const sp<MetaData> bufmeta = mBuffer->meta_data(); 3419 bufmeta->clear(); 3420 if (smpl->encryptedsizes.size()) { 3421 // store clear/encrypted lengths in metadata 3422 bufmeta->setData(kKeyPlainSizes, 0, 3423 smpl->clearsizes.array(), smpl->clearsizes.size() * 4); 3424 bufmeta->setData(kKeyEncryptedSizes, 0, 3425 
smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4); 3426 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size? 3427 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize); 3428 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode); 3429 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16); 3430 } 3431 3432 if (!mIsAVC || mWantsNALFragments) { 3433 if (newBuffer) { 3434 ssize_t num_bytes_read = 3435 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size); 3436 3437 if (num_bytes_read < (ssize_t)size) { 3438 mBuffer->release(); 3439 mBuffer = NULL; 3440 3441 ALOGV("i/o error"); 3442 return ERROR_IO; 3443 } 3444 3445 CHECK(mBuffer != NULL); 3446 mBuffer->set_range(0, size); 3447 mBuffer->meta_data()->setInt64( 3448 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3449 3450 if (targetSampleTimeUs >= 0) { 3451 mBuffer->meta_data()->setInt64( 3452 kKeyTargetTime, targetSampleTimeUs); 3453 } 3454 3455 if (isSyncSample) { 3456 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3457 } 3458 3459 ++mCurrentSampleIndex; 3460 } 3461 3462 if (!mIsAVC) { 3463 *out = mBuffer; 3464 mBuffer = NULL; 3465 3466 return OK; 3467 } 3468 3469 // Each NAL unit is split up into its constituent fragments and 3470 // each one of them returned in its own buffer. 
3471 3472 CHECK(mBuffer->range_length() >= mNALLengthSize); 3473 3474 const uint8_t *src = 3475 (const uint8_t *)mBuffer->data() + mBuffer->range_offset(); 3476 3477 size_t nal_size = parseNALSize(src); 3478 if (mBuffer->range_length() < mNALLengthSize + nal_size) { 3479 ALOGE("incomplete NAL unit."); 3480 3481 mBuffer->release(); 3482 mBuffer = NULL; 3483 3484 return ERROR_MALFORMED; 3485 } 3486 3487 MediaBuffer *clone = mBuffer->clone(); 3488 CHECK(clone != NULL); 3489 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size); 3490 3491 CHECK(mBuffer != NULL); 3492 mBuffer->set_range( 3493 mBuffer->range_offset() + mNALLengthSize + nal_size, 3494 mBuffer->range_length() - mNALLengthSize - nal_size); 3495 3496 if (mBuffer->range_length() == 0) { 3497 mBuffer->release(); 3498 mBuffer = NULL; 3499 } 3500 3501 *out = clone; 3502 3503 return OK; 3504 } else { 3505 ALOGV("whole NAL"); 3506 // Whole NAL units are returned but each fragment is prefixed by 3507 // the start code (0x00 00 00 01). 
3508 ssize_t num_bytes_read = 0; 3509 int32_t drm = 0; 3510 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0); 3511 if (usesDRM) { 3512 num_bytes_read = 3513 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size); 3514 } else { 3515 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size); 3516 } 3517 3518 if (num_bytes_read < (ssize_t)size) { 3519 mBuffer->release(); 3520 mBuffer = NULL; 3521 3522 ALOGV("i/o error"); 3523 return ERROR_IO; 3524 } 3525 3526 if (usesDRM) { 3527 CHECK(mBuffer != NULL); 3528 mBuffer->set_range(0, size); 3529 3530 } else { 3531 uint8_t *dstData = (uint8_t *)mBuffer->data(); 3532 size_t srcOffset = 0; 3533 size_t dstOffset = 0; 3534 3535 while (srcOffset < size) { 3536 bool isMalFormed = (srcOffset + mNALLengthSize > size); 3537 size_t nalLength = 0; 3538 if (!isMalFormed) { 3539 nalLength = parseNALSize(&mSrcBuffer[srcOffset]); 3540 srcOffset += mNALLengthSize; 3541 isMalFormed = srcOffset + nalLength > size; 3542 } 3543 3544 if (isMalFormed) { 3545 ALOGE("Video is malformed"); 3546 mBuffer->release(); 3547 mBuffer = NULL; 3548 return ERROR_MALFORMED; 3549 } 3550 3551 if (nalLength == 0) { 3552 continue; 3553 } 3554 3555 CHECK(dstOffset + 4 <= mBuffer->size()); 3556 3557 dstData[dstOffset++] = 0; 3558 dstData[dstOffset++] = 0; 3559 dstData[dstOffset++] = 0; 3560 dstData[dstOffset++] = 1; 3561 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength); 3562 srcOffset += nalLength; 3563 dstOffset += nalLength; 3564 } 3565 CHECK_EQ(srcOffset, size); 3566 CHECK(mBuffer != NULL); 3567 mBuffer->set_range(0, dstOffset); 3568 } 3569 3570 mBuffer->meta_data()->setInt64( 3571 kKeyTime, ((int64_t)cts * 1000000) / mTimescale); 3572 3573 if (targetSampleTimeUs >= 0) { 3574 mBuffer->meta_data()->setInt64( 3575 kKeyTargetTime, targetSampleTimeUs); 3576 } 3577 3578 if (isSyncSample) { 3579 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1); 3580 } 3581 3582 ++mCurrentSampleIndex; 3583 3584 *out = mBuffer; 3585 mBuffer = 
NULL; 3586 3587 return OK; 3588 } 3589} 3590 3591MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix( 3592 const char *mimePrefix) { 3593 for (Track *track = mFirstTrack; track != NULL; track = track->next) { 3594 const char *mime; 3595 if (track->meta != NULL 3596 && track->meta->findCString(kKeyMIMEType, &mime) 3597 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) { 3598 return track; 3599 } 3600 } 3601 3602 return NULL; 3603} 3604 3605static bool LegacySniffMPEG4( 3606 const sp<DataSource> &source, String8 *mimeType, float *confidence) { 3607 uint8_t header[8]; 3608 3609 ssize_t n = source->readAt(4, header, sizeof(header)); 3610 if (n < (ssize_t)sizeof(header)) { 3611 return false; 3612 } 3613 3614 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8) 3615 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8) 3616 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8) 3617 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8) 3618 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8) 3619 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) { 3620 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 3621 *confidence = 0.4; 3622 3623 return true; 3624 } 3625 3626 return false; 3627} 3628 3629static bool isCompatibleBrand(uint32_t fourcc) { 3630 static const uint32_t kCompatibleBrands[] = { 3631 FOURCC('i', 's', 'o', 'm'), 3632 FOURCC('i', 's', 'o', '2'), 3633 FOURCC('a', 'v', 'c', '1'), 3634 FOURCC('3', 'g', 'p', '4'), 3635 FOURCC('m', 'p', '4', '1'), 3636 FOURCC('m', 'p', '4', '2'), 3637 3638 // Won't promise that the following file types can be played. 3639 // Just give these file types a chance. 
3640 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime 3641 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP 3642 3643 FOURCC('3', 'g', '2', 'a'), // 3GPP2 3644 FOURCC('3', 'g', '2', 'b'), 3645 }; 3646 3647 for (size_t i = 0; 3648 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); 3649 ++i) { 3650 if (kCompatibleBrands[i] == fourcc) { 3651 return true; 3652 } 3653 } 3654 3655 return false; 3656} 3657 3658// Attempt to actually parse the 'ftyp' atom and determine if a suitable 3659// compatible brand is present. 3660// Also try to identify where this file's metadata ends 3661// (end of the 'moov' atom) and report it to the caller as part of 3662// the metadata. 3663static bool BetterSniffMPEG4( 3664 const sp<DataSource> &source, String8 *mimeType, float *confidence, 3665 sp<AMessage> *meta) { 3666 // We scan up to 128 bytes to identify this file as an MP4. 3667 static const off64_t kMaxScanOffset = 128ll; 3668 3669 off64_t offset = 0ll; 3670 bool foundGoodFileType = false; 3671 off64_t moovAtomEndOffset = -1ll; 3672 bool done = false; 3673 3674 while (!done && offset < kMaxScanOffset) { 3675 uint32_t hdr[2]; 3676 if (source->readAt(offset, hdr, 8) < 8) { 3677 return false; 3678 } 3679 3680 uint64_t chunkSize = ntohl(hdr[0]); 3681 uint32_t chunkType = ntohl(hdr[1]); 3682 off64_t chunkDataOffset = offset + 8; 3683 3684 if (chunkSize == 1) { 3685 if (source->readAt(offset + 8, &chunkSize, 8) < 8) { 3686 return false; 3687 } 3688 3689 chunkSize = ntoh64(chunkSize); 3690 chunkDataOffset += 8; 3691 3692 if (chunkSize < 16) { 3693 // The smallest valid chunk is 16 bytes long in this case. 3694 return false; 3695 } 3696 } else if (chunkSize < 8) { 3697 // The smallest valid chunk is 8 bytes long. 
3698 return false; 3699 } 3700 3701 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; 3702 3703 char chunkstring[5]; 3704 MakeFourCCString(chunkType, chunkstring); 3705 ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset); 3706 switch (chunkType) { 3707 case FOURCC('f', 't', 'y', 'p'): 3708 { 3709 if (chunkDataSize < 8) { 3710 return false; 3711 } 3712 3713 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; 3714 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { 3715 if (i == 1) { 3716 // Skip this index, it refers to the minorVersion, 3717 // not a brand. 3718 continue; 3719 } 3720 3721 uint32_t brand; 3722 if (source->readAt( 3723 chunkDataOffset + 4 * i, &brand, 4) < 4) { 3724 return false; 3725 } 3726 3727 brand = ntohl(brand); 3728 3729 if (isCompatibleBrand(brand)) { 3730 foundGoodFileType = true; 3731 break; 3732 } 3733 } 3734 3735 if (!foundGoodFileType) { 3736 return false; 3737 } 3738 3739 break; 3740 } 3741 3742 case FOURCC('m', 'o', 'o', 'v'): 3743 { 3744 moovAtomEndOffset = offset + chunkSize; 3745 3746 done = true; 3747 break; 3748 } 3749 3750 default: 3751 break; 3752 } 3753 3754 offset += chunkSize; 3755 } 3756 3757 if (!foundGoodFileType) { 3758 return false; 3759 } 3760 3761 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; 3762 *confidence = 0.4f; 3763 3764 if (moovAtomEndOffset >= 0) { 3765 *meta = new AMessage; 3766 (*meta)->setInt64("meta-data-size", moovAtomEndOffset); 3767 3768 ALOGV("found metadata size: %lld", moovAtomEndOffset); 3769 } 3770 3771 return true; 3772} 3773 3774bool SniffMPEG4( 3775 const sp<DataSource> &source, String8 *mimeType, float *confidence, 3776 sp<AMessage> *meta) { 3777 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) { 3778 return true; 3779 } 3780 3781 if (LegacySniffMPEG4(source, mimeType, confidence)) { 3782 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4."); 3783 return true; 3784 } 3785 3786 return false; 3787} 3788 3789} // namespace android 3790