1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ID3"
19#include <utils/Log.h>
20
21#include "../include/ID3.h"
22
23#include <media/stagefright/foundation/ADebug.h>
24#include <media/stagefright/DataSource.h>
25#include <media/stagefright/Utils.h>
26#include <utils/String8.h>
27#include <byteswap.h>
28
29namespace android {
30
// Upper bound (3 MiB) on the ID3v2 tag body that parseV2() is willing to
// allocate and read; larger tags are skipped.
static const size_t kMaxMetadataSize = 3u * 1024u * 1024u;
32
33struct MemorySource : public DataSource {
34    MemorySource(const uint8_t *data, size_t size)
35        : mData(data),
36          mSize(size) {
37    }
38
39    virtual status_t initCheck() const {
40        return OK;
41    }
42
43    virtual ssize_t readAt(off64_t offset, void *data, size_t size) {
44        off64_t available = (offset >= (off64_t)mSize) ? 0ll : mSize - offset;
45
46        size_t copy = (available > (off64_t)size) ? size : available;
47        memcpy(data, mData + offset, copy);
48
49        return copy;
50    }
51
52private:
53    const uint8_t *mData;
54    size_t mSize;
55
56    DISALLOW_EVIL_CONSTRUCTORS(MemorySource);
57};
58
59ID3::ID3(const sp<DataSource> &source, bool ignoreV1, off64_t offset)
60    : mIsValid(false),
61      mData(NULL),
62      mSize(0),
63      mFirstFrameOffset(0),
64      mVersion(ID3_UNKNOWN),
65      mRawSize(0) {
66    mIsValid = parseV2(source, offset);
67
68    if (!mIsValid && !ignoreV1) {
69        mIsValid = parseV1(source);
70    }
71}
72
73ID3::ID3(const uint8_t *data, size_t size, bool ignoreV1)
74    : mIsValid(false),
75      mData(NULL),
76      mSize(0),
77      mFirstFrameOffset(0),
78      mVersion(ID3_UNKNOWN),
79      mRawSize(0) {
80    sp<MemorySource> source = new MemorySource(data, size);
81
82    mIsValid = parseV2(source, 0);
83
84    if (!mIsValid && !ignoreV1) {
85        mIsValid = parseV1(source);
86    }
87}
88
89ID3::~ID3() {
90    if (mData) {
91        free(mData);
92        mData = NULL;
93    }
94}
95
96bool ID3::isValid() const {
97    return mIsValid;
98}
99
100ID3::Version ID3::version() const {
101    return mVersion;
102}
103
104// static
105bool ID3::ParseSyncsafeInteger(const uint8_t encoded[4], size_t *x) {
106    *x = 0;
107    for (int32_t i = 0; i < 4; ++i) {
108        if (encoded[i] & 0x80) {
109            return false;
110        }
111
112        *x = ((*x) << 7) | encoded[i];
113    }
114
115    return true;
116}
117
118bool ID3::parseV2(const sp<DataSource> &source, off64_t offset) {
119struct id3_header {
120    char id[3];
121    uint8_t version_major;
122    uint8_t version_minor;
123    uint8_t flags;
124    uint8_t enc_size[4];
125    };
126
127    id3_header header;
128    if (source->readAt(
129                offset, &header, sizeof(header)) != (ssize_t)sizeof(header)) {
130        return false;
131    }
132
133    if (memcmp(header.id, "ID3", 3)) {
134        return false;
135    }
136
137    if (header.version_major == 0xff || header.version_minor == 0xff) {
138        return false;
139    }
140
141    if (header.version_major == 2) {
142        if (header.flags & 0x3f) {
143            // We only support the 2 high bits, if any of the lower bits are
144            // set, we cannot guarantee to understand the tag format.
145            return false;
146        }
147
148        if (header.flags & 0x40) {
149            // No compression scheme has been decided yet, ignore the
150            // tag if compression is indicated.
151
152            return false;
153        }
154    } else if (header.version_major == 3) {
155        if (header.flags & 0x1f) {
156            // We only support the 3 high bits, if any of the lower bits are
157            // set, we cannot guarantee to understand the tag format.
158            return false;
159        }
160    } else if (header.version_major == 4) {
161        if (header.flags & 0x0f) {
162            // The lower 4 bits are undefined in this spec.
163            return false;
164        }
165    } else {
166        return false;
167    }
168
169    size_t size;
170    if (!ParseSyncsafeInteger(header.enc_size, &size)) {
171        return false;
172    }
173
174    if (size > kMaxMetadataSize) {
175        ALOGE("skipping huge ID3 metadata of size %zu", size);
176        return false;
177    }
178
179    mData = (uint8_t *)malloc(size);
180
181    if (mData == NULL) {
182        return false;
183    }
184
185    mSize = size;
186    mRawSize = mSize + sizeof(header);
187
188    if (source->readAt(offset + sizeof(header), mData, mSize) != (ssize_t)mSize) {
189        free(mData);
190        mData = NULL;
191
192        return false;
193    }
194
195    if (header.version_major == 4) {
196        void *copy = malloc(size);
197        memcpy(copy, mData, size);
198
199        bool success = removeUnsynchronizationV2_4(false /* iTunesHack */);
200        if (!success) {
201            memcpy(mData, copy, size);
202            mSize = size;
203
204            success = removeUnsynchronizationV2_4(true /* iTunesHack */);
205
206            if (success) {
207                ALOGV("Had to apply the iTunes hack to parse this ID3 tag");
208            }
209        }
210
211        free(copy);
212        copy = NULL;
213
214        if (!success) {
215            free(mData);
216            mData = NULL;
217
218            return false;
219        }
220    } else if (header.flags & 0x80) {
221        ALOGV("removing unsynchronization");
222
223        removeUnsynchronization();
224    }
225
226    mFirstFrameOffset = 0;
227    if (header.version_major == 3 && (header.flags & 0x40)) {
228        // Version 2.3 has an optional extended header.
229
230        if (mSize < 4) {
231            free(mData);
232            mData = NULL;
233
234            return false;
235        }
236
237        size_t extendedHeaderSize = U32_AT(&mData[0]) + 4;
238
239        if (extendedHeaderSize > mSize) {
240            free(mData);
241            mData = NULL;
242
243            return false;
244        }
245
246        mFirstFrameOffset = extendedHeaderSize;
247
248        uint16_t extendedFlags = 0;
249        if (extendedHeaderSize >= 6) {
250            extendedFlags = U16_AT(&mData[4]);
251
252            if (extendedHeaderSize >= 10) {
253                size_t paddingSize = U32_AT(&mData[6]);
254
255                if (mFirstFrameOffset + paddingSize > mSize) {
256                    free(mData);
257                    mData = NULL;
258
259                    return false;
260                }
261
262                mSize -= paddingSize;
263            }
264
265            if (extendedFlags & 0x8000) {
266                ALOGV("have crc");
267            }
268        }
269    } else if (header.version_major == 4 && (header.flags & 0x40)) {
270        // Version 2.4 has an optional extended header, that's different
271        // from Version 2.3's...
272
273        if (mSize < 4) {
274            free(mData);
275            mData = NULL;
276
277            return false;
278        }
279
280        size_t ext_size;
281        if (!ParseSyncsafeInteger(mData, &ext_size)) {
282            free(mData);
283            mData = NULL;
284
285            return false;
286        }
287
288        if (ext_size < 6 || ext_size > mSize) {
289            free(mData);
290            mData = NULL;
291
292            return false;
293        }
294
295        mFirstFrameOffset = ext_size;
296    }
297
298    if (header.version_major == 2) {
299        mVersion = ID3_V2_2;
300    } else if (header.version_major == 3) {
301        mVersion = ID3_V2_3;
302    } else {
303        CHECK_EQ(header.version_major, 4);
304        mVersion = ID3_V2_4;
305    }
306
307    return true;
308}
309
310void ID3::removeUnsynchronization() {
311    for (size_t i = 0; i + 1 < mSize; ++i) {
312        if (mData[i] == 0xff && mData[i + 1] == 0x00) {
313            memmove(&mData[i + 1], &mData[i + 2], mSize - i - 2);
314            --mSize;
315        }
316    }
317}
318
// Stores the low 28 bits of |x| into dst[0..3] as a "syncsafe" integer:
// 7 bits per byte, most significant group first, top bit of each byte clear.
static void WriteSyncsafeInteger(uint8_t *dst, size_t x) {
    for (int index = 3; index >= 0; --index) {
        dst[index] = (x & 0x7f);
        x >>= 7;
    }
}
325
326bool ID3::removeUnsynchronizationV2_4(bool iTunesHack) {
327    size_t oldSize = mSize;
328
329    size_t offset = 0;
330    while (offset + 10 <= mSize) {
331        if (!memcmp(&mData[offset], "\0\0\0\0", 4)) {
332            break;
333        }
334
335        size_t dataSize;
336        if (iTunesHack) {
337            dataSize = U32_AT(&mData[offset + 4]);
338        } else if (!ParseSyncsafeInteger(&mData[offset + 4], &dataSize)) {
339            return false;
340        }
341
342        if (offset + dataSize + 10 > mSize) {
343            return false;
344        }
345
346        uint16_t flags = U16_AT(&mData[offset + 8]);
347        uint16_t prevFlags = flags;
348
349        if (flags & 1) {
350            // Strip data length indicator
351
352            memmove(&mData[offset + 10], &mData[offset + 14], mSize - offset - 14);
353            mSize -= 4;
354            dataSize -= 4;
355
356            flags &= ~1;
357        }
358
359        if (flags & 2) {
360            // This file has "unsynchronization", so we have to replace occurrences
361            // of 0xff 0x00 with just 0xff in order to get the real data.
362
363            size_t readOffset = offset + 11;
364            size_t writeOffset = offset + 11;
365            for (size_t i = 0; i + 1 < dataSize; ++i) {
366                if (mData[readOffset - 1] == 0xff
367                        && mData[readOffset] == 0x00) {
368                    ++readOffset;
369                    --mSize;
370                    --dataSize;
371                }
372                mData[writeOffset++] = mData[readOffset++];
373            }
374            // move the remaining data following this frame
375            memmove(&mData[writeOffset], &mData[readOffset], oldSize - readOffset);
376
377            flags &= ~2;
378        }
379
380        if (flags != prevFlags || iTunesHack) {
381            WriteSyncsafeInteger(&mData[offset + 4], dataSize);
382            mData[offset + 8] = flags >> 8;
383            mData[offset + 9] = flags & 0xff;
384        }
385
386        offset += 10 + dataSize;
387    }
388
389    memset(&mData[mSize], 0, oldSize - mSize);
390
391    return true;
392}
393
394ID3::Iterator::Iterator(const ID3 &parent, const char *id)
395    : mParent(parent),
396      mID(NULL),
397      mOffset(mParent.mFirstFrameOffset),
398      mFrameData(NULL),
399      mFrameSize(0) {
400    if (id) {
401        mID = strdup(id);
402    }
403
404    findFrame();
405}
406
407ID3::Iterator::~Iterator() {
408    if (mID) {
409        free(mID);
410        mID = NULL;
411    }
412}
413
414bool ID3::Iterator::done() const {
415    return mFrameData == NULL;
416}
417
418void ID3::Iterator::next() {
419    if (mFrameData == NULL) {
420        return;
421    }
422
423    mOffset += mFrameSize;
424
425    findFrame();
426}
427
428void ID3::Iterator::getID(String8 *id) const {
429    id->setTo("");
430
431    if (mFrameData == NULL) {
432        return;
433    }
434
435    if (mParent.mVersion == ID3_V2_2) {
436        id->setTo((const char *)&mParent.mData[mOffset], 3);
437    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
438        id->setTo((const char *)&mParent.mData[mOffset], 4);
439    } else {
440        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
441
442        switch (mOffset) {
443            case 3:
444                id->setTo("TT2");
445                break;
446            case 33:
447                id->setTo("TP1");
448                break;
449            case 63:
450                id->setTo("TAL");
451                break;
452            case 93:
453                id->setTo("TYE");
454                break;
455            case 97:
456                id->setTo("COM");
457                break;
458            case 126:
459                id->setTo("TRK");
460                break;
461            case 127:
462                id->setTo("TCO");
463                break;
464            default:
465                CHECK(!"should not be here.");
466                break;
467        }
468    }
469}
470
471
472// the 2nd argument is used to get the data following the \0 in a comment field
473void ID3::Iterator::getString(String8 *id, String8 *comment) const {
474    getstring(id, false);
475    if (comment != NULL) {
476        getstring(comment, true);
477    }
478}
479
480// comment fields (COM/COMM) contain an initial short descriptor, followed by \0,
481// followed by more data. The data following the \0 can be retrieved by setting
482// "otherdata" to true.
483void ID3::Iterator::getstring(String8 *id, bool otherdata) const {
484    id->setTo("");
485
486    const uint8_t *frameData = mFrameData;
487    if (frameData == NULL) {
488        return;
489    }
490
491    uint8_t encoding = *frameData;
492
493    if (mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1) {
494        if (mOffset == 126 || mOffset == 127) {
495            // Special treatment for the track number and genre.
496            char tmp[16];
497            sprintf(tmp, "%d", (int)*frameData);
498
499            id->setTo(tmp);
500            return;
501        }
502
503        // this is supposed to be ISO-8859-1, but pass it up as-is to the caller, who will figure
504        // out the real encoding
505        id->setTo((const char*)frameData, mFrameSize);
506        return;
507    }
508
509    size_t n = mFrameSize - getHeaderLength() - 1;
510    if (otherdata) {
511        // skip past the encoding, language, and the 0 separator
512        frameData += 4;
513        int32_t i = n - 4;
514        while(--i >= 0 && *++frameData != 0) ;
515        int skipped = (frameData - mFrameData);
516        if (skipped >= (int)n) {
517            return;
518        }
519        n -= skipped;
520    }
521
522    if (encoding == 0x00) {
523        // supposedly ISO 8859-1
524        id->setTo((const char*)frameData + 1, n);
525    } else if (encoding == 0x03) {
526        // supposedly UTF-8
527        id->setTo((const char *)(frameData + 1), n);
528    } else if (encoding == 0x02) {
529        // supposedly UTF-16 BE, no byte order mark.
530        // API wants number of characters, not number of bytes...
531        int len = n / 2;
532        const char16_t *framedata = (const char16_t *) (frameData + 1);
533        char16_t *framedatacopy = NULL;
534#if BYTE_ORDER == LITTLE_ENDIAN
535        framedatacopy = new char16_t[len];
536        for (int i = 0; i < len; i++) {
537            framedatacopy[i] = bswap_16(framedata[i]);
538        }
539        framedata = framedatacopy;
540#endif
541        id->setTo(framedata, len);
542        if (framedatacopy != NULL) {
543            delete[] framedatacopy;
544        }
545    } else if (encoding == 0x01) {
546        // UCS-2
547        // API wants number of characters, not number of bytes...
548        int len = n / 2;
549        const char16_t *framedata = (const char16_t *) (frameData + 1);
550        char16_t *framedatacopy = NULL;
551        if (*framedata == 0xfffe) {
552            // endianness marker doesn't match host endianness, convert
553            framedatacopy = new char16_t[len];
554            for (int i = 0; i < len; i++) {
555                framedatacopy[i] = bswap_16(framedata[i]);
556            }
557            framedata = framedatacopy;
558        }
559        // If the string starts with an endianness marker, skip it
560        if (*framedata == 0xfeff) {
561            framedata++;
562            len--;
563        }
564
565        // check if the resulting data consists entirely of 8-bit values
566        bool eightBit = true;
567        for (int i = 0; i < len; i++) {
568            if (framedata[i] > 0xff) {
569                eightBit = false;
570                break;
571            }
572        }
573        if (eightBit) {
574            // collapse to 8 bit, then let the media scanner client figure out the real encoding
575            char *frame8 = new char[len];
576            for (int i = 0; i < len; i++) {
577                frame8[i] = framedata[i];
578            }
579            id->setTo(frame8, len);
580            delete [] frame8;
581        } else {
582            id->setTo(framedata, len);
583        }
584
585        if (framedatacopy != NULL) {
586            delete[] framedatacopy;
587        }
588    }
589}
590
591const uint8_t *ID3::Iterator::getData(size_t *length) const {
592    *length = 0;
593
594    if (mFrameData == NULL) {
595        return NULL;
596    }
597
598    *length = mFrameSize - getHeaderLength();
599
600    return mFrameData;
601}
602
603size_t ID3::Iterator::getHeaderLength() const {
604    if (mParent.mVersion == ID3_V2_2) {
605        return 6;
606    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
607        return 10;
608    } else {
609        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
610        return 0;
611    }
612}
613
614void ID3::Iterator::findFrame() {
615    for (;;) {
616        mFrameData = NULL;
617        mFrameSize = 0;
618
619        if (mParent.mVersion == ID3_V2_2) {
620            if (mOffset + 6 > mParent.mSize) {
621                return;
622            }
623
624            if (!memcmp(&mParent.mData[mOffset], "\0\0\0", 3)) {
625                return;
626            }
627
628            mFrameSize =
629                (mParent.mData[mOffset + 3] << 16)
630                | (mParent.mData[mOffset + 4] << 8)
631                | mParent.mData[mOffset + 5];
632
633            mFrameSize += 6;
634
635            if (mOffset + mFrameSize > mParent.mSize) {
636                ALOGV("partial frame at offset %zu (size = %zu, bytes-remaining = %zu)",
637                    mOffset, mFrameSize, mParent.mSize - mOffset - (size_t)6);
638                return;
639            }
640
641            mFrameData = &mParent.mData[mOffset + 6];
642
643            if (!mID) {
644                break;
645            }
646
647            char id[4];
648            memcpy(id, &mParent.mData[mOffset], 3);
649            id[3] = '\0';
650
651            if (!strcmp(id, mID)) {
652                break;
653            }
654        } else if (mParent.mVersion == ID3_V2_3
655                || mParent.mVersion == ID3_V2_4) {
656            if (mOffset + 10 > mParent.mSize) {
657                return;
658            }
659
660            if (!memcmp(&mParent.mData[mOffset], "\0\0\0\0", 4)) {
661                return;
662            }
663
664            size_t baseSize;
665            if (mParent.mVersion == ID3_V2_4) {
666                if (!ParseSyncsafeInteger(
667                            &mParent.mData[mOffset + 4], &baseSize)) {
668                    return;
669                }
670            } else {
671                baseSize = U32_AT(&mParent.mData[mOffset + 4]);
672            }
673
674            mFrameSize = 10 + baseSize;
675
676            if (mOffset + mFrameSize > mParent.mSize) {
677                ALOGV("partial frame at offset %zu (size = %zu, bytes-remaining = %zu)",
678                    mOffset, mFrameSize, mParent.mSize - mOffset - (size_t)10);
679                return;
680            }
681
682            uint16_t flags = U16_AT(&mParent.mData[mOffset + 8]);
683
684            if ((mParent.mVersion == ID3_V2_4 && (flags & 0x000c))
685                || (mParent.mVersion == ID3_V2_3 && (flags & 0x00c0))) {
686                // Compression or encryption are not supported at this time.
687                // Per-frame unsynchronization and data-length indicator
688                // have already been taken care of.
689
690                ALOGV("Skipping unsupported frame (compression, encryption "
691                     "or per-frame unsynchronization flagged");
692
693                mOffset += mFrameSize;
694                continue;
695            }
696
697            mFrameData = &mParent.mData[mOffset + 10];
698
699            if (!mID) {
700                break;
701            }
702
703            char id[5];
704            memcpy(id, &mParent.mData[mOffset], 4);
705            id[4] = '\0';
706
707            if (!strcmp(id, mID)) {
708                break;
709            }
710        } else {
711            CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
712
713            if (mOffset >= mParent.mSize) {
714                return;
715            }
716
717            mFrameData = &mParent.mData[mOffset];
718
719            switch (mOffset) {
720                case 3:
721                case 33:
722                case 63:
723                    mFrameSize = 30;
724                    break;
725                case 93:
726                    mFrameSize = 4;
727                    break;
728                case 97:
729                    if (mParent.mVersion == ID3_V1) {
730                        mFrameSize = 30;
731                    } else {
732                        mFrameSize = 29;
733                    }
734                    break;
735                case 126:
736                    mFrameSize = 1;
737                    break;
738                case 127:
739                    mFrameSize = 1;
740                    break;
741                default:
742                    CHECK(!"Should not be here, invalid offset.");
743                    break;
744            }
745
746            if (!mID) {
747                break;
748            }
749
750            String8 id;
751            getID(&id);
752
753            if (id == mID) {
754                break;
755            }
756        }
757
758        mOffset += mFrameSize;
759    }
760}
761
762static size_t StringSize(const uint8_t *start, uint8_t encoding) {
763    if (encoding == 0x00 || encoding == 0x03) {
764        // ISO 8859-1 or UTF-8
765        return strlen((const char *)start) + 1;
766    }
767
768    // UCS-2
769    size_t n = 0;
770    while (start[n] != '\0' || start[n + 1] != '\0') {
771        n += 2;
772    }
773
774    // Add size of null termination.
775    return n + 2;
776}
777
778const void *
779ID3::getAlbumArt(size_t *length, String8 *mime) const {
780    *length = 0;
781    mime->setTo("");
782
783    Iterator it(
784            *this,
785            (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) ? "APIC" : "PIC");
786
787    while (!it.done()) {
788        size_t size;
789        const uint8_t *data = it.getData(&size);
790
791        if (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) {
792            uint8_t encoding = data[0];
793            mime->setTo((const char *)&data[1]);
794            size_t mimeLen = strlen((const char *)&data[1]) + 1;
795
796            uint8_t picType = data[1 + mimeLen];
797#if 0
798            if (picType != 0x03) {
799                // Front Cover Art
800                it.next();
801                continue;
802            }
803#endif
804
805            size_t descLen = StringSize(&data[2 + mimeLen], encoding);
806
807            *length = size - 2 - mimeLen - descLen;
808
809            return &data[2 + mimeLen + descLen];
810        } else {
811            uint8_t encoding = data[0];
812
813            if (!memcmp(&data[1], "PNG", 3)) {
814                mime->setTo("image/png");
815            } else if (!memcmp(&data[1], "JPG", 3)) {
816                mime->setTo("image/jpeg");
817            } else if (!memcmp(&data[1], "-->", 3)) {
818                mime->setTo("text/plain");
819            } else {
820                return NULL;
821            }
822
823#if 0
824            uint8_t picType = data[4];
825            if (picType != 0x03) {
826                // Front Cover Art
827                it.next();
828                continue;
829            }
830#endif
831
832            size_t descLen = StringSize(&data[5], encoding);
833
834            *length = size - 5 - descLen;
835
836            return &data[5 + descLen];
837        }
838    }
839
840    return NULL;
841}
842
843bool ID3::parseV1(const sp<DataSource> &source) {
844    const size_t V1_TAG_SIZE = 128;
845
846    off64_t size;
847    if (source->getSize(&size) != OK || size < (off64_t)V1_TAG_SIZE) {
848        return false;
849    }
850
851    mData = (uint8_t *)malloc(V1_TAG_SIZE);
852    if (source->readAt(size - V1_TAG_SIZE, mData, V1_TAG_SIZE)
853            != (ssize_t)V1_TAG_SIZE) {
854        free(mData);
855        mData = NULL;
856
857        return false;
858    }
859
860    if (memcmp("TAG", mData, 3)) {
861        free(mData);
862        mData = NULL;
863
864        return false;
865    }
866
867    mSize = V1_TAG_SIZE;
868    mFirstFrameOffset = 3;
869
870    if (mData[V1_TAG_SIZE - 3] != 0) {
871        mVersion = ID3_V1;
872    } else {
873        mVersion = ID3_V1_1;
874    }
875
876    return true;
877}
878
879}  // namespace android
880