1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ID3"
19#include <utils/Log.h>
20
21#include "../include/ID3.h"
22
23#include <media/stagefright/foundation/ADebug.h>
24#include <media/stagefright/DataSource.h>
25#include <media/stagefright/Utils.h>
26#include <utils/String8.h>
27#include <byteswap.h>
28
29namespace android {
30
// Largest ID3v2 tag body, in bytes, that will be accepted (3 MiB).
static const size_t kMaxMetadataSize = 3u << 20;
32
33ID3::ID3(const sp<DataSource> &source, bool ignoreV1)
34    : mIsValid(false),
35      mData(NULL),
36      mSize(0),
37      mFirstFrameOffset(0),
38      mVersion(ID3_UNKNOWN) {
39    mIsValid = parseV2(source);
40
41    if (!mIsValid && !ignoreV1) {
42        mIsValid = parseV1(source);
43    }
44}
45
46ID3::~ID3() {
47    if (mData) {
48        free(mData);
49        mData = NULL;
50    }
51}
52
// Returns the result of the tag parsing performed by the constructor.
bool ID3::isValid() const {
    return mIsValid;
}
56
// Returns the tag version recorded during parsing (ID3_UNKNOWN until a tag
// has been parsed successfully).
ID3::Version ID3::version() const {
    return mVersion;
}
60
61// static
62bool ID3::ParseSyncsafeInteger(const uint8_t encoded[4], size_t *x) {
63    *x = 0;
64    for (int32_t i = 0; i < 4; ++i) {
65        if (encoded[i] & 0x80) {
66            return false;
67        }
68
69        *x = ((*x) << 7) | encoded[i];
70    }
71
72    return true;
73}
74
75bool ID3::parseV2(const sp<DataSource> &source) {
76struct id3_header {
77    char id[3];
78    uint8_t version_major;
79    uint8_t version_minor;
80    uint8_t flags;
81    uint8_t enc_size[4];
82    };
83
84    id3_header header;
85    if (source->readAt(
86                0, &header, sizeof(header)) != (ssize_t)sizeof(header)) {
87        return false;
88    }
89
90    if (memcmp(header.id, "ID3", 3)) {
91        return false;
92    }
93
94    if (header.version_major == 0xff || header.version_minor == 0xff) {
95        return false;
96    }
97
98    if (header.version_major == 2) {
99        if (header.flags & 0x3f) {
100            // We only support the 2 high bits, if any of the lower bits are
101            // set, we cannot guarantee to understand the tag format.
102            return false;
103        }
104
105        if (header.flags & 0x40) {
106            // No compression scheme has been decided yet, ignore the
107            // tag if compression is indicated.
108
109            return false;
110        }
111    } else if (header.version_major == 3) {
112        if (header.flags & 0x1f) {
113            // We only support the 3 high bits, if any of the lower bits are
114            // set, we cannot guarantee to understand the tag format.
115            return false;
116        }
117    } else if (header.version_major == 4) {
118        if (header.flags & 0x0f) {
119            // The lower 4 bits are undefined in this spec.
120            return false;
121        }
122    } else {
123        return false;
124    }
125
126    size_t size;
127    if (!ParseSyncsafeInteger(header.enc_size, &size)) {
128        return false;
129    }
130
131    if (size > kMaxMetadataSize) {
132        ALOGE("skipping huge ID3 metadata of size %d", size);
133        return false;
134    }
135
136    mData = (uint8_t *)malloc(size);
137
138    if (mData == NULL) {
139        return false;
140    }
141
142    mSize = size;
143
144    if (source->readAt(sizeof(header), mData, mSize) != (ssize_t)mSize) {
145        free(mData);
146        mData = NULL;
147
148        return false;
149    }
150
151    if (header.version_major == 4) {
152        void *copy = malloc(size);
153        memcpy(copy, mData, size);
154
155        bool success = removeUnsynchronizationV2_4(false /* iTunesHack */);
156        if (!success) {
157            memcpy(mData, copy, size);
158            mSize = size;
159
160            success = removeUnsynchronizationV2_4(true /* iTunesHack */);
161
162            if (success) {
163                ALOGV("Had to apply the iTunes hack to parse this ID3 tag");
164            }
165        }
166
167        free(copy);
168        copy = NULL;
169
170        if (!success) {
171            free(mData);
172            mData = NULL;
173
174            return false;
175        }
176    } else if (header.flags & 0x80) {
177        ALOGV("removing unsynchronization");
178
179        removeUnsynchronization();
180    }
181
182    mFirstFrameOffset = 0;
183    if (header.version_major == 3 && (header.flags & 0x40)) {
184        // Version 2.3 has an optional extended header.
185
186        if (mSize < 4) {
187            free(mData);
188            mData = NULL;
189
190            return false;
191        }
192
193        size_t extendedHeaderSize = U32_AT(&mData[0]) + 4;
194
195        if (extendedHeaderSize > mSize) {
196            free(mData);
197            mData = NULL;
198
199            return false;
200        }
201
202        mFirstFrameOffset = extendedHeaderSize;
203
204        uint16_t extendedFlags = 0;
205        if (extendedHeaderSize >= 6) {
206            extendedFlags = U16_AT(&mData[4]);
207
208            if (extendedHeaderSize >= 10) {
209                size_t paddingSize = U32_AT(&mData[6]);
210
211                if (mFirstFrameOffset + paddingSize > mSize) {
212                    free(mData);
213                    mData = NULL;
214
215                    return false;
216                }
217
218                mSize -= paddingSize;
219            }
220
221            if (extendedFlags & 0x8000) {
222                ALOGV("have crc");
223            }
224        }
225    } else if (header.version_major == 4 && (header.flags & 0x40)) {
226        // Version 2.4 has an optional extended header, that's different
227        // from Version 2.3's...
228
229        if (mSize < 4) {
230            free(mData);
231            mData = NULL;
232
233            return false;
234        }
235
236        size_t ext_size;
237        if (!ParseSyncsafeInteger(mData, &ext_size)) {
238            free(mData);
239            mData = NULL;
240
241            return false;
242        }
243
244        if (ext_size < 6 || ext_size > mSize) {
245            free(mData);
246            mData = NULL;
247
248            return false;
249        }
250
251        mFirstFrameOffset = ext_size;
252    }
253
254    if (header.version_major == 2) {
255        mVersion = ID3_V2_2;
256    } else if (header.version_major == 3) {
257        mVersion = ID3_V2_3;
258    } else {
259        CHECK_EQ(header.version_major, 4);
260        mVersion = ID3_V2_4;
261    }
262
263    return true;
264}
265
266void ID3::removeUnsynchronization() {
267    for (size_t i = 0; i + 1 < mSize; ++i) {
268        if (mData[i] == 0xff && mData[i + 1] == 0x00) {
269            memmove(&mData[i + 1], &mData[i + 2], mSize - i - 2);
270            --mSize;
271        }
272    }
273}
274
// Stores the low 28 bits of |x| into dst[0..3], 7 bits per byte with the most
// significant group in dst[0]. The high bit of every output byte is zero.
static void WriteSyncsafeInteger(uint8_t *dst, size_t x) {
    // Fill from the last byte backwards so the lowest 7 bits land in dst[3].
    for (int pos = 3; pos >= 0; --pos) {
        dst[pos] = static_cast<uint8_t>(x & 0x7f);
        x >>= 7;
    }
}
281
282bool ID3::removeUnsynchronizationV2_4(bool iTunesHack) {
283    size_t oldSize = mSize;
284
285    size_t offset = 0;
286    while (offset + 10 <= mSize) {
287        if (!memcmp(&mData[offset], "\0\0\0\0", 4)) {
288            break;
289        }
290
291        size_t dataSize;
292        if (iTunesHack) {
293            dataSize = U32_AT(&mData[offset + 4]);
294        } else if (!ParseSyncsafeInteger(&mData[offset + 4], &dataSize)) {
295            return false;
296        }
297
298        if (offset + dataSize + 10 > mSize) {
299            return false;
300        }
301
302        uint16_t flags = U16_AT(&mData[offset + 8]);
303        uint16_t prevFlags = flags;
304
305        if (flags & 1) {
306            // Strip data length indicator
307
308            memmove(&mData[offset + 10], &mData[offset + 14], mSize - offset - 14);
309            mSize -= 4;
310            dataSize -= 4;
311
312            flags &= ~1;
313        }
314
315        if (flags & 2) {
316            // Unsynchronization added.
317
318            for (size_t i = 0; i + 1 < dataSize; ++i) {
319                if (mData[offset + 10 + i] == 0xff
320                        && mData[offset + 11 + i] == 0x00) {
321                    memmove(&mData[offset + 11 + i], &mData[offset + 12 + i],
322                            mSize - offset - 12 - i);
323                    --mSize;
324                    --dataSize;
325                }
326            }
327
328            flags &= ~2;
329        }
330
331        if (flags != prevFlags || iTunesHack) {
332            WriteSyncsafeInteger(&mData[offset + 4], dataSize);
333            mData[offset + 8] = flags >> 8;
334            mData[offset + 9] = flags & 0xff;
335        }
336
337        offset += 10 + dataSize;
338    }
339
340    memset(&mData[mSize], 0, oldSize - mSize);
341
342    return true;
343}
344
345ID3::Iterator::Iterator(const ID3 &parent, const char *id)
346    : mParent(parent),
347      mID(NULL),
348      mOffset(mParent.mFirstFrameOffset),
349      mFrameData(NULL),
350      mFrameSize(0) {
351    if (id) {
352        mID = strdup(id);
353    }
354
355    findFrame();
356}
357
358ID3::Iterator::~Iterator() {
359    if (mID) {
360        free(mID);
361        mID = NULL;
362    }
363}
364
365bool ID3::Iterator::done() const {
366    return mFrameData == NULL;
367}
368
369void ID3::Iterator::next() {
370    if (mFrameData == NULL) {
371        return;
372    }
373
374    mOffset += mFrameSize;
375
376    findFrame();
377}
378
379void ID3::Iterator::getID(String8 *id) const {
380    id->setTo("");
381
382    if (mFrameData == NULL) {
383        return;
384    }
385
386    if (mParent.mVersion == ID3_V2_2) {
387        id->setTo((const char *)&mParent.mData[mOffset], 3);
388    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
389        id->setTo((const char *)&mParent.mData[mOffset], 4);
390    } else {
391        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
392
393        switch (mOffset) {
394            case 3:
395                id->setTo("TT2");
396                break;
397            case 33:
398                id->setTo("TP1");
399                break;
400            case 63:
401                id->setTo("TAL");
402                break;
403            case 93:
404                id->setTo("TYE");
405                break;
406            case 97:
407                id->setTo("COM");
408                break;
409            case 126:
410                id->setTo("TRK");
411                break;
412            case 127:
413                id->setTo("TCO");
414                break;
415            default:
416                CHECK(!"should not be here.");
417                break;
418        }
419    }
420}
421
422static void convertISO8859ToString8(
423        const uint8_t *data, size_t size,
424        String8 *s) {
425    size_t utf8len = 0;
426    for (size_t i = 0; i < size; ++i) {
427        if (data[i] == '\0') {
428            size = i;
429            break;
430        } else if (data[i] < 0x80) {
431            ++utf8len;
432        } else {
433            utf8len += 2;
434        }
435    }
436
437    if (utf8len == size) {
438        // Only ASCII characters present.
439
440        s->setTo((const char *)data, size);
441        return;
442    }
443
444    char *tmp = new char[utf8len];
445    char *ptr = tmp;
446    for (size_t i = 0; i < size; ++i) {
447        if (data[i] == '\0') {
448            break;
449        } else if (data[i] < 0x80) {
450            *ptr++ = data[i];
451        } else if (data[i] < 0xc0) {
452            *ptr++ = 0xc2;
453            *ptr++ = data[i];
454        } else {
455            *ptr++ = 0xc3;
456            *ptr++ = data[i] - 64;
457        }
458    }
459
460    s->setTo(tmp, utf8len);
461
462    delete[] tmp;
463    tmp = NULL;
464}
465
466// the 2nd argument is used to get the data following the \0 in a comment field
467void ID3::Iterator::getString(String8 *id, String8 *comment) const {
468    getstring(id, false);
469    if (comment != NULL) {
470        getstring(comment, true);
471    }
472}
473
474// comment fields (COM/COMM) contain an initial short descriptor, followed by \0,
475// followed by more data. The data following the \0 can be retrieved by setting
476// "otherdata" to true.
477void ID3::Iterator::getstring(String8 *id, bool otherdata) const {
478    id->setTo("");
479
480    const uint8_t *frameData = mFrameData;
481    if (frameData == NULL) {
482        return;
483    }
484
485    uint8_t encoding = *frameData;
486
487    if (mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1) {
488        if (mOffset == 126 || mOffset == 127) {
489            // Special treatment for the track number and genre.
490            char tmp[16];
491            sprintf(tmp, "%d", (int)*frameData);
492
493            id->setTo(tmp);
494            return;
495        }
496
497        convertISO8859ToString8(frameData, mFrameSize, id);
498        return;
499    }
500
501    size_t n = mFrameSize - getHeaderLength() - 1;
502    if (otherdata) {
503        // skip past the encoding, language, and the 0 separator
504        frameData += 4;
505        int32_t i = n - 4;
506        while(--i >= 0 && *++frameData != 0) ;
507        int skipped = (frameData - mFrameData);
508        if (skipped >= n) {
509            return;
510        }
511        n -= skipped;
512    }
513
514    if (encoding == 0x00) {
515        // ISO 8859-1
516        convertISO8859ToString8(frameData + 1, n, id);
517    } else if (encoding == 0x03) {
518        // UTF-8
519        id->setTo((const char *)(frameData + 1), n);
520    } else if (encoding == 0x02) {
521        // UTF-16 BE, no byte order mark.
522        // API wants number of characters, not number of bytes...
523        int len = n / 2;
524        const char16_t *framedata = (const char16_t *) (frameData + 1);
525        char16_t *framedatacopy = NULL;
526#if BYTE_ORDER == LITTLE_ENDIAN
527        framedatacopy = new char16_t[len];
528        for (int i = 0; i < len; i++) {
529            framedatacopy[i] = bswap_16(framedata[i]);
530        }
531        framedata = framedatacopy;
532#endif
533        id->setTo(framedata, len);
534        if (framedatacopy != NULL) {
535            delete[] framedatacopy;
536        }
537    } else {
538        // UCS-2
539        // API wants number of characters, not number of bytes...
540        int len = n / 2;
541        const char16_t *framedata = (const char16_t *) (frameData + 1);
542        char16_t *framedatacopy = NULL;
543        if (*framedata == 0xfffe) {
544            // endianness marker doesn't match host endianness, convert
545            framedatacopy = new char16_t[len];
546            for (int i = 0; i < len; i++) {
547                framedatacopy[i] = bswap_16(framedata[i]);
548            }
549            framedata = framedatacopy;
550        }
551        // If the string starts with an endianness marker, skip it
552        if (*framedata == 0xfeff) {
553            framedata++;
554            len--;
555        }
556        id->setTo(framedata, len);
557        if (framedatacopy != NULL) {
558            delete[] framedatacopy;
559        }
560    }
561}
562
563const uint8_t *ID3::Iterator::getData(size_t *length) const {
564    *length = 0;
565
566    if (mFrameData == NULL) {
567        return NULL;
568    }
569
570    *length = mFrameSize - getHeaderLength();
571
572    return mFrameData;
573}
574
575size_t ID3::Iterator::getHeaderLength() const {
576    if (mParent.mVersion == ID3_V2_2) {
577        return 6;
578    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
579        return 10;
580    } else {
581        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
582        return 0;
583    }
584}
585
586void ID3::Iterator::findFrame() {
587    for (;;) {
588        mFrameData = NULL;
589        mFrameSize = 0;
590
591        if (mParent.mVersion == ID3_V2_2) {
592            if (mOffset + 6 > mParent.mSize) {
593                return;
594            }
595
596            if (!memcmp(&mParent.mData[mOffset], "\0\0\0", 3)) {
597                return;
598            }
599
600            mFrameSize =
601                (mParent.mData[mOffset + 3] << 16)
602                | (mParent.mData[mOffset + 4] << 8)
603                | mParent.mData[mOffset + 5];
604
605            mFrameSize += 6;
606
607            if (mOffset + mFrameSize > mParent.mSize) {
608                ALOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
609                     mOffset, mFrameSize, mParent.mSize - mOffset - 6);
610                return;
611            }
612
613            mFrameData = &mParent.mData[mOffset + 6];
614
615            if (!mID) {
616                break;
617            }
618
619            char id[4];
620            memcpy(id, &mParent.mData[mOffset], 3);
621            id[3] = '\0';
622
623            if (!strcmp(id, mID)) {
624                break;
625            }
626        } else if (mParent.mVersion == ID3_V2_3
627                || mParent.mVersion == ID3_V2_4) {
628            if (mOffset + 10 > mParent.mSize) {
629                return;
630            }
631
632            if (!memcmp(&mParent.mData[mOffset], "\0\0\0\0", 4)) {
633                return;
634            }
635
636            size_t baseSize;
637            if (mParent.mVersion == ID3_V2_4) {
638                if (!ParseSyncsafeInteger(
639                            &mParent.mData[mOffset + 4], &baseSize)) {
640                    return;
641                }
642            } else {
643                baseSize = U32_AT(&mParent.mData[mOffset + 4]);
644            }
645
646            mFrameSize = 10 + baseSize;
647
648            if (mOffset + mFrameSize > mParent.mSize) {
649                ALOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
650                     mOffset, mFrameSize, mParent.mSize - mOffset - 10);
651                return;
652            }
653
654            uint16_t flags = U16_AT(&mParent.mData[mOffset + 8]);
655
656            if ((mParent.mVersion == ID3_V2_4 && (flags & 0x000c))
657                || (mParent.mVersion == ID3_V2_3 && (flags & 0x00c0))) {
658                // Compression or encryption are not supported at this time.
659                // Per-frame unsynchronization and data-length indicator
660                // have already been taken care of.
661
662                ALOGV("Skipping unsupported frame (compression, encryption "
663                     "or per-frame unsynchronization flagged");
664
665                mOffset += mFrameSize;
666                continue;
667            }
668
669            mFrameData = &mParent.mData[mOffset + 10];
670
671            if (!mID) {
672                break;
673            }
674
675            char id[5];
676            memcpy(id, &mParent.mData[mOffset], 4);
677            id[4] = '\0';
678
679            if (!strcmp(id, mID)) {
680                break;
681            }
682        } else {
683            CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
684
685            if (mOffset >= mParent.mSize) {
686                return;
687            }
688
689            mFrameData = &mParent.mData[mOffset];
690
691            switch (mOffset) {
692                case 3:
693                case 33:
694                case 63:
695                    mFrameSize = 30;
696                    break;
697                case 93:
698                    mFrameSize = 4;
699                    break;
700                case 97:
701                    if (mParent.mVersion == ID3_V1) {
702                        mFrameSize = 30;
703                    } else {
704                        mFrameSize = 29;
705                    }
706                    break;
707                case 126:
708                    mFrameSize = 1;
709                    break;
710                case 127:
711                    mFrameSize = 1;
712                    break;
713                default:
714                    CHECK(!"Should not be here, invalid offset.");
715                    break;
716            }
717
718            if (!mID) {
719                break;
720            }
721
722            String8 id;
723            getID(&id);
724
725            if (id == mID) {
726                break;
727            }
728        }
729
730        mOffset += mFrameSize;
731    }
732}
733
734static size_t StringSize(const uint8_t *start, uint8_t encoding) {
735    if (encoding == 0x00 || encoding == 0x03) {
736        // ISO 8859-1 or UTF-8
737        return strlen((const char *)start) + 1;
738    }
739
740    // UCS-2
741    size_t n = 0;
742    while (start[n] != '\0' || start[n + 1] != '\0') {
743        n += 2;
744    }
745
746    // Add size of null termination.
747    return n + 2;
748}
749
750const void *
751ID3::getAlbumArt(size_t *length, String8 *mime) const {
752    *length = 0;
753    mime->setTo("");
754
755    Iterator it(
756            *this,
757            (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) ? "APIC" : "PIC");
758
759    while (!it.done()) {
760        size_t size;
761        const uint8_t *data = it.getData(&size);
762
763        if (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) {
764            uint8_t encoding = data[0];
765            mime->setTo((const char *)&data[1]);
766            size_t mimeLen = strlen((const char *)&data[1]) + 1;
767
768            uint8_t picType = data[1 + mimeLen];
769#if 0
770            if (picType != 0x03) {
771                // Front Cover Art
772                it.next();
773                continue;
774            }
775#endif
776
777            size_t descLen = StringSize(&data[2 + mimeLen], encoding);
778
779            *length = size - 2 - mimeLen - descLen;
780
781            return &data[2 + mimeLen + descLen];
782        } else {
783            uint8_t encoding = data[0];
784
785            if (!memcmp(&data[1], "PNG", 3)) {
786                mime->setTo("image/png");
787            } else if (!memcmp(&data[1], "JPG", 3)) {
788                mime->setTo("image/jpeg");
789            } else if (!memcmp(&data[1], "-->", 3)) {
790                mime->setTo("text/plain");
791            } else {
792                return NULL;
793            }
794
795#if 0
796            uint8_t picType = data[4];
797            if (picType != 0x03) {
798                // Front Cover Art
799                it.next();
800                continue;
801            }
802#endif
803
804            size_t descLen = StringSize(&data[5], encoding);
805
806            *length = size - 5 - descLen;
807
808            return &data[5 + descLen];
809        }
810    }
811
812    return NULL;
813}
814
815bool ID3::parseV1(const sp<DataSource> &source) {
816    const size_t V1_TAG_SIZE = 128;
817
818    off64_t size;
819    if (source->getSize(&size) != OK || size < (off64_t)V1_TAG_SIZE) {
820        return false;
821    }
822
823    mData = (uint8_t *)malloc(V1_TAG_SIZE);
824    if (source->readAt(size - V1_TAG_SIZE, mData, V1_TAG_SIZE)
825            != (ssize_t)V1_TAG_SIZE) {
826        free(mData);
827        mData = NULL;
828
829        return false;
830    }
831
832    if (memcmp("TAG", mData, 3)) {
833        free(mData);
834        mData = NULL;
835
836        return false;
837    }
838
839    mSize = V1_TAG_SIZE;
840    mFirstFrameOffset = 3;
841
842    if (mData[V1_TAG_SIZE - 3] != 0) {
843        mVersion = ID3_V1;
844    } else {
845        mVersion = ID3_V1_1;
846    }
847
848    return true;
849}
850
851}  // namespace android
852