1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ID3"
19#include <utils/Log.h>
20
21#include "../include/ID3.h"
22
23#include <media/stagefright/DataSource.h>
24#include <media/stagefright/MediaDebug.h>
25#include <media/stagefright/Utils.h>
26#include <utils/String8.h>
27#include <byteswap.h>
28
29namespace android {
30
// Largest ID3v2 tag body (in bytes) that will be loaded into memory: 3 MiB.
static const size_t kMaxMetadataSize = (size_t)3 << 20;
32
33ID3::ID3(const sp<DataSource> &source)
34    : mIsValid(false),
35      mData(NULL),
36      mSize(0),
37      mFirstFrameOffset(0),
38      mVersion(ID3_UNKNOWN) {
39    mIsValid = parseV2(source);
40
41    if (!mIsValid) {
42        mIsValid = parseV1(source);
43    }
44}
45
46ID3::~ID3() {
47    if (mData) {
48        free(mData);
49        mData = NULL;
50    }
51}
52
53bool ID3::isValid() const {
54    return mIsValid;
55}
56
57ID3::Version ID3::version() const {
58    return mVersion;
59}
60
61// static
62bool ID3::ParseSyncsafeInteger(const uint8_t encoded[4], size_t *x) {
63    *x = 0;
64    for (int32_t i = 0; i < 4; ++i) {
65        if (encoded[i] & 0x80) {
66            return false;
67        }
68
69        *x = ((*x) << 7) | encoded[i];
70    }
71
72    return true;
73}
74
75bool ID3::parseV2(const sp<DataSource> &source) {
76struct id3_header {
77    char id[3];
78    uint8_t version_major;
79    uint8_t version_minor;
80    uint8_t flags;
81    uint8_t enc_size[4];
82    };
83
84    id3_header header;
85    if (source->readAt(
86                0, &header, sizeof(header)) != (ssize_t)sizeof(header)) {
87        return false;
88    }
89
90    if (memcmp(header.id, "ID3", 3)) {
91        return false;
92    }
93
94    if (header.version_major == 0xff || header.version_minor == 0xff) {
95        return false;
96    }
97
98    if (header.version_major == 2) {
99        if (header.flags & 0x3f) {
100            // We only support the 2 high bits, if any of the lower bits are
101            // set, we cannot guarantee to understand the tag format.
102            return false;
103        }
104
105        if (header.flags & 0x40) {
106            // No compression scheme has been decided yet, ignore the
107            // tag if compression is indicated.
108
109            return false;
110        }
111    } else if (header.version_major == 3) {
112        if (header.flags & 0x1f) {
113            // We only support the 3 high bits, if any of the lower bits are
114            // set, we cannot guarantee to understand the tag format.
115            return false;
116        }
117    } else if (header.version_major == 4) {
118        if (header.flags & 0x0f) {
119            // The lower 4 bits are undefined in this spec.
120            return false;
121        }
122    } else {
123        return false;
124    }
125
126    size_t size;
127    if (!ParseSyncsafeInteger(header.enc_size, &size)) {
128        return false;
129    }
130
131    if (size > kMaxMetadataSize) {
132        LOGE("skipping huge ID3 metadata of size %d", size);
133        return false;
134    }
135
136    mData = (uint8_t *)malloc(size);
137
138    if (mData == NULL) {
139        return false;
140    }
141
142    mSize = size;
143
144    if (source->readAt(sizeof(header), mData, mSize) != (ssize_t)mSize) {
145        free(mData);
146        mData = NULL;
147
148        return false;
149    }
150
151    if (header.version_major == 4) {
152        if (!removeUnsynchronizationV2_4()) {
153            free(mData);
154            mData = NULL;
155
156            return false;
157        }
158    } else if (header.flags & 0x80) {
159        LOGV("removing unsynchronization");
160
161        removeUnsynchronization();
162    }
163
164    mFirstFrameOffset = 0;
165    if (header.version_major == 3 && (header.flags & 0x40)) {
166        // Version 2.3 has an optional extended header.
167
168        if (mSize < 4) {
169            free(mData);
170            mData = NULL;
171
172            return false;
173        }
174
175        size_t extendedHeaderSize = U32_AT(&mData[0]) + 4;
176
177        if (extendedHeaderSize > mSize) {
178            free(mData);
179            mData = NULL;
180
181            return false;
182        }
183
184        mFirstFrameOffset = extendedHeaderSize;
185
186        uint16_t extendedFlags = 0;
187        if (extendedHeaderSize >= 6) {
188            extendedFlags = U16_AT(&mData[4]);
189
190            if (extendedHeaderSize >= 10) {
191                size_t paddingSize = U32_AT(&mData[6]);
192
193                if (mFirstFrameOffset + paddingSize > mSize) {
194                    free(mData);
195                    mData = NULL;
196
197                    return false;
198                }
199
200                mSize -= paddingSize;
201            }
202
203            if (extendedFlags & 0x8000) {
204                LOGV("have crc");
205            }
206        }
207    } else if (header.version_major == 4 && (header.flags & 0x40)) {
208        // Version 2.4 has an optional extended header, that's different
209        // from Version 2.3's...
210
211        if (mSize < 4) {
212            free(mData);
213            mData = NULL;
214
215            return false;
216        }
217
218        size_t ext_size;
219        if (!ParseSyncsafeInteger(mData, &ext_size)) {
220            free(mData);
221            mData = NULL;
222
223            return false;
224        }
225
226        if (ext_size < 6 || ext_size > mSize) {
227            free(mData);
228            mData = NULL;
229
230            return false;
231        }
232
233        mFirstFrameOffset = ext_size;
234    }
235
236    if (header.version_major == 2) {
237        mVersion = ID3_V2_2;
238    } else if (header.version_major == 3) {
239        mVersion = ID3_V2_3;
240    } else {
241        CHECK_EQ(header.version_major, 4);
242        mVersion = ID3_V2_4;
243    }
244
245    return true;
246}
247
248void ID3::removeUnsynchronization() {
249    for (size_t i = 0; i + 1 < mSize; ++i) {
250        if (mData[i] == 0xff && mData[i + 1] == 0x00) {
251            memmove(&mData[i + 1], &mData[i + 2], mSize - i - 2);
252            --mSize;
253        }
254    }
255}
256
// Stores the low 28 bits of |x| into dst[0..3] as a "syncsafe" integer:
// 7 bits per byte, most significant group first, top bit of every byte clear.
// Bits of |x| above bit 27 are discarded.
static void WriteSyncsafeInteger(uint8_t *dst, size_t x) {
    // Fill the four bytes back to front, peeling 7 bits off |x| each time.
    uint8_t *out = dst + 4;
    while (out != dst) {
        *--out = (x & 0x7f);
        x >>= 7;
    }
}
263
264bool ID3::removeUnsynchronizationV2_4() {
265    size_t oldSize = mSize;
266
267    size_t offset = 0;
268    while (offset + 10 <= mSize) {
269        if (!memcmp(&mData[offset], "\0\0\0\0", 4)) {
270            break;
271        }
272
273        size_t dataSize;
274        if (!ParseSyncsafeInteger(&mData[offset + 4], &dataSize)) {
275            return false;
276        }
277
278        if (offset + dataSize + 10 > mSize) {
279            return false;
280        }
281
282        uint16_t flags = U16_AT(&mData[offset + 8]);
283        uint16_t prevFlags = flags;
284
285        if (flags & 1) {
286            // Strip data length indicator
287
288            memmove(&mData[offset + 10], &mData[offset + 14], mSize - offset - 14);
289            mSize -= 4;
290            dataSize -= 4;
291
292            flags &= ~1;
293        }
294
295        if (flags & 2) {
296            // Unsynchronization added.
297
298            for (size_t i = 0; i + 1 < dataSize; ++i) {
299                if (mData[offset + 10 + i] == 0xff
300                        && mData[offset + 11 + i] == 0x00) {
301                    memmove(&mData[offset + 11 + i], &mData[offset + 12 + i],
302                            mSize - offset - 12 - i);
303                    --mSize;
304                    --dataSize;
305                }
306            }
307
308            flags &= ~2;
309        }
310
311        if (flags != prevFlags) {
312            WriteSyncsafeInteger(&mData[offset + 4], dataSize);
313            mData[offset + 8] = flags >> 8;
314            mData[offset + 9] = flags & 0xff;
315        }
316
317        offset += 10 + dataSize;
318    }
319
320    memset(&mData[mSize], 0, oldSize - mSize);
321
322    return true;
323}
324
325ID3::Iterator::Iterator(const ID3 &parent, const char *id)
326    : mParent(parent),
327      mID(NULL),
328      mOffset(mParent.mFirstFrameOffset),
329      mFrameData(NULL),
330      mFrameSize(0) {
331    if (id) {
332        mID = strdup(id);
333    }
334
335    findFrame();
336}
337
338ID3::Iterator::~Iterator() {
339    if (mID) {
340        free(mID);
341        mID = NULL;
342    }
343}
344
345bool ID3::Iterator::done() const {
346    return mFrameData == NULL;
347}
348
349void ID3::Iterator::next() {
350    if (mFrameData == NULL) {
351        return;
352    }
353
354    mOffset += mFrameSize;
355
356    findFrame();
357}
358
359void ID3::Iterator::getID(String8 *id) const {
360    id->setTo("");
361
362    if (mFrameData == NULL) {
363        return;
364    }
365
366    if (mParent.mVersion == ID3_V2_2) {
367        id->setTo((const char *)&mParent.mData[mOffset], 3);
368    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
369        id->setTo((const char *)&mParent.mData[mOffset], 4);
370    } else {
371        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
372
373        switch (mOffset) {
374            case 3:
375                id->setTo("TT2");
376                break;
377            case 33:
378                id->setTo("TP1");
379                break;
380            case 63:
381                id->setTo("TAL");
382                break;
383            case 93:
384                id->setTo("TYE");
385                break;
386            case 97:
387                id->setTo("COM");
388                break;
389            case 126:
390                id->setTo("TRK");
391                break;
392            case 127:
393                id->setTo("TCO");
394                break;
395            default:
396                CHECK(!"should not be here.");
397                break;
398        }
399    }
400}
401
402static void convertISO8859ToString8(
403        const uint8_t *data, size_t size,
404        String8 *s) {
405    size_t utf8len = 0;
406    for (size_t i = 0; i < size; ++i) {
407        if (data[i] == '\0') {
408            size = i;
409            break;
410        } else if (data[i] < 0x80) {
411            ++utf8len;
412        } else {
413            utf8len += 2;
414        }
415    }
416
417    if (utf8len == size) {
418        // Only ASCII characters present.
419
420        s->setTo((const char *)data, size);
421        return;
422    }
423
424    char *tmp = new char[utf8len];
425    char *ptr = tmp;
426    for (size_t i = 0; i < size; ++i) {
427        if (data[i] == '\0') {
428            break;
429        } else if (data[i] < 0x80) {
430            *ptr++ = data[i];
431        } else if (data[i] < 0xc0) {
432            *ptr++ = 0xc2;
433            *ptr++ = data[i];
434        } else {
435            *ptr++ = 0xc3;
436            *ptr++ = data[i] - 64;
437        }
438    }
439
440    s->setTo(tmp, utf8len);
441
442    delete[] tmp;
443    tmp = NULL;
444}
445
446void ID3::Iterator::getString(String8 *id) const {
447    id->setTo("");
448
449    if (mFrameData == NULL) {
450        return;
451    }
452
453    if (mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1) {
454        if (mOffset == 126 || mOffset == 127) {
455            // Special treatment for the track number and genre.
456            char tmp[16];
457            sprintf(tmp, "%d", (int)*mFrameData);
458
459            id->setTo(tmp);
460            return;
461        }
462
463        convertISO8859ToString8(mFrameData, mFrameSize, id);
464        return;
465    }
466
467    size_t n = mFrameSize - getHeaderLength() - 1;
468
469    if (*mFrameData == 0x00) {
470        // ISO 8859-1
471        convertISO8859ToString8(mFrameData + 1, n, id);
472    } else if (*mFrameData == 0x03) {
473        // UTF-8
474        id->setTo((const char *)(mFrameData + 1), n);
475    } else if (*mFrameData == 0x02) {
476        // UTF-16 BE, no byte order mark.
477        // API wants number of characters, not number of bytes...
478        int len = n / 2;
479        const char16_t *framedata = (const char16_t *) (mFrameData + 1);
480        char16_t *framedatacopy = NULL;
481#if BYTE_ORDER == LITTLE_ENDIAN
482        framedatacopy = new char16_t[len];
483        for (int i = 0; i < len; i++) {
484            framedatacopy[i] = bswap_16(framedata[i]);
485        }
486        framedata = framedatacopy;
487#endif
488        id->setTo(framedata, len);
489        if (framedatacopy != NULL) {
490            delete[] framedatacopy;
491        }
492    } else {
493        // UCS-2
494        // API wants number of characters, not number of bytes...
495        int len = n / 2;
496        const char16_t *framedata = (const char16_t *) (mFrameData + 1);
497        char16_t *framedatacopy = NULL;
498        if (*framedata == 0xfffe) {
499            // endianness marker doesn't match host endianness, convert
500            framedatacopy = new char16_t[len];
501            for (int i = 0; i < len; i++) {
502                framedatacopy[i] = bswap_16(framedata[i]);
503            }
504            framedata = framedatacopy;
505        }
506        // If the string starts with an endianness marker, skip it
507        if (*framedata == 0xfeff) {
508            framedata++;
509            len--;
510        }
511        id->setTo(framedata, len);
512        if (framedatacopy != NULL) {
513            delete[] framedatacopy;
514        }
515    }
516}
517
518const uint8_t *ID3::Iterator::getData(size_t *length) const {
519    *length = 0;
520
521    if (mFrameData == NULL) {
522        return NULL;
523    }
524
525    *length = mFrameSize - getHeaderLength();
526
527    return mFrameData;
528}
529
530size_t ID3::Iterator::getHeaderLength() const {
531    if (mParent.mVersion == ID3_V2_2) {
532        return 6;
533    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
534        return 10;
535    } else {
536        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
537        return 0;
538    }
539}
540
541void ID3::Iterator::findFrame() {
542    for (;;) {
543        mFrameData = NULL;
544        mFrameSize = 0;
545
546        if (mParent.mVersion == ID3_V2_2) {
547            if (mOffset + 6 > mParent.mSize) {
548                return;
549            }
550
551            if (!memcmp(&mParent.mData[mOffset], "\0\0\0", 3)) {
552                return;
553            }
554
555            mFrameSize =
556                (mParent.mData[mOffset + 3] << 16)
557                | (mParent.mData[mOffset + 4] << 8)
558                | mParent.mData[mOffset + 5];
559
560            mFrameSize += 6;
561
562            if (mOffset + mFrameSize > mParent.mSize) {
563                LOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
564                     mOffset, mFrameSize, mParent.mSize - mOffset - 6);
565                return;
566            }
567
568            mFrameData = &mParent.mData[mOffset + 6];
569
570            if (!mID) {
571                break;
572            }
573
574            char id[4];
575            memcpy(id, &mParent.mData[mOffset], 3);
576            id[3] = '\0';
577
578            if (!strcmp(id, mID)) {
579                break;
580            }
581        } else if (mParent.mVersion == ID3_V2_3
582                || mParent.mVersion == ID3_V2_4) {
583            if (mOffset + 10 > mParent.mSize) {
584                return;
585            }
586
587            if (!memcmp(&mParent.mData[mOffset], "\0\0\0\0", 4)) {
588                return;
589            }
590
591            size_t baseSize;
592            if (mParent.mVersion == ID3_V2_4) {
593                if (!ParseSyncsafeInteger(
594                            &mParent.mData[mOffset + 4], &baseSize)) {
595                    return;
596                }
597            } else {
598                baseSize = U32_AT(&mParent.mData[mOffset + 4]);
599            }
600
601            mFrameSize = 10 + baseSize;
602
603            if (mOffset + mFrameSize > mParent.mSize) {
604                LOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
605                     mOffset, mFrameSize, mParent.mSize - mOffset - 10);
606                return;
607            }
608
609            uint16_t flags = U16_AT(&mParent.mData[mOffset + 8]);
610
611            if ((mParent.mVersion == ID3_V2_4 && (flags & 0x000c))
612                || (mParent.mVersion == ID3_V2_3 && (flags & 0x00c0))) {
613                // Compression or encryption are not supported at this time.
614                // Per-frame unsynchronization and data-length indicator
615                // have already been taken care of.
616
617                LOGV("Skipping unsupported frame (compression, encryption "
618                     "or per-frame unsynchronization flagged");
619
620                mOffset += mFrameSize;
621                continue;
622            }
623
624            mFrameData = &mParent.mData[mOffset + 10];
625
626            if (!mID) {
627                break;
628            }
629
630            char id[5];
631            memcpy(id, &mParent.mData[mOffset], 4);
632            id[4] = '\0';
633
634            if (!strcmp(id, mID)) {
635                break;
636            }
637        } else {
638            CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
639
640            if (mOffset >= mParent.mSize) {
641                return;
642            }
643
644            mFrameData = &mParent.mData[mOffset];
645
646            switch (mOffset) {
647                case 3:
648                case 33:
649                case 63:
650                    mFrameSize = 30;
651                    break;
652                case 93:
653                    mFrameSize = 4;
654                    break;
655                case 97:
656                    if (mParent.mVersion == ID3_V1) {
657                        mFrameSize = 30;
658                    } else {
659                        mFrameSize = 29;
660                    }
661                    break;
662                case 126:
663                    mFrameSize = 1;
664                    break;
665                case 127:
666                    mFrameSize = 1;
667                    break;
668                default:
669                    CHECK(!"Should not be here, invalid offset.");
670                    break;
671            }
672
673            if (!mID) {
674                break;
675            }
676
677            String8 id;
678            getID(&id);
679
680            if (id == mID) {
681                break;
682            }
683        }
684
685        mOffset += mFrameSize;
686    }
687}
688
// Returns the number of bytes occupied by the terminated string at |start|,
// INCLUDING its terminator, so that start + StringSize(...) is the first
// byte after the string.
//
// |encoding| is the ID3 text-encoding byte: 0x00 (ISO 8859-1) and 0x03
// (UTF-8) strings end with a single NUL byte; every other value is treated
// as a 16-bit encoding whose terminator is a pair of NUL bytes at an even
// offset from |start|.
//
// The scan is not bounded: the caller must guarantee that a terminator is
// present.
static size_t StringSize(const uint8_t *start, uint8_t encoding) {
    if (encoding == 0x00 || encoding == 0x03) {
        // ISO 8859-1 or UTF-8
        return strlen((const char *)start) + 1;
    }

    // UCS-2
    size_t n = 0;
    while (start[n] != '\0' || start[n + 1] != '\0') {
        n += 2;
    }

    // Add the size of the two-byte terminator, mirroring the "+ 1" of the
    // 8-bit case above.
    return n + 2;
}
703
704const void *
705ID3::getAlbumArt(size_t *length, String8 *mime) const {
706    *length = 0;
707    mime->setTo("");
708
709    Iterator it(
710            *this,
711            (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) ? "APIC" : "PIC");
712
713    while (!it.done()) {
714        size_t size;
715        const uint8_t *data = it.getData(&size);
716
717        if (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) {
718            uint8_t encoding = data[0];
719            mime->setTo((const char *)&data[1]);
720            size_t mimeLen = strlen((const char *)&data[1]) + 1;
721
722            uint8_t picType = data[1 + mimeLen];
723#if 0
724            if (picType != 0x03) {
725                // Front Cover Art
726                it.next();
727                continue;
728            }
729#endif
730
731            size_t descLen = StringSize(&data[2 + mimeLen], encoding);
732
733            *length = size - 2 - mimeLen - descLen;
734
735            return &data[2 + mimeLen + descLen];
736        } else {
737            uint8_t encoding = data[0];
738
739            if (!memcmp(&data[1], "PNG", 3)) {
740                mime->setTo("image/png");
741            } else if (!memcmp(&data[1], "JPG", 3)) {
742                mime->setTo("image/jpeg");
743            } else if (!memcmp(&data[1], "-->", 3)) {
744                mime->setTo("text/plain");
745            } else {
746                return NULL;
747            }
748
749#if 0
750            uint8_t picType = data[4];
751            if (picType != 0x03) {
752                // Front Cover Art
753                it.next();
754                continue;
755            }
756#endif
757
758            size_t descLen = StringSize(&data[5], encoding);
759
760            *length = size - 5 - descLen;
761
762            return &data[5 + descLen];
763        }
764    }
765
766    return NULL;
767}
768
769bool ID3::parseV1(const sp<DataSource> &source) {
770    const size_t V1_TAG_SIZE = 128;
771
772    off_t size;
773    if (source->getSize(&size) != OK || size < (off_t)V1_TAG_SIZE) {
774        return false;
775    }
776
777    mData = (uint8_t *)malloc(V1_TAG_SIZE);
778    if (source->readAt(size - V1_TAG_SIZE, mData, V1_TAG_SIZE)
779            != (ssize_t)V1_TAG_SIZE) {
780        free(mData);
781        mData = NULL;
782
783        return false;
784    }
785
786    if (memcmp("TAG", mData, 3)) {
787        free(mData);
788        mData = NULL;
789
790        return false;
791    }
792
793    mSize = V1_TAG_SIZE;
794    mFirstFrameOffset = 3;
795
796    if (mData[V1_TAG_SIZE - 3] != 0) {
797        mVersion = ID3_V1;
798    } else {
799        mVersion = ID3_V1_1;
800    }
801
802    return true;
803}
804
805}  // namespace android
806