ID3.cpp revision bebd11b5a406bc4243cb7bd55f6849841bf911a7
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ID3"
19#include <utils/Log.h>
20
21#include "../include/ID3.h"
22
23#include <media/stagefright/DataSource.h>
24#include <media/stagefright/MediaDebug.h>
25#include <media/stagefright/Utils.h>
26#include <utils/String8.h>
27#include <byteswap.h>
28
29namespace android {
30
// Largest ID3v2 tag body (3 MiB) that parseV2() is willing to load into
// memory; anything bigger is rejected.
static const size_t kMaxMetadataSize = 3u << 20;
32
33ID3::ID3(const sp<DataSource> &source)
34    : mIsValid(false),
35      mData(NULL),
36      mSize(0),
37      mFirstFrameOffset(0),
38      mVersion(ID3_UNKNOWN) {
39    mIsValid = parseV2(source);
40
41    if (!mIsValid) {
42        mIsValid = parseV1(source);
43    }
44}
45
46ID3::~ID3() {
47    if (mData) {
48        free(mData);
49        mData = NULL;
50    }
51}
52
53bool ID3::isValid() const {
54    return mIsValid;
55}
56
57ID3::Version ID3::version() const {
58    return mVersion;
59}
60
61// static
62bool ID3::ParseSyncsafeInteger(const uint8_t encoded[4], size_t *x) {
63    *x = 0;
64    for (int32_t i = 0; i < 4; ++i) {
65        if (encoded[i] & 0x80) {
66            return false;
67        }
68
69        *x = ((*x) << 7) | encoded[i];
70    }
71
72    return true;
73}
74
75bool ID3::parseV2(const sp<DataSource> &source) {
76struct id3_header {
77    char id[3];
78    uint8_t version_major;
79    uint8_t version_minor;
80    uint8_t flags;
81    uint8_t enc_size[4];
82    };
83
84    id3_header header;
85    if (source->readAt(
86                0, &header, sizeof(header)) != (ssize_t)sizeof(header)) {
87        return false;
88    }
89
90    if (memcmp(header.id, "ID3", 3)) {
91        return false;
92    }
93
94    if (header.version_major == 0xff || header.version_minor == 0xff) {
95        return false;
96    }
97
98    if (header.version_major == 2) {
99        if (header.flags & 0x3f) {
100            // We only support the 2 high bits, if any of the lower bits are
101            // set, we cannot guarantee to understand the tag format.
102            return false;
103        }
104
105        if (header.flags & 0x40) {
106            // No compression scheme has been decided yet, ignore the
107            // tag if compression is indicated.
108
109            return false;
110        }
111    } else if (header.version_major == 3) {
112        if (header.flags & 0x1f) {
113            // We only support the 3 high bits, if any of the lower bits are
114            // set, we cannot guarantee to understand the tag format.
115            return false;
116        }
117    } else if (header.version_major == 4) {
118        if (header.flags & 0x0f) {
119            // The lower 4 bits are undefined in this spec.
120            return false;
121        }
122    } else {
123        return false;
124    }
125
126    size_t size;
127    if (!ParseSyncsafeInteger(header.enc_size, &size)) {
128        return false;
129    }
130
131    if (size > kMaxMetadataSize) {
132        LOGE("skipping huge ID3 metadata of size %d", size);
133        return false;
134    }
135
136    mData = (uint8_t *)malloc(size);
137
138    if (mData == NULL) {
139        return false;
140    }
141
142    mSize = size;
143
144    if (source->readAt(sizeof(header), mData, mSize) != (ssize_t)mSize) {
145        return false;
146    }
147
148    if (header.flags & 0x80) {
149        LOGV("removing unsynchronization");
150        removeUnsynchronization();
151    }
152
153    mFirstFrameOffset = 0;
154    if (header.version_major == 3 && (header.flags & 0x40)) {
155        // Version 2.3 has an optional extended header.
156
157        if (mSize < 4) {
158            free(mData);
159            mData = NULL;
160
161            return false;
162        }
163
164        size_t extendedHeaderSize = U32_AT(&mData[0]) + 4;
165
166        if (extendedHeaderSize > mSize) {
167            free(mData);
168            mData = NULL;
169
170            return false;
171        }
172
173        mFirstFrameOffset = extendedHeaderSize;
174
175        uint16_t extendedFlags = 0;
176        if (extendedHeaderSize >= 6) {
177            extendedFlags = U16_AT(&mData[4]);
178
179            if (extendedHeaderSize >= 10) {
180                size_t paddingSize = U32_AT(&mData[6]);
181
182                if (mFirstFrameOffset + paddingSize > mSize) {
183                    free(mData);
184                    mData = NULL;
185
186                    return false;
187                }
188
189                mSize -= paddingSize;
190            }
191
192            if (extendedFlags & 0x8000) {
193                LOGV("have crc");
194            }
195        }
196    } else if (header.version_major == 4 && (header.flags & 0x40)) {
197        // Version 2.4 has an optional extended header, that's different
198        // from Version 2.3's...
199
200        if (mSize < 4) {
201            free(mData);
202            mData = NULL;
203
204            return false;
205        }
206
207        size_t ext_size;
208        if (!ParseSyncsafeInteger(mData, &ext_size)) {
209            free(mData);
210            mData = NULL;
211
212            return false;
213        }
214
215        if (ext_size < 6 || ext_size > mSize) {
216            free(mData);
217            mData = NULL;
218
219            return false;
220        }
221
222        mFirstFrameOffset = ext_size;
223    }
224
225    if (header.version_major == 2) {
226        mVersion = ID3_V2_2;
227    } else if (header.version_major == 3) {
228        mVersion = ID3_V2_3;
229    } else {
230        CHECK_EQ(header.version_major, 4);
231        mVersion = ID3_V2_4;
232    }
233
234    return true;
235}
236
237void ID3::removeUnsynchronization() {
238    for (size_t i = 0; i + 1 < mSize; ++i) {
239        if (mData[i] == 0xff && mData[i + 1] == 0x00) {
240            memmove(&mData[i + 1], &mData[i + 2], mSize - i - 2);
241            --mSize;
242        }
243    }
244}
245
246ID3::Iterator::Iterator(const ID3 &parent, const char *id)
247    : mParent(parent),
248      mID(NULL),
249      mOffset(mParent.mFirstFrameOffset),
250      mFrameData(NULL),
251      mFrameSize(0) {
252    if (id) {
253        mID = strdup(id);
254    }
255
256    findFrame();
257}
258
259ID3::Iterator::~Iterator() {
260    if (mID) {
261        free(mID);
262        mID = NULL;
263    }
264}
265
266bool ID3::Iterator::done() const {
267    return mFrameData == NULL;
268}
269
270void ID3::Iterator::next() {
271    if (mFrameData == NULL) {
272        return;
273    }
274
275    mOffset += mFrameSize;
276
277    findFrame();
278}
279
280void ID3::Iterator::getID(String8 *id) const {
281    id->setTo("");
282
283    if (mFrameData == NULL) {
284        return;
285    }
286
287    if (mParent.mVersion == ID3_V2_2) {
288        id->setTo((const char *)&mParent.mData[mOffset], 3);
289    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
290        id->setTo((const char *)&mParent.mData[mOffset], 4);
291    } else {
292        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
293
294        switch (mOffset) {
295            case 3:
296                id->setTo("TT2");
297                break;
298            case 33:
299                id->setTo("TP1");
300                break;
301            case 63:
302                id->setTo("TAL");
303                break;
304            case 93:
305                id->setTo("TYE");
306                break;
307            case 97:
308                id->setTo("COM");
309                break;
310            case 126:
311                id->setTo("TRK");
312                break;
313            case 127:
314                id->setTo("TCO");
315                break;
316            default:
317                CHECK(!"should not be here.");
318                break;
319        }
320    }
321}
322
323static void convertISO8859ToString8(
324        const uint8_t *data, size_t size,
325        String8 *s) {
326    size_t utf8len = 0;
327    for (size_t i = 0; i < size; ++i) {
328        if (data[i] == '\0') {
329            size = i;
330            break;
331        } else if (data[i] < 0x80) {
332            ++utf8len;
333        } else {
334            utf8len += 2;
335        }
336    }
337
338    if (utf8len == size) {
339        // Only ASCII characters present.
340
341        s->setTo((const char *)data, size);
342        return;
343    }
344
345    char *tmp = new char[utf8len];
346    char *ptr = tmp;
347    for (size_t i = 0; i < size; ++i) {
348        if (data[i] == '\0') {
349            break;
350        } else if (data[i] < 0x80) {
351            *ptr++ = data[i];
352        } else if (data[i] < 0xc0) {
353            *ptr++ = 0xc2;
354            *ptr++ = data[i];
355        } else {
356            *ptr++ = 0xc3;
357            *ptr++ = data[i] - 64;
358        }
359    }
360
361    s->setTo(tmp, utf8len);
362
363    delete[] tmp;
364    tmp = NULL;
365}
366
367void ID3::Iterator::getString(String8 *id) const {
368    id->setTo("");
369
370    if (mFrameData == NULL) {
371        return;
372    }
373
374    if (mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1) {
375        if (mOffset == 126 || mOffset == 127) {
376            // Special treatment for the track number and genre.
377            char tmp[16];
378            sprintf(tmp, "%d", (int)*mFrameData);
379
380            id->setTo(tmp);
381            return;
382        }
383
384        convertISO8859ToString8(mFrameData, mFrameSize, id);
385        return;
386    }
387
388    size_t n = mFrameSize - getHeaderLength() - 1;
389
390    if (*mFrameData == 0x00) {
391        // ISO 8859-1
392        convertISO8859ToString8(mFrameData + 1, n, id);
393    } else if (*mFrameData == 0x03) {
394        // UTF-8
395        id->setTo((const char *)(mFrameData + 1), n);
396    } else if (*mFrameData == 0x02) {
397        // UTF-16 BE, no byte order mark.
398        // API wants number of characters, not number of bytes...
399        int len = n / 2;
400        const char16_t *framedata = (const char16_t *) (mFrameData + 1);
401        char16_t *framedatacopy = NULL;
402#if BYTE_ORDER == LITTLE_ENDIAN
403        framedatacopy = new char16_t[len];
404        for (int i = 0; i < len; i++) {
405            framedatacopy[i] = bswap_16(framedata[i]);
406        }
407        framedata = framedatacopy;
408#endif
409        id->setTo(framedata, len);
410        if (framedatacopy != NULL) {
411            delete[] framedatacopy;
412        }
413    } else {
414        // UCS-2
415        // API wants number of characters, not number of bytes...
416        int len = n / 2;
417        const char16_t *framedata = (const char16_t *) (mFrameData + 1);
418        char16_t *framedatacopy = NULL;
419        if (*framedata == 0xfffe) {
420            // endianness marker doesn't match host endianness, convert
421            framedatacopy = new char16_t[len];
422            for (int i = 0; i < len; i++) {
423                framedatacopy[i] = bswap_16(framedata[i]);
424            }
425            framedata = framedatacopy;
426        }
427        // If the string starts with an endianness marker, skip it
428        if (*framedata == 0xfeff) {
429            framedata++;
430            len--;
431        }
432        id->setTo(framedata, len);
433        if (framedatacopy != NULL) {
434            delete[] framedatacopy;
435        }
436    }
437}
438
439const uint8_t *ID3::Iterator::getData(size_t *length) const {
440    *length = 0;
441
442    if (mFrameData == NULL) {
443        return NULL;
444    }
445
446    *length = mFrameSize - getHeaderLength();
447
448    return mFrameData;
449}
450
451size_t ID3::Iterator::getHeaderLength() const {
452    if (mParent.mVersion == ID3_V2_2) {
453        return 6;
454    } else if (mParent.mVersion == ID3_V2_3 || mParent.mVersion == ID3_V2_4) {
455        return 10;
456    } else {
457        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
458        return 0;
459    }
460}
461
462void ID3::Iterator::findFrame() {
463    for (;;) {
464        mFrameData = NULL;
465        mFrameSize = 0;
466
467        if (mParent.mVersion == ID3_V2_2) {
468            if (mOffset + 6 > mParent.mSize) {
469                return;
470            }
471
472            if (!memcmp(&mParent.mData[mOffset], "\0\0\0", 3)) {
473                return;
474            }
475
476            mFrameSize =
477                (mParent.mData[mOffset + 3] << 16)
478                | (mParent.mData[mOffset + 4] << 8)
479                | mParent.mData[mOffset + 5];
480
481            mFrameSize += 6;
482
483            if (mOffset + mFrameSize > mParent.mSize) {
484                LOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
485                     mOffset, mFrameSize, mParent.mSize - mOffset - 6);
486                return;
487            }
488
489            mFrameData = &mParent.mData[mOffset + 6];
490
491            if (!mID) {
492                break;
493            }
494
495            char id[4];
496            memcpy(id, &mParent.mData[mOffset], 3);
497            id[3] = '\0';
498
499            if (!strcmp(id, mID)) {
500                break;
501            }
502        } else if (mParent.mVersion == ID3_V2_3
503                || mParent.mVersion == ID3_V2_4) {
504            if (mOffset + 10 > mParent.mSize) {
505                return;
506            }
507
508            if (!memcmp(&mParent.mData[mOffset], "\0\0\0\0", 4)) {
509                return;
510            }
511
512            size_t baseSize;
513            if (mParent.mVersion == ID3_V2_4) {
514                if (!ParseSyncsafeInteger(
515                            &mParent.mData[mOffset + 4], &baseSize)) {
516                    return;
517                }
518            } else {
519                baseSize = U32_AT(&mParent.mData[mOffset + 4]);
520            }
521
522            mFrameSize = 10 + baseSize;
523
524            if (mOffset + mFrameSize > mParent.mSize) {
525                LOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
526                     mOffset, mFrameSize, mParent.mSize - mOffset - 10);
527                return;
528            }
529
530            uint16_t flags = U16_AT(&mParent.mData[mOffset + 8]);
531
532            if ((mParent.mVersion == ID3_V2_4 && (flags & 0x000e))
533                || (mParent.mVersion == ID3_V2_3 && (flags & 0x00c0))) {
534                // Compression, Encryption or per-Frame unsynchronization
535                // are not supported at this time.
536
537                LOGV("Skipping unsupported frame (compression, encryption "
538                     "or per-frame unsynchronization flagged");
539
540                mOffset += mFrameSize;
541                continue;
542            }
543
544            mFrameData = &mParent.mData[mOffset + 10];
545
546            if (!mID) {
547                break;
548            }
549
550            char id[5];
551            memcpy(id, &mParent.mData[mOffset], 4);
552            id[4] = '\0';
553
554            if (!strcmp(id, mID)) {
555                break;
556            }
557        } else {
558            CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
559
560            if (mOffset >= mParent.mSize) {
561                return;
562            }
563
564            mFrameData = &mParent.mData[mOffset];
565
566            switch (mOffset) {
567                case 3:
568                case 33:
569                case 63:
570                    mFrameSize = 30;
571                    break;
572                case 93:
573                    mFrameSize = 4;
574                    break;
575                case 97:
576                    if (mParent.mVersion == ID3_V1) {
577                        mFrameSize = 30;
578                    } else {
579                        mFrameSize = 29;
580                    }
581                    break;
582                case 126:
583                    mFrameSize = 1;
584                    break;
585                case 127:
586                    mFrameSize = 1;
587                    break;
588                default:
589                    CHECK(!"Should not be here, invalid offset.");
590                    break;
591            }
592
593            if (!mID) {
594                break;
595            }
596
597            String8 id;
598            getID(&id);
599
600            if (id == mID) {
601                break;
602            }
603        }
604
605        mOffset += mFrameSize;
606    }
607}
608
// Returns the number of bytes occupied by the terminated string at |start|,
// INCLUDING its terminator, so that |start| + result is the first byte after
// the string.
//
// |encoding| is the ID3v2 text encoding byte: 0x00 (ISO 8859-1) and 0x03
// (UTF-8) use a single NUL byte as terminator, every other value is treated
// as a 16-bit encoding terminated by two NUL bytes on a 2-byte boundary.
//
// The scan is unbounded: the caller must guarantee that a terminator is
// present.
static size_t StringSize(const uint8_t *start, uint8_t encoding) {
    if (encoding == 0x00 || encoding == 0x03) {
        // ISO 8859-1 or UTF-8
        return strlen((const char *)start) + 1;
    }

    // UCS-2
    size_t n = 0;
    while (start[n] != '\0' || start[n + 1] != '\0') {
        n += 2;
    }

    // Account for the 2-byte terminator, mirroring the "+ 1" of the 8-bit
    // case; otherwise callers would treat the terminator as part of
    // whatever follows the string.
    return n + 2;
}
623
624const void *
625ID3::getAlbumArt(size_t *length, String8 *mime) const {
626    *length = 0;
627    mime->setTo("");
628
629    Iterator it(
630            *this,
631            (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) ? "APIC" : "PIC");
632
633    while (!it.done()) {
634        size_t size;
635        const uint8_t *data = it.getData(&size);
636
637        if (mVersion == ID3_V2_3 || mVersion == ID3_V2_4) {
638            uint8_t encoding = data[0];
639            mime->setTo((const char *)&data[1]);
640            size_t mimeLen = strlen((const char *)&data[1]) + 1;
641
642            uint8_t picType = data[1 + mimeLen];
643#if 0
644            if (picType != 0x03) {
645                // Front Cover Art
646                it.next();
647                continue;
648            }
649#endif
650
651            size_t descLen = StringSize(&data[2 + mimeLen], encoding);
652
653            *length = size - 2 - mimeLen - descLen;
654
655            return &data[2 + mimeLen + descLen];
656        } else {
657            uint8_t encoding = data[0];
658
659            if (!memcmp(&data[1], "PNG", 3)) {
660                mime->setTo("image/png");
661            } else if (!memcmp(&data[1], "JPG", 3)) {
662                mime->setTo("image/jpeg");
663            } else if (!memcmp(&data[1], "-->", 3)) {
664                mime->setTo("text/plain");
665            } else {
666                return NULL;
667            }
668
669#if 0
670            uint8_t picType = data[4];
671            if (picType != 0x03) {
672                // Front Cover Art
673                it.next();
674                continue;
675            }
676#endif
677
678            size_t descLen = StringSize(&data[5], encoding);
679
680            *length = size - 5 - descLen;
681
682            return &data[5 + descLen];
683        }
684    }
685
686    return NULL;
687}
688
689bool ID3::parseV1(const sp<DataSource> &source) {
690    const size_t V1_TAG_SIZE = 128;
691
692    off_t size;
693    if (source->getSize(&size) != OK || size < (off_t)V1_TAG_SIZE) {
694        return false;
695    }
696
697    mData = (uint8_t *)malloc(V1_TAG_SIZE);
698    if (source->readAt(size - V1_TAG_SIZE, mData, V1_TAG_SIZE)
699            != (ssize_t)V1_TAG_SIZE) {
700        free(mData);
701        mData = NULL;
702
703        return false;
704    }
705
706    if (memcmp("TAG", mData, 3)) {
707        free(mData);
708        mData = NULL;
709
710        return false;
711    }
712
713    mSize = V1_TAG_SIZE;
714    mFirstFrameOffset = 3;
715
716    if (mData[V1_TAG_SIZE - 3] != 0) {
717        mVersion = ID3_V1;
718    } else {
719        mVersion = ID3_V1_1;
720    }
721
722    return true;
723}
724
725}  // namespace android
726