ID3.cpp revision 65997f022fa3f557ba864ecc99a1aa33df54c2db
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//#define LOG_NDEBUG 0
18#define LOG_TAG "ID3"
19#include <utils/Log.h>
20
21#include "../include/ID3.h"
22
23#include <media/stagefright/DataSource.h>
24#include <media/stagefright/MediaDebug.h>
25#include <media/stagefright/Utils.h>
26#include <utils/String8.h>
27#include <byteswap.h>
28
29namespace android {
30
// Largest ID3v2 tag body, in bytes, that parseV2() is willing to buffer in
// memory (3 MiB). Tags declaring a bigger size are rejected.
static const size_t kMaxMetadataSize = 3 << 20;
32
33ID3::ID3(const sp<DataSource> &source)
34    : mIsValid(false),
35      mData(NULL),
36      mSize(0),
37      mFirstFrameOffset(0),
38      mVersion(ID3_UNKNOWN) {
39    mIsValid = parseV2(source);
40
41    if (!mIsValid) {
42        mIsValid = parseV1(source);
43    }
44}
45
46ID3::~ID3() {
47    if (mData) {
48        free(mData);
49        mData = NULL;
50    }
51}
52
53bool ID3::isValid() const {
54    return mIsValid;
55}
56
57ID3::Version ID3::version() const {
58    return mVersion;
59}
60
61bool ID3::parseV2(const sp<DataSource> &source) {
62    struct id3_header {
63        char id[3];
64        uint8_t version_major;
65        uint8_t version_minor;
66        uint8_t flags;
67        uint8_t enc_size[4];
68    };
69
70    id3_header header;
71    if (source->readAt(
72                0, &header, sizeof(header)) != (ssize_t)sizeof(header)) {
73        return false;
74    }
75
76    if (memcmp(header.id, "ID3", 3)) {
77        return false;
78    }
79
80    if (header.version_major == 0xff || header.version_minor == 0xff) {
81        return false;
82    }
83
84    if (header.version_major == 2) {
85        if (header.flags & 0x3f) {
86            // We only support the 2 high bits, if any of the lower bits are
87            // set, we cannot guarantee to understand the tag format.
88            return false;
89        }
90
91        if (header.flags & 0x40) {
92            // No compression scheme has been decided yet, ignore the
93            // tag if compression is indicated.
94
95            return false;
96        }
97    } else if (header.version_major == 3) {
98        if (header.flags & 0x1f) {
99            // We only support the 3 high bits, if any of the lower bits are
100            // set, we cannot guarantee to understand the tag format.
101            return false;
102        }
103    } else {
104        return false;
105    }
106
107    size_t size = 0;
108    for (int32_t i = 0; i < 4; ++i) {
109        if (header.enc_size[i] & 0x80) {
110            return false;
111        }
112
113        size = (size << 7) | header.enc_size[i];
114    }
115
116    if (size > kMaxMetadataSize) {
117        LOGE("skipping huge ID3 metadata of size %d", size);
118        return false;
119    }
120
121    mData = (uint8_t *)malloc(size);
122
123    if (mData == NULL) {
124        return false;
125    }
126
127    mSize = size;
128
129    if (source->readAt(sizeof(header), mData, mSize) != (ssize_t)mSize) {
130        return false;
131    }
132
133    if (header.flags & 0x80) {
134        LOGV("removing unsynchronization");
135        removeUnsynchronization();
136    }
137
138    mFirstFrameOffset = 0;
139    if (header.version_major == 3 && (header.flags & 0x40)) {
140        // Version 2.3 has an optional extended header.
141
142        if (mSize < 4) {
143            free(mData);
144            mData = NULL;
145
146            return false;
147        }
148
149        size_t extendedHeaderSize = U32_AT(&mData[0]) + 4;
150
151        if (extendedHeaderSize > mSize) {
152            free(mData);
153            mData = NULL;
154
155            return false;
156        }
157
158        mFirstFrameOffset = extendedHeaderSize;
159
160        uint16_t extendedFlags = 0;
161        if (extendedHeaderSize >= 6) {
162            extendedFlags = U16_AT(&mData[4]);
163
164            if (extendedHeaderSize >= 10) {
165                size_t paddingSize = U32_AT(&mData[6]);
166
167                if (mFirstFrameOffset + paddingSize > mSize) {
168                    free(mData);
169                    mData = NULL;
170
171                    return false;
172                }
173
174                mSize -= paddingSize;
175            }
176
177            if (extendedFlags & 0x8000) {
178                LOGV("have crc");
179            }
180        }
181    }
182
183    if (header.version_major == 2) {
184        mVersion = ID3_V2_2;
185    } else {
186        CHECK_EQ(header.version_major, 3);
187        mVersion = ID3_V2_3;
188    }
189
190    return true;
191}
192
193void ID3::removeUnsynchronization() {
194    for (size_t i = 0; i + 1 < mSize; ++i) {
195        if (mData[i] == 0xff && mData[i + 1] == 0x00) {
196            memmove(&mData[i + 1], &mData[i + 2], mSize - i - 2);
197            --mSize;
198        }
199    }
200}
201
202ID3::Iterator::Iterator(const ID3 &parent, const char *id)
203    : mParent(parent),
204      mID(NULL),
205      mOffset(mParent.mFirstFrameOffset),
206      mFrameData(NULL),
207      mFrameSize(0) {
208    if (id) {
209        mID = strdup(id);
210    }
211
212    findFrame();
213}
214
215ID3::Iterator::~Iterator() {
216    if (mID) {
217        free(mID);
218        mID = NULL;
219    }
220}
221
222bool ID3::Iterator::done() const {
223    return mFrameData == NULL;
224}
225
226void ID3::Iterator::next() {
227    if (mFrameData == NULL) {
228        return;
229    }
230
231    mOffset += mFrameSize;
232
233    findFrame();
234}
235
236void ID3::Iterator::getID(String8 *id) const {
237    id->setTo("");
238
239    if (mFrameData == NULL) {
240        return;
241    }
242
243    if (mParent.mVersion == ID3_V2_2) {
244        id->setTo((const char *)&mParent.mData[mOffset], 3);
245    } else if (mParent.mVersion == ID3_V2_3) {
246        id->setTo((const char *)&mParent.mData[mOffset], 4);
247    } else {
248        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
249
250        switch (mOffset) {
251            case 3:
252                id->setTo("TT2");
253                break;
254            case 33:
255                id->setTo("TP1");
256                break;
257            case 63:
258                id->setTo("TAL");
259                break;
260            case 93:
261                id->setTo("TYE");
262                break;
263            case 97:
264                id->setTo("COM");
265                break;
266            case 126:
267                id->setTo("TRK");
268                break;
269            case 127:
270                id->setTo("TCO");
271                break;
272            default:
273                CHECK(!"should not be here.");
274                break;
275        }
276    }
277}
278
279static void convertISO8859ToString8(
280        const uint8_t *data, size_t size,
281        String8 *s) {
282    size_t utf8len = 0;
283    for (size_t i = 0; i < size; ++i) {
284        if (data[i] == '\0') {
285            size = i;
286            break;
287        } else if (data[i] < 0x80) {
288            ++utf8len;
289        } else {
290            utf8len += 2;
291        }
292    }
293
294    if (utf8len == size) {
295        // Only ASCII characters present.
296
297        s->setTo((const char *)data, size);
298        return;
299    }
300
301    char *tmp = new char[utf8len];
302    char *ptr = tmp;
303    for (size_t i = 0; i < size; ++i) {
304        if (data[i] == '\0') {
305            break;
306        } else if (data[i] < 0x80) {
307            *ptr++ = data[i];
308        } else if (data[i] < 0xc0) {
309            *ptr++ = 0xc2;
310            *ptr++ = data[i];
311        } else {
312            *ptr++ = 0xc3;
313            *ptr++ = data[i] - 64;
314        }
315    }
316
317    s->setTo(tmp, utf8len);
318
319    delete[] tmp;
320    tmp = NULL;
321}
322
323void ID3::Iterator::getString(String8 *id) const {
324    id->setTo("");
325
326    if (mFrameData == NULL) {
327        return;
328    }
329
330    if (mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1) {
331        if (mOffset == 126 || mOffset == 127) {
332            // Special treatment for the track number and genre.
333            char tmp[16];
334            sprintf(tmp, "%d", (int)*mFrameData);
335
336            id->setTo(tmp);
337            return;
338        }
339
340        convertISO8859ToString8(mFrameData, mFrameSize, id);
341        return;
342    }
343
344    size_t n = mFrameSize - getHeaderLength() - 1;
345
346    if (*mFrameData == 0x00) {
347        // ISO 8859-1
348        convertISO8859ToString8(mFrameData + 1, n, id);
349    } else {
350        // UCS-2
351        // API wants number of characters, not number of bytes...
352        int len = n / 2;
353        const char16_t *framedata = (const char16_t *) (mFrameData + 1);
354        char16_t *framedatacopy = NULL;
355        if (*framedata == 0xfffe) {
356            // endianness marker doesn't match host endianness, convert
357            framedatacopy = new char16_t[len];
358            for (int i = 0; i < len; i++) {
359                framedatacopy[i] = bswap_16(framedata[i]);
360            }
361            framedata = framedatacopy;
362        }
363        // If the string starts with an endianness marker, skip it
364        if (*framedata == 0xfeff) {
365            framedata++;
366            len--;
367        }
368        id->setTo(framedata, len);
369        if (framedatacopy != NULL) {
370            delete[] framedatacopy;
371        }
372    }
373}
374
375const uint8_t *ID3::Iterator::getData(size_t *length) const {
376    *length = 0;
377
378    if (mFrameData == NULL) {
379        return NULL;
380    }
381
382    *length = mFrameSize - getHeaderLength();
383
384    return mFrameData;
385}
386
387size_t ID3::Iterator::getHeaderLength() const {
388    if (mParent.mVersion == ID3_V2_2) {
389        return 6;
390    } else if (mParent.mVersion == ID3_V2_3) {
391        return 10;
392    } else {
393        CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
394        return 0;
395    }
396}
397
398void ID3::Iterator::findFrame() {
399    for (;;) {
400        mFrameData = NULL;
401        mFrameSize = 0;
402
403        if (mParent.mVersion == ID3_V2_2) {
404            if (mOffset + 6 > mParent.mSize) {
405                return;
406            }
407
408            if (!memcmp(&mParent.mData[mOffset], "\0\0\0", 3)) {
409                return;
410            }
411
412            mFrameSize =
413                (mParent.mData[mOffset + 3] << 16)
414                | (mParent.mData[mOffset + 4] << 8)
415                | mParent.mData[mOffset + 5];
416
417            mFrameSize += 6;
418
419            if (mOffset + mFrameSize > mParent.mSize) {
420                LOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
421                     mOffset, mFrameSize, mParent.mSize - mOffset - 6);
422                return;
423            }
424
425            mFrameData = &mParent.mData[mOffset + 6];
426
427            if (!mID) {
428                break;
429            }
430
431            char id[4];
432            memcpy(id, &mParent.mData[mOffset], 3);
433            id[3] = '\0';
434
435            if (!strcmp(id, mID)) {
436                break;
437            }
438        } else if (mParent.mVersion == ID3_V2_3) {
439            if (mOffset + 10 > mParent.mSize) {
440                return;
441            }
442
443            if (!memcmp(&mParent.mData[mOffset], "\0\0\0\0", 4)) {
444                return;
445            }
446
447            mFrameSize = 10 + U32_AT(&mParent.mData[mOffset + 4]);
448
449            if (mOffset + mFrameSize > mParent.mSize) {
450                LOGV("partial frame at offset %d (size = %d, bytes-remaining = %d)",
451                     mOffset, mFrameSize, mParent.mSize - mOffset - 10);
452                return;
453            }
454
455            mFrameData = &mParent.mData[mOffset + 10];
456
457            if (!mID) {
458                break;
459            }
460
461            char id[5];
462            memcpy(id, &mParent.mData[mOffset], 4);
463            id[4] = '\0';
464
465            if (!strcmp(id, mID)) {
466                break;
467            }
468        } else {
469            CHECK(mParent.mVersion == ID3_V1 || mParent.mVersion == ID3_V1_1);
470
471            if (mOffset >= mParent.mSize) {
472                return;
473            }
474
475            mFrameData = &mParent.mData[mOffset];
476
477            switch (mOffset) {
478                case 3:
479                case 33:
480                case 63:
481                    mFrameSize = 30;
482                    break;
483                case 93:
484                    mFrameSize = 4;
485                    break;
486                case 97:
487                    if (mParent.mVersion == ID3_V1) {
488                        mFrameSize = 30;
489                    } else {
490                        mFrameSize = 29;
491                    }
492                    break;
493                case 126:
494                    mFrameSize = 1;
495                    break;
496                case 127:
497                    mFrameSize = 1;
498                    break;
499                default:
500                    CHECK(!"Should not be here, invalid offset.");
501                    break;
502            }
503
504            if (!mID) {
505                break;
506            }
507
508            String8 id;
509            getID(&id);
510
511            if (id == mID) {
512                break;
513            }
514        }
515
516        mOffset += mFrameSize;
517    }
518}
519
// Returns the number of bytes occupied by the terminated string at |start|,
// INCLUDING its terminator, so the result can be used directly to skip over
// the string.
//
// |encoding| is the ID3v2 text encoding byte: 0x00 means ISO 8859-1 (single
// NUL terminator); any other value is treated as UCS-2 (two-byte NUL
// terminator, scanned in 2-byte steps).
//
// The string must be terminated; the scan is not bounded.
static size_t StringSize(const uint8_t *start, uint8_t encoding) {
    if (encoding == 0x00) {
        // ISO 8859-1
        return strlen((const char *)start) + 1;
    }

    // UCS-2
    size_t n = 0;
    while (start[n] != '\0' || start[n + 1] != '\0') {
        n += 2;
    }

    // Account for the two-byte terminator, mirroring the "+ 1" above.
    return n + 2;
}
534
535const void *
536ID3::getAlbumArt(size_t *length, String8 *mime) const {
537    *length = 0;
538    mime->setTo("");
539
540    Iterator it(*this, mVersion == ID3_V2_3 ? "APIC" : "PIC");
541
542    while (!it.done()) {
543        size_t size;
544        const uint8_t *data = it.getData(&size);
545
546        if (mVersion == ID3_V2_3) {
547            uint8_t encoding = data[0];
548            mime->setTo((const char *)&data[1]);
549            size_t mimeLen = strlen((const char *)&data[1]) + 1;
550
551            uint8_t picType = data[1 + mimeLen];
552#if 0
553            if (picType != 0x03) {
554                // Front Cover Art
555                it.next();
556                continue;
557            }
558#endif
559
560            size_t descLen = StringSize(&data[2 + mimeLen], encoding);
561
562            *length = size - 2 - mimeLen - descLen;
563
564            return &data[2 + mimeLen + descLen];
565        } else {
566            uint8_t encoding = data[0];
567
568            if (!memcmp(&data[1], "PNG", 3)) {
569                mime->setTo("image/png");
570            } else if (!memcmp(&data[1], "JPG", 3)) {
571                mime->setTo("image/jpeg");
572            } else if (!memcmp(&data[1], "-->", 3)) {
573                mime->setTo("text/plain");
574            } else {
575                return NULL;
576            }
577
578#if 0
579            uint8_t picType = data[4];
580            if (picType != 0x03) {
581                // Front Cover Art
582                it.next();
583                continue;
584            }
585#endif
586
587            size_t descLen = StringSize(&data[5], encoding);
588
589            *length = size - 5 - descLen;
590
591            return &data[5 + descLen];
592        }
593    }
594
595    return NULL;
596}
597
598bool ID3::parseV1(const sp<DataSource> &source) {
599    const size_t V1_TAG_SIZE = 128;
600
601    off_t size;
602    if (source->getSize(&size) != OK || size < (off_t)V1_TAG_SIZE) {
603        return false;
604    }
605
606    mData = (uint8_t *)malloc(V1_TAG_SIZE);
607    if (source->readAt(size - V1_TAG_SIZE, mData, V1_TAG_SIZE)
608            != (ssize_t)V1_TAG_SIZE) {
609        free(mData);
610        mData = NULL;
611
612        return false;
613    }
614
615    if (memcmp("TAG", mData, 3)) {
616        free(mData);
617        mData = NULL;
618
619        return false;
620    }
621
622    mSize = V1_TAG_SIZE;
623    mFirstFrameOffset = 3;
624
625    if (mData[V1_TAG_SIZE - 3] != 0) {
626        mVersion = ID3_V1;
627    } else {
628        mVersion = ID3_V1_1;
629    }
630
631    return true;
632}
633
634}  // namespace android
635