MediaScannerClient.cpp revision 83bc7f3cf78b28a818417f40a4f0c00593993366
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <media/mediascanner.h>
18
19#include <utils/StringArray.h>
20
21#include "autodetect.h"
22#include "unicode/ucnv.h"
23#include "unicode/ustring.h"
24
25namespace android {
26
27MediaScannerClient::MediaScannerClient()
28    :   mNames(NULL),
29        mValues(NULL),
30        mLocaleEncoding(kEncodingNone)
31{
32}
33
34MediaScannerClient::~MediaScannerClient()
35{
36    delete mNames;
37    delete mValues;
38}
39
40void MediaScannerClient::setLocale(const char* locale)
41{
42    if (!locale) return;
43
44    if (!strncmp(locale, "ja", 2))
45        mLocaleEncoding = kEncodingShiftJIS;
46    else if (!strncmp(locale, "ko", 2))
47        mLocaleEncoding = kEncodingEUCKR;
48    else if (!strncmp(locale, "zh", 2)) {
49        if (!strcmp(locale, "zh_CN")) {
50            // simplified chinese for mainland China
51            mLocaleEncoding = kEncodingGBK;
52        } else {
53            // assume traditional for non-mainland Chinese locales (Taiwan, Hong Kong, Singapore)
54            mLocaleEncoding = kEncodingBig5;
55        }
56    }
57}
58
59void MediaScannerClient::beginFile()
60{
61    mNames = new StringArray;
62    mValues = new StringArray;
63}
64
65status_t MediaScannerClient::addStringTag(const char* name, const char* value)
66{
67    if (mLocaleEncoding != kEncodingNone) {
68        // don't bother caching strings that are all ASCII.
69        // call handleStringTag directly instead.
70        // check to see if value (which should be utf8) has any non-ASCII characters
71        bool nonAscii = false;
72        const char* chp = value;
73        char ch;
74        while ((ch = *chp++)) {
75            if (ch & 0x80) {
76                nonAscii = true;
77                break;
78            }
79        }
80
81        if (nonAscii) {
82            // save the strings for later so they can be used for native encoding detection
83            mNames->push_back(name);
84            mValues->push_back(value);
85            return OK;
86        }
87        // else fall through
88    }
89
90    // autodetection is not necessary, so no need to cache the values
91    // pass directly to the client instead
92    return handleStringTag(name, value);
93}
94
95static uint32_t possibleEncodings(const char* s)
96{
97    uint32_t result = kEncodingAll;
98    // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1
99    // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back
100    uint8_t ch1, ch2;
101    uint8_t* chp = (uint8_t *)s;
102
103    while ((ch1 = *chp++)) {
104        if (ch1 & 0x80) {
105            ch2 = *chp++;
106            ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F);
107            // ch1 is now the first byte of the potential native char
108
109            ch2 = *chp++;
110            if (ch2 & 0x80)
111                ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F);
112            // ch2 is now the second byte of the potential native char
113            int ch = (int)ch1 << 8 | (int)ch2;
114            result &= findPossibleEncodings(ch);
115        }
116        // else ASCII character, which could be anything
117    }
118
119    return result;
120}
121
122void MediaScannerClient::convertValues(uint32_t encoding)
123{
124    const char* enc = NULL;
125    switch (encoding) {
126        case kEncodingShiftJIS:
127            enc = "shift-jis";
128            break;
129        case kEncodingGBK:
130            enc = "gbk";
131            break;
132        case kEncodingBig5:
133            enc = "Big5";
134            break;
135        case kEncodingEUCKR:
136            enc = "EUC-KR";
137            break;
138    }
139
140    if (enc) {
141        UErrorCode status = U_ZERO_ERROR;
142
143        UConverter *conv = ucnv_open(enc, &status);
144        if (U_FAILURE(status)) {
145            ALOGE("could not create UConverter for %s\n", enc);
146            return;
147        }
148        UConverter *utf8Conv = ucnv_open("UTF-8", &status);
149        if (U_FAILURE(status)) {
150            ALOGE("could not create UConverter for UTF-8\n");
151            ucnv_close(conv);
152            return;
153        }
154
155        // for each value string, convert from native encoding to UTF-8
156        for (int i = 0; i < mNames->size(); i++) {
157            // first we need to untangle the utf8 and convert it back to the original bytes
158            // since we are reducing the length of the string, we can do this in place
159            uint8_t* src = (uint8_t *)mValues->getEntry(i);
160            int len = strlen((char *)src);
161            uint8_t* dest = src;
162
163            uint8_t uch;
164            while ((uch = *src++)) {
165                if (uch & 0x80)
166                    *dest++ = ((uch << 6) & 0xC0) | (*src++ & 0x3F);
167                else
168                    *dest++ = uch;
169            }
170            *dest = 0;
171
172            // now convert from native encoding to UTF-8
173            const char* source = mValues->getEntry(i);
174            int targetLength = len * 3 + 1;
175            char* buffer = new char[targetLength];
176            // don't normally check for NULL, but in this case targetLength may be large
177            if (!buffer)
178                break;
179            char* target = buffer;
180
181            ucnv_convertEx(utf8Conv, conv, &target, target + targetLength,
182                    &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
183            if (U_FAILURE(status)) {
184                ALOGE("ucnv_convertEx failed: %d\n", status);
185                mValues->setEntry(i, "???");
186            } else {
187                // zero terminate
188                *target = 0;
189                mValues->setEntry(i, buffer);
190            }
191
192            delete[] buffer;
193        }
194
195        ucnv_close(conv);
196        ucnv_close(utf8Conv);
197    }
198}
199
200void MediaScannerClient::endFile()
201{
202    if (mLocaleEncoding != kEncodingNone) {
203        int size = mNames->size();
204        uint32_t encoding = kEncodingAll;
205
206        // compute a bit mask containing all possible encodings
207        for (int i = 0; i < mNames->size(); i++)
208            encoding &= possibleEncodings(mValues->getEntry(i));
209
210        // if the locale encoding matches, then assume we have a native encoding.
211        if (encoding & mLocaleEncoding)
212            convertValues(mLocaleEncoding);
213
214        // finally, push all name/value pairs to the client
215        for (int i = 0; i < mNames->size(); i++) {
216            status_t status = handleStringTag(mNames->getEntry(i), mValues->getEntry(i));
217            if (status) {
218                break;
219            }
220        }
221    }
222    // else addStringTag() has done all the work so we have nothing to do
223
224    delete mNames;
225    delete mValues;
226    mNames = NULL;
227    mValues = NULL;
228}
229
230}  // namespace android
231
232