// MediaScannerClient.cpp revision 77f8531a739c46cffe7f7fd1e035de542deccf48
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <media/mediascanner.h> 18 19#include <utils/StringArray.h> 20 21#include "autodetect.h" 22#include "unicode/ucnv.h" 23#include "unicode/ustring.h" 24 25namespace android { 26 27MediaScannerClient::MediaScannerClient() 28 : mNames(NULL), 29 mValues(NULL), 30 mLocaleEncoding(kEncodingNone) 31{ 32} 33 34MediaScannerClient::~MediaScannerClient() 35{ 36 delete mNames; 37 delete mValues; 38} 39 40void MediaScannerClient::setLocale(const char* locale) 41{ 42 if (!locale) return; 43 44 if (!strncmp(locale, "ja", 2)) 45 mLocaleEncoding = kEncodingShiftJIS; 46 else if (!strncmp(locale, "ko", 2)) 47 mLocaleEncoding = kEncodingEUCKR; 48 else if (!strncmp(locale, "zh", 2)) { 49 if (!strcmp(locale, "zh_CN")) { 50 // simplified chinese for mainland China 51 mLocaleEncoding = kEncodingGBK; 52 } else { 53 // assume traditional for non-mainland Chinese locales (Taiwan, Hong Kong, Singapore) 54 mLocaleEncoding = kEncodingBig5; 55 } 56 } 57} 58 59void MediaScannerClient::beginFile() 60{ 61 mNames = new StringArray; 62 mValues = new StringArray; 63} 64 65bool MediaScannerClient::addStringTag(const char* name, const char* value) 66{ 67 // don't bother caching strings that are all ASCII. 68 // call handleStringTag directly instead. 
69 // check to see if value (which should be utf8) has any non-ASCII characters 70 bool nonAscii = false; 71 const char* chp = value; 72 char ch; 73 while ((ch = *chp++)) { 74 if (ch & 0x80) { 75 nonAscii = true; 76 break; 77 } 78 } 79 80 if (nonAscii) { 81 // save the strings for later so they can be used for native encoding detection 82 mNames->push_back(name); 83 mValues->push_back(value); 84 return true; 85 } 86 // else fall through 87 88 // autodetection is not necessary, so no need to cache the values 89 // pass directly to the client instead 90 return handleStringTag(name, value); 91} 92 93static uint32_t possibleEncodings(const char* s) 94{ 95 uint32_t result = kEncodingAll; 96 // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1 97 // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back 98 uint8_t ch1, ch2; 99 uint8_t* chp = (uint8_t *)s; 100 101 while ((ch1 = *chp++)) { 102 if (ch1 & 0x80) { 103 ch2 = *chp++; 104 ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F); 105 // ch1 is now the first byte of the potential native char 106 107 ch2 = *chp++; 108 if (ch2 & 0x80) 109 ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F); 110 // ch2 is now the second byte of the potential native char 111 int ch = (int)ch1 << 8 | (int)ch2; 112 result &= findPossibleEncodings(ch); 113 } 114 // else ASCII character, which could be anything 115 } 116 117 return result; 118} 119 120void MediaScannerClient::convertValues(uint32_t encoding) 121{ 122 const char* enc = NULL; 123 switch (encoding) { 124 case kEncodingShiftJIS: 125 enc = "shift-jis"; 126 break; 127 case kEncodingGBK: 128 enc = "gbk"; 129 break; 130 case kEncodingBig5: 131 enc = "Big5"; 132 break; 133 case kEncodingEUCKR: 134 enc = "EUC-KR"; 135 break; 136 } 137 138 if (enc) { 139 UErrorCode status = U_ZERO_ERROR; 140 141 UConverter *conv = ucnv_open(enc, &status); 142 if (U_FAILURE(status)) { 143 LOGE("could not create UConverter for %s\n", enc); 144 return; 145 
} 146 UConverter *utf8Conv = ucnv_open("UTF-8", &status); 147 if (U_FAILURE(status)) { 148 LOGE("could not create UConverter for UTF-8\n"); 149 ucnv_close(conv); 150 return; 151 } 152 153 // for each value string, convert from native encoding to UTF-8 154 for (int i = 0; i < mNames->size(); i++) { 155 // first we need to untangle the utf8 and convert it back to the original bytes 156 // since we are reducing the length of the string, we can do this in place 157 uint8_t* src = (uint8_t *)mValues->getEntry(i); 158 int len = strlen((char *)src); 159 uint8_t* dest = src; 160 161 uint8_t uch; 162 while ((uch = *src++)) { 163 if (uch & 0x80) 164 *dest++ = ((uch << 6) & 0xC0) | (*src++ & 0x3F); 165 else 166 *dest++ = uch; 167 } 168 *dest = 0; 169 170 // now convert from native encoding to UTF-8 171 const char* source = mValues->getEntry(i); 172 int targetLength = len * 3 + 1; 173 char* buffer = new char[targetLength]; 174 if (!buffer) 175 break; 176 char* target = buffer; 177 178 ucnv_convertEx(utf8Conv, conv, &target, target + targetLength, 179 &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status); 180 if (U_FAILURE(status)) { 181 LOGE("ucnv_convertEx failed: %d\n", status); 182 mValues->setEntry(i, "???"); 183 } else { 184 // zero terminate 185 *target = 0; 186 mValues->setEntry(i, buffer); 187 } 188 189 delete[] buffer; 190 } 191 192 ucnv_close(conv); 193 ucnv_close(utf8Conv); 194 } 195} 196 197void MediaScannerClient::endFile() 198{ 199 int size = mNames->size(); 200 uint32_t encoding = kEncodingAll; 201 202 // compute a bit mask containing all possible encodings 203 for (int i = 0; i < mNames->size(); i++) 204 encoding &= possibleEncodings(mValues->getEntry(i)); 205 206 // If one of the possible encodings matches the locale encoding, use that. 207 // Otherwise, if there is only one possible encoding, use that. 
208 if (encoding & mLocaleEncoding) 209 convertValues(mLocaleEncoding); 210 else if ((encoding & (encoding - 1)) == 0) 211 convertValues(encoding); 212 else { 213 // TODO: try harder to disambiguate the encoding, perhaps by looking at 214 // other files by same artist, or even the user's entire collection. 215 // For now, fall through and insert the strings as they are. 216 } 217 218 // finally, push all name/value pairs to the client 219 for (int i = 0; i < mNames->size(); i++) { 220 if (!handleStringTag(mNames->getEntry(i), mValues->getEntry(i))) 221 break; 222 } 223 // else addStringTag() has done all the work so we have nothing to do 224 225 delete mNames; 226 delete mValues; 227 mNames = NULL; 228 mValues = NULL; 229} 230 231} // namespace android 232 233