1/*
2 * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
3 * Copyright (C) 2010-2011 Patrick Gansterer <paroga@paroga.com>
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
 *  This library is distributed in the hope that it will be useful,
15 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 *  Library General Public License for more details.
18 *
19 *  You should have received a copy of the GNU Library General Public License
20 *  along with this library; see the file COPYING.LIB.  If not, write to
21 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 *  Boston, MA 02110-1301, USA.
23 */
24
25#include "config.h"
26#include "TextCodecWinCE.h"
27
28#include "FontCache.h"
29#include "PlatformString.h"
30#include <mlang.h>
31#include <winbase.h>
32#include <winnls.h>
33#include <wtf/HashMap.h>
34#include <wtf/HashSet.h>
35#include <wtf/text/CString.h>
36#include <wtf/text/StringConcatenate.h>
37#include <wtf/text/StringHash.h>
38
39namespace WebCore {
40
41struct CharsetInfo {
42    CString m_name;
43    String m_friendlyName;
44    UINT m_codePage;
45    Vector<CString> m_aliases;
46};
47
class LanguageManager;

// Declared ahead of the class so that the friend declaration below refers to
// this internal-linkage function. Without it the friend declaration would be
// the first declaration of languageManager() and give it external linkage,
// which conflicts with the "static" definition that follows later in the file.
static LanguageManager& languageManager();

// Fills the charset lookup tables when it is constructed. The constructor is
// private; the only way to obtain the instance is through languageManager().
class LanguageManager {
private:
    LanguageManager();

    friend LanguageManager& languageManager();
};
54
55// Usage: a lookup table used to get CharsetInfo with code page ID.
56// Key: code page ID. Value: charset information.
57static HashMap<UINT, CString>& codePageCharsets()
58{
59    static HashMap<UINT, CString> cc;
60    return cc;
61}
62
63static HashMap<String, CharsetInfo>& knownCharsets()
64{
65    static HashMap<String, CharsetInfo> kc;
66    return kc;
67}
68
69// Usage: a map that stores charsets that are supported by system. Sorted by name.
70// Key: charset. Value: code page ID.
71typedef HashSet<String> CharsetSet;
72static CharsetSet& supportedCharsets()
73{
74    static CharsetSet sl;
75    return sl;
76}
77
78static LanguageManager& languageManager()
79{
80    static LanguageManager lm;
81    return lm;
82}
83
84LanguageManager::LanguageManager()
85{
86    IEnumCodePage* enumInterface;
87    IMultiLanguage* mli = FontCache::getMultiLanguageInterface();
88    if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) {
89        MIMECPINFO cpInfo;
90        ULONG ccpInfo;
91        while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) {
92            if (!IsValidCodePage(cpInfo.uiCodePage))
93                continue;
94
95            HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage);
96
97            CString name(String(cpInfo.wszWebCharset).latin1());
98            if (i == codePageCharsets().end()) {
99                CharsetInfo info;
100                info.m_codePage = cpInfo.uiCodePage;
101                knownCharsets().set(name.data(), info);
102                i = codePageCharsets().set(cpInfo.uiCodePage, name).first;
103            }
104            if (i != codePageCharsets().end()) {
105                HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length()));
106                ASSERT(j != knownCharsets().end());
107                CharsetInfo& info = j->second;
108                info.m_name = i->second.data();
109                info.m_friendlyName = cpInfo.wszDescription;
110                info.m_aliases.append(name);
111                info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1());
112                info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1());
113                String cpName = makeString("cp", String::number(cpInfo.uiCodePage));
114                info.m_aliases.append(cpName.latin1());
115                supportedCharsets().add(i->second.data());
116            }
117        }
118        enumInterface->Release();
119    }
120}
121
122static UINT getCodePage(const char* name)
123{
124    // Explicitly use a "const" reference to fix the silly VS build error
125    // saying "==" is not found for const_iterator and iterator
126    const HashMap<String, CharsetInfo>& charsets = knownCharsets();
127    HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name);
128    return i == charsets.end() ? CP_ACP : i->second.m_codePage;
129}
130
131static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*)
132{
133    return new TextCodecWinCE(getCodePage(encoding.name()));
134}
135
136TextCodecWinCE::TextCodecWinCE(UINT codePage)
137    : m_codePage(codePage)
138{
139}
140
141TextCodecWinCE::~TextCodecWinCE()
142{
143}
144
145void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
146{
147    languageManager();
148    for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
149        HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
150        if (j != knownCharsets().end()) {
151            registrar(j->second.m_name.data(), j->second.m_name.data());
152            for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias)
153                registrar(alias->data(), j->second.m_name.data());
154        }
155    }
156}
157
158void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar)
159{
160    languageManager();
161    for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
162        HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
163        if (j != knownCharsets().end())
164            registrar(j->second.m_name.data(), newTextCodecWinCE, 0);
165    }
166}
167
168static DWORD getCodePageFlags(UINT codePage)
169{
170    if (codePage == 42) // Symbol
171        return 0;
172
173    // Microsoft says the flag must be 0 for the following code pages
174    if (codePage > 50000) {
175        if ((codePage >= 50220 && codePage <= 50222)
176            || codePage == 50225
177            || codePage == 50227
178            || codePage == 50229
179            || codePage == 52936
180            || codePage == 54936
181            || (codePage >= 57002 && codePage <= 57001)
182            || codePage == 65000 // UTF-7
183            )
184            return 0;
185    }
186
187    return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS;
188}
189
// Returns a pointer to the first byte in [bytes, bytes + length) that has its
// high bit set, or bytes + length when every byte is plain ASCII.
static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
{
    const char* const end = bytes + length;
    const char* cursor = bytes;
    while (cursor < end && !(*cursor & 0x80))
        ++cursor;
    return cursor;
}
198
199static void decodeInternal(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left)
200{
201    *left = length;
202    if (!bytes || !length)
203        return;
204
205    DWORD flags = getCodePageFlags(codePage);
206
207    int testLength = length;
208    int untestedLength = length;
209    for (;;) {
210        int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0);
211
212        if (resultLength > 0) {
213            int oldSize = result.size();
214            result.resize(oldSize + resultLength);
215
216            MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength);
217
218            if (testLength == untestedLength) {
219                *left = length - testLength;
220                break;
221            }
222            untestedLength -= testLength;
223            length -= testLength;
224            bytes += testLength;
225        } else {
226            untestedLength = testLength - 1;
227            if (!untestedLength) {
228                *left = length;
229                break;
230            }
231        }
232        testLength = (untestedLength + 1) / 2;
233    }
234}
235
236String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
237{
238    if (!m_decodeBuffer.isEmpty()) {
239        m_decodeBuffer.append(bytes, length);
240        bytes = m_decodeBuffer.data();
241        length = m_decodeBuffer.size();
242    }
243
244    size_t left;
245    Vector<UChar, 8192> result;
246    for (;;) {
247        decodeInternal(result, m_codePage, bytes, length, &left);
248        if (!left)
249            break;
250
251        if (!flush && left < 16)
252            break;
253
254        result.append(L'?');
255        sawError = true;
256        if (stopOnError)
257            return String::adopt(result);
258
259        if (left == 1)
260            break;
261
262        bytes += length - left + 1;
263        length = left - 1;
264    }
265    if (left && !flush) {
266        if (m_decodeBuffer.isEmpty())
267            m_decodeBuffer.append(bytes + length - left, left);
268        else {
269            memmove(m_decodeBuffer.data(), bytes + length - left, left);
270            m_decodeBuffer.resize(left);
271        }
272    } else
273        m_decodeBuffer.clear();
274
275    return String::adopt(result);
276}
277
278CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling)
279{
280    if (!characters || !length)
281        return CString();
282
283    int resultLength = WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, 0, 0, 0, 0);
284
285    // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
286
287    if (resultLength <= 0)
288        return "?";
289
290    char* characterBuffer;
291    CString result = CString::newUninitialized(resultLength, characterBuffer);
292
293    WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, characterBuffer, resultLength, 0, 0);
294
295    return result;
296}
297
298void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver)
299{
300    languageManager();
301    for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
302        HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
303        if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage))
304            break;
305    }
306}
307
308} // namespace WebCore
309