1/*
2 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
3 * Copyright (C) 2008 Holger Hans Peter Freyther
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "TextCodecQt.h"
29#include "PlatformString.h"
30#include <wtf/text/CString.h>
31#include <qset.h>
32
33namespace WebCore {
34
35static QSet<QByteArray> *unique_names = 0;
36
37static const char *getAtomicName(const QByteArray &name)
38{
39    if (!unique_names)
40        unique_names = new QSet<QByteArray>;
41
42    unique_names->insert(name);
43    return unique_names->find(name)->constData();
44}
45
46void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar)
47{
48    QList<int> mibs = QTextCodec::availableMibs();
49//     qDebug() << ">>>>>>>>> registerEncodingNames";
50
51    for (int i = 0; i < mibs.size(); ++i) {
52        QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
53        const char *name = getAtomicName(c->name());
54        registrar(name, name);
55//         qDebug() << "    " << name << name;
56        QList<QByteArray> aliases = c->aliases();
57        for (int i = 0; i < aliases.size(); ++i) {
58            const char *a = getAtomicName(aliases.at(i));
59//             qDebug() << "     (a) " << a << name;
60            registrar(a, name);
61        }
62    }
63}
64
65static PassOwnPtr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*)
66{
67    return new TextCodecQt(encoding);
68}
69
70void TextCodecQt::registerCodecs(TextCodecRegistrar registrar)
71{
72    QList<int> mibs = QTextCodec::availableMibs();
73//     qDebug() << ">>>>>>>>> registerCodecs";
74
75    for (int i = 0; i < mibs.size(); ++i) {
76        QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
77        const char *name = getAtomicName(c->name());
78//         qDebug() << "    " << name;
79        registrar(name, newTextCodecQt, 0);
80    }
81}
82
83TextCodecQt::TextCodecQt(const TextEncoding& encoding)
84    : m_encoding(encoding)
85{
86    m_codec = QTextCodec::codecForName(m_encoding.name());
87}
88
89TextCodecQt::~TextCodecQt()
90{
91}
92
93
94String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError)
95{
96    // We chop input buffer to smaller buffers to avoid excessive memory consumption
97    // when the input buffer is big.  This helps reduce peak memory consumption in
98    // mobile devices where system RAM is limited.
99#if OS(SYMBIAN)
100    static const int MaxInputChunkSize = 32 * 1024;
101#else
102    static const int MaxInputChunkSize = 1024 * 1024;
103#endif
104    const char* buf = bytes;
105    const char* end = buf + length;
106    String unicode(""); // a non-null string is expected
107
108    while (buf < end) {
109        int size = end - buf;
110        size = qMin(size, MaxInputChunkSize);
111        QString decoded = m_codec->toUnicode(buf, size, &m_state);
112        unicode.append(reinterpret_cast_ptr<const UChar*>(decoded.unicode()), decoded.length());
113        buf += size;
114    }
115
116    sawError = m_state.invalidChars != 0;
117
118    if (flush) {
119        m_state.flags = QTextCodec::DefaultConversion;
120        m_state.remainingChars = 0;
121        m_state.invalidChars = 0;
122    }
123
124    return unicode;
125}
126
// Encodes |length| UTF-16 code units starting at |characters| with m_codec.
//
// characters / length  Input text; an empty input returns "" without calling
//                      the codec.
// handling             Passed to getUnencodableReplacement() to choose how an
//                      unencodable character is escaped.
//
// Returns the encoded bytes as a CString. The conversion uses a fresh, local
// converter state, so nothing is carried over between encode() calls.
CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling)
{
    QTextCodec::ConverterState state;
    // ConvertInvalidToNull: the slow path below looks for '\0' bytes to find
    // where an unencodable character was written.
    state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader);

    if (!length)
        return "";

    // Fast path: convert the whole input in one call. The result is used as-is
    // unless the codec reported invalid characters.
    QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state);

    // Slow path: if some characters are unencodable, escape them as specified
    // by |handling|. The one-shot result is discarded and the input is
    // re-encoded chunk by chunk: each successfully encoded chunk is appended
    // to |ba|; an unencodable chunk is first escaped with
    // getUnencodableReplacement() and then appended.
    if (state.invalidChars) {
        state.invalidChars = 0;
        state.remainingChars = 0;
        int len = 0;
        ba.clear();
        // |pos| only counts iterations; the current chunk is
        // [characters, characters + len), and |characters| itself is advanced
        // once a chunk has been consumed.
        for (size_t pos = 0; pos < length; ++pos) {
            QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state);
            // Non-zero remainingChars: presumably the codec needs more input
            // to finish this chunk, so grow it by one code unit and retry.
            // NOTE(review): if the loop ends in this state, the pending chunk
            // is never appended to |ba| — confirm this is intended.
            if (state.remainingChars)
                continue;
            if (state.invalidChars) {
                UnencodableReplacementArray replacement;
                // NOTE(review): only the first code unit of the chunk is
                // passed here, even when the chunk spans several code units
                // (len > 1) — verify against surrogate-pair input.
                getUnencodableReplacement(characters[0], handling, replacement);
                // NOTE(review): this substitutes every '\0' byte in the
                // chunk's output, not only the one standing for the invalid
                // character — confirm no legitimate zero byte can occur here.
                tba.replace('\0', replacement);
                state.invalidChars = 0;
            }
            ba.append(tba);
            // Chunk consumed: start the next one right after it.
            characters += len;
            len = 0;
            state.remainingChars = 0;
        }
    }

    return CString(ba.constData(), ba.length());
}
164
165
166} // namespace WebCore
167