1/* 2 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> 3 * Copyright (C) 2008 Holger Hans Peter Freyther 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include "config.h" 28#include "TextCodecQt.h" 29#include "PlatformString.h" 30#include <wtf/text/CString.h> 31#include <qset.h> 32 33namespace WebCore { 34 35static QSet<QByteArray> *unique_names = 0; 36 37static const char *getAtomicName(const QByteArray &name) 38{ 39 if (!unique_names) 40 unique_names = new QSet<QByteArray>; 41 42 unique_names->insert(name); 43 return unique_names->find(name)->constData(); 44} 45 46void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar) 47{ 48 QList<int> mibs = QTextCodec::availableMibs(); 49// qDebug() << ">>>>>>>>> registerEncodingNames"; 50 51 for (int i = 0; i < mibs.size(); ++i) { 52 QTextCodec *c = QTextCodec::codecForMib(mibs.at(i)); 53 const char *name = getAtomicName(c->name()); 54 registrar(name, name); 55// qDebug() << " " << name << name; 56 QList<QByteArray> aliases = c->aliases(); 57 for (int i = 0; i < aliases.size(); ++i) { 58 const char *a = getAtomicName(aliases.at(i)); 59// qDebug() << " (a) " << a << name; 60 registrar(a, name); 61 } 62 } 63} 64 65static PassOwnPtr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*) 66{ 67 return new TextCodecQt(encoding); 68} 69 70void TextCodecQt::registerCodecs(TextCodecRegistrar registrar) 71{ 72 QList<int> mibs = QTextCodec::availableMibs(); 73// qDebug() << ">>>>>>>>> registerCodecs"; 74 75 for (int i = 0; i < mibs.size(); ++i) { 76 QTextCodec *c = QTextCodec::codecForMib(mibs.at(i)); 77 const char *name = getAtomicName(c->name()); 78// qDebug() << " " << name; 79 registrar(name, newTextCodecQt, 0); 80 } 81} 82 83TextCodecQt::TextCodecQt(const TextEncoding& encoding) 84 : m_encoding(encoding) 85{ 86 m_codec = QTextCodec::codecForName(m_encoding.name()); 87} 88 89TextCodecQt::~TextCodecQt() 90{ 91} 92 93 94String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError) 95{ 96 // We chop input buffer to smaller buffers to avoid excessive memory consumption 97 // when the input buffer is big. This helps reduce peak memory consumption in 98 // mobile devices where system RAM is limited. 99#if OS(SYMBIAN) 100 static const int MaxInputChunkSize = 32 * 1024; 101#else 102 static const int MaxInputChunkSize = 1024 * 1024; 103#endif 104 const char* buf = bytes; 105 const char* end = buf + length; 106 String unicode(""); // a non-null string is expected 107 108 while (buf < end) { 109 int size = end - buf; 110 size = qMin(size, MaxInputChunkSize); 111 QString decoded = m_codec->toUnicode(buf, size, &m_state); 112 unicode.append(reinterpret_cast_ptr<const UChar*>(decoded.unicode()), decoded.length()); 113 buf += size; 114 } 115 116 sawError = m_state.invalidChars != 0; 117 118 if (flush) { 119 m_state.flags = QTextCodec::DefaultConversion; 120 m_state.remainingChars = 0; 121 m_state.invalidChars = 0; 122 } 123 124 return unicode; 125} 126 127CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling) 128{ 129 QTextCodec::ConverterState state; 130 state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader); 131 132 if (!length) 133 return ""; 134 135 QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state); 136 137 // If some <b> characters </b> are unencodable, escape them as specified by <b> handling </b> 138 // We append one valid encoded chunk to a QByteArray at a time. When we encounter an unencodable chunk we 139 // escape it with getUnencodableReplacement, append it, then move to the next chunk. 140 if (state.invalidChars) { 141 state.invalidChars = 0; 142 state.remainingChars = 0; 143 int len = 0; 144 ba.clear(); 145 for (size_t pos = 0; pos < length; ++pos) { 146 QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state); 147 if (state.remainingChars) 148 continue; 149 if (state.invalidChars) { 150 UnencodableReplacementArray replacement; 151 getUnencodableReplacement(characters[0], handling, replacement); 152 tba.replace('\0', replacement); 153 state.invalidChars = 0; 154 } 155 ba.append(tba); 156 characters += len; 157 len = 0; 158 state.remainingChars = 0; 159 } 160 } 161 162 return CString(ba.constData(), ba.length()); 163} 164 165 166} // namespace WebCore 167