15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/* 293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) * Copyright (C) 2013 Google Inc. All rights reserved. 35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Redistribution and use in source and binary forms, with or without 55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modification, are permitted provided that the following conditions are 65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * met: 75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * * Redistributions of source code must retain the above copyright 95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * notice, this list of conditions and the following disclaimer. 105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * * Redistributions in binary form must reproduce the above 115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * copyright notice, this list of conditions and the following disclaimer 125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * in the documentation and/or other materials provided with the 135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * distribution. 145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * * Neither the name of Google Inc. nor the names of its 155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * contributors may be used to endorse or promote products derived from 165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * this software without specific prior written permission. 175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */ 305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 3193ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#ifndef StringUTF8Adaptor_h 3293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#define StringUTF8Adaptor_h 335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 3493ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/text/CString.h" 35d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles)#include "wtf/text/TextEncoding.h" 3653e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "wtf/text/WTFString.h" 375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 3893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)namespace WTF { 395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 4093ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)// This class lets you get UTF-8 data out of a String without mallocing a 4193ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)// separate buffer to hold the data if the String happens to be 8 bit and 4293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)// contain only ASCII characters. 4393ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)class StringUTF8Adaptor { 445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)public: 45d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) enum ShouldNormalize { 46d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) DoNotNormalize, 47d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) Normalize 48d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) }; 49d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) 50d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) explicit StringUTF8Adaptor(const String& string, ShouldNormalize normalize = DoNotNormalize, UnencodableHandling handling = EntitiesForUnencodables) 5193ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) : m_data(0) 5293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) , m_length(0) 535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) { 5493ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) if (string.isEmpty()) 5593ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) return; 5693ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) // Unfortunately, 8 bit WTFStrings are encoded in Latin-1 and GURL uses UTF-8 5793ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) // when processing 8 bit strings. If |relative| is entirely ASCII, we luck out 5893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) // and can avoid mallocing a new buffer to hold the UTF-8 data because UTF-8 5993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) // and Latin-1 use the same code units for ASCII code points. 6093ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) if (string.is8Bit() && string.containsOnlyASCII()) { 6193ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) m_data = reinterpret_cast<const char*>(string.characters8()); 6293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) m_length = string.length(); 6393ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) } else { 64d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) if (normalize == Normalize) 65d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) m_utf8Buffer = UTF8Encoding().normalizeAndEncode(string, handling); 66d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) else 67d5428f32f5d1719f774f62e19147104ca245a3abTorne (Richard Coles) m_utf8Buffer = string.utf8(); 6893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) m_data = m_utf8Buffer.data(); 6993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) m_length = m_utf8Buffer.length(); 7093ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) } 715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 7393ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) const char* data() const { return m_data; } 7493ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) size_t length() const { return m_length; } 755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)private: 7793ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) CString m_utf8Buffer; 7893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) const char* m_data; 7993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) size_t m_length; 805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}; 815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 8293ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)} // namespace WTF 8393ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) 8493ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)using WTF::StringUTF8Adaptor; 855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 8693ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#endif // StringUTF8Adaptor_h 87