// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
#define BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
#pragma once

#include <string>
#include <vector>

#include "base/base_api.h"
#include "base/string16.h"

namespace base {
class StringPiece;
}

// Like the conversions in utf_string_conversions.h, but also takes one or more
// offsets (|offset[s]_for_adjustment|) into the source strings. Each offset
// will be adjusted to point at the same logical place in the result strings.
22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// If this isn't possible because an offset points past the end of the source 23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// strings or into the middle of a multibyte sequence, the offending offset will 24ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// be set to std::wstring::npos. |offset[s]_for_adjustment| may be NULL. 25ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF8ToWideAndAdjustOffset(const char* src, 26ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t src_len, 27ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::wstring* output, 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t* offset_for_adjustment); 29ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF8ToWideAndAdjustOffsets( 30ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const char* src, 31ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t src_len, 32ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::wstring* output, 33ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t>* offsets_for_adjustment); 34ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 35ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, 36ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t* offset_for_adjustment); 37ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF8ToWideAndAdjustOffsets( 38ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const base::StringPiece& utf8, 39ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t>* offsets_for_adjustment); 40ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 41ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF16ToWideAndAdjustOffset(const char16* src, 
42ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t src_len, 43ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::wstring* output, 44ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t* offset_for_adjustment); 45ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF16ToWideAndAdjustOffsets( 46ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const char16* src, 47ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t src_len, 48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::wstring* output, 49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t>* offsets_for_adjustment); 50ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 51ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, 52ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t* offset_for_adjustment); 53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF16ToWideAndAdjustOffsets( 54ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const string16& utf16, 55ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t>* offsets_for_adjustment); 56ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 57ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Limiting function callable by std::for_each which will replace any value 58ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// which is equal to or greater than |limit| with npos. 
template <typename T>
struct LimitOffset {
  // |limit| is the first offset value that is considered out of range.
  explicit LimitOffset(size_t limit)
    : limit_(limit) {}

  // Overwrites |offset| with T::npos when it is at or beyond |limit_|;
  // smaller offsets are left untouched. Const because applying the functor
  // never changes its own state.
  void operator()(size_t& offset) const {
    if (offset >= limit_)
      offset = T::npos;
  }

  // Exclusive upper bound for offsets that are kept.
  size_t limit_;
};

// Adjustment function callable by std::for_each (operator() edits each offset
// in place and returns void) which will adjust any offset that occurs after
// one or more modified substrings. To use, create any number of
// AdjustOffset::Adjustments, drop them into a vector, then call std::for_each
// over the offsets with the function being something similar to
// AdjustOffset(adjustments). Each Adjustment gives the original |location|
// of the encoded section and the |old_length| and |new_length| of the section
// before and after decoding.
79ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstruct AdjustOffset { 80ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Helper structure which indicates where an encoded character occurred 81ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // and how long that encoding was. 82ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen struct Adjustment { 83ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen Adjustment(size_t location, size_t old_length, size_t new_length); 84ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 85ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t location; 86ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t old_length; 87ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t new_length; 88ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen }; 89ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 90ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen typedef std::vector<Adjustment> Adjustments; 91ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 92ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen explicit AdjustOffset(const Adjustments& adjustments); 93ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen void operator()(size_t& offset); 94ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 95ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const Adjustments& adjustments_; 96ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}; 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif // BASE_UTF_OFFSET_STRING_CONVERSIONS_H_ 99