15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ 62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/base_export.h" 12868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string16.h" 13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/string_piece.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace base { 162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 170529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// A helper class and associated data structures to adjust offsets into a 180529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// string in response to various adjustments one might do to that string 190529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// (e.g., eliminating a range). For details on offsets, see the comments by 200529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// the AdjustOffsets() function below. 210529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochclass BASE_EXPORT OffsetAdjuster { 220529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch public: 230529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch struct BASE_EXPORT Adjustment { 240529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Adjustment(size_t original_offset, 250529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch size_t original_length, 260529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch size_t output_length); 270529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch 280529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch size_t original_offset; 290529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch size_t original_length; 300529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch size_t output_length; 310529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch }; 320529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch typedef std::vector<Adjustment> Adjustments; 330529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch 340529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments 350529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // recorded in |adjustments|. 360529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // 370529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // Offsets represents insertion/selection points between characters: if |src| 380529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the 390529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // end of the string. Valid input offsets range from 0 to |src_len|. On 400529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // exit, each offset will have been modified to point at the same logical 410529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // position in the output string. If an offset cannot be successfully 420529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // adjusted (e.g., because it points into the middle of a multibyte sequence), 430529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // it will be set to string16::npos. 440529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch static void AdjustOffsets(const Adjustments& adjustments, 450529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch std::vector<size_t>* offsets_for_adjustment); 460529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch 470529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // Adjusts the single |offset| to reflect the adjustments recorded in 480529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // |adjustments|. 490529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch static void AdjustOffset(const Adjustments& adjustments, 500529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch size_t* offset); 510529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch 525c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse 535c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // of the adjustments recorded in |adjustments|. In other words, the offsets 545c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // provided represent offsets into an adjusted string and the caller wants 555c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // to know the offsets they correspond to in the original string. If an 565c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // offset cannot be successfully unadjusted (e.g., because it points into 575c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // the middle of a multibyte sequence), it will be set to string16::npos. 585c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu static void UnadjustOffsets(const Adjustments& adjustments, 595c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu std::vector<size_t>* offsets_for_unadjustment); 605c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 615c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // Adjusts the single |offset| to reflect the reverse of the adjustments 625c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu // recorded in |adjustments|. 635c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu static void UnadjustOffset(const Adjustments& adjustments, 645c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu size_t* offset); 655c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu 660529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // Combines two sequential sets of adjustments, storing the combined revised 670529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // adjustments in |adjustments_on_adjusted_string|. That is, suppose a 680529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // string was altered in some way, with the alterations recorded as 690529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // adjustments in |first_adjustments|. Then suppose the resulting string is 700529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // further altered, with the alterations recorded as adjustments scored in 710529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // |adjustments_on_adjusted_string|, with the offsets recorded in these 720529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // adjustments being with respect to the intermediate string. This function 730529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // combines the two sets of adjustments into one, storing the result in 740529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // |adjustments_on_adjusted_string|, whose offsets are correct with respect 750529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // to the original string. 760529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // 770529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // Assumes both parameters are sorted by increasing offset. 780529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // 790529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // WARNING: Only supports |first_adjustments| that involve collapsing ranges 800529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // of text, not expanding ranges. 810529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch static void MergeSequentialAdjustments( 820529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const Adjustments& first_adjustments, 830529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch Adjustments* adjustments_on_adjusted_string); 840529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch}; 850529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch 860529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// Like the conversions in utf_string_conversions.h, but also fills in an 870529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// |adjustments| parameter that reflects the alterations done to the string. 880529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// It may be NULL. 890529e5d033099cbfc42635f6f6183833b09dff6eBen MurdochBASE_EXPORT bool UTF8ToUTF16WithAdjustments( 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* src, 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t src_len, 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16* output, 930529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch base::OffsetAdjuster::Adjustments* adjustments); 940529e5d033099cbfc42635f6f6183833b09dff6eBen MurdochBASE_EXPORT string16 UTF8ToUTF16WithAdjustments( 950529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const base::StringPiece& utf8, 960529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch base::OffsetAdjuster::Adjustments* adjustments); 970529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// As above, but instead internally examines the adjustments and applies them 980529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch// to |offsets_for_adjustment|. See comments by AdjustOffsets(). 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets( 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::StringPiece& utf8, 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<size_t>* offsets_for_adjustment); 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets( 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::StringPiece16& utf16, 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<size_t>* offsets_for_adjustment); 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Limiting function callable by std::for_each which will replace any value 10858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// which is greater than |limit| with npos. Typically this is called with a 10958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// string length to clamp offsets into the string to [0, length] (as opposed to 11058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// [0, length); see comments above). 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template <typename T> 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct LimitOffset { 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit LimitOffset(size_t limit) 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : limit_(limit) {} 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void operator()(size_t& offset) { 11758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if (offset > limit_) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) offset = T::npos; 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t limit_; 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} // namespace base 1252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ 127