1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ 6#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ 7 8#include <string> 9#include <vector> 10 11#include "base/base_export.h" 12#include "base/strings/string16.h" 13#include "base/strings/string_piece.h" 14 15namespace base { 16 17// A helper class and associated data structures to adjust offsets into a 18// string in response to various adjustments one might do to that string 19// (e.g., eliminating a range). For details on offsets, see the comments by 20// the AdjustOffsets() function below. 21class BASE_EXPORT OffsetAdjuster { 22 public: 23 struct BASE_EXPORT Adjustment { 24 Adjustment(size_t original_offset, 25 size_t original_length, 26 size_t output_length); 27 28 size_t original_offset; 29 size_t original_length; 30 size_t output_length; 31 }; 32 typedef std::vector<Adjustment> Adjustments; 33 34 // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments 35 // recorded in |adjustments|. 36 // 37 // Offsets represents insertion/selection points between characters: if |src| 38 // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the 39 // end of the string. Valid input offsets range from 0 to |src_len|. On 40 // exit, each offset will have been modified to point at the same logical 41 // position in the output string. If an offset cannot be successfully 42 // adjusted (e.g., because it points into the middle of a multibyte sequence), 43 // it will be set to string16::npos. 44 static void AdjustOffsets(const Adjustments& adjustments, 45 std::vector<size_t>* offsets_for_adjustment); 46 47 // Adjusts the single |offset| to reflect the adjustments recorded in 48 // |adjustments|. 49 static void AdjustOffset(const Adjustments& adjustments, 50 size_t* offset); 51 52 // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse 53 // of the adjustments recorded in |adjustments|. In other words, the offsets 54 // provided represent offsets into an adjusted string and the caller wants 55 // to know the offsets they correspond to in the original string. If an 56 // offset cannot be successfully unadjusted (e.g., because it points into 57 // the middle of a multibyte sequence), it will be set to string16::npos. 58 static void UnadjustOffsets(const Adjustments& adjustments, 59 std::vector<size_t>* offsets_for_unadjustment); 60 61 // Adjusts the single |offset| to reflect the reverse of the adjustments 62 // recorded in |adjustments|. 63 static void UnadjustOffset(const Adjustments& adjustments, 64 size_t* offset); 65 66 // Combines two sequential sets of adjustments, storing the combined revised 67 // adjustments in |adjustments_on_adjusted_string|. That is, suppose a 68 // string was altered in some way, with the alterations recorded as 69 // adjustments in |first_adjustments|. Then suppose the resulting string is 70 // further altered, with the alterations recorded as adjustments scored in 71 // |adjustments_on_adjusted_string|, with the offsets recorded in these 72 // adjustments being with respect to the intermediate string. This function 73 // combines the two sets of adjustments into one, storing the result in 74 // |adjustments_on_adjusted_string|, whose offsets are correct with respect 75 // to the original string. 76 // 77 // Assumes both parameters are sorted by increasing offset. 78 // 79 // WARNING: Only supports |first_adjustments| that involve collapsing ranges 80 // of text, not expanding ranges. 81 static void MergeSequentialAdjustments( 82 const Adjustments& first_adjustments, 83 Adjustments* adjustments_on_adjusted_string); 84}; 85 86// Like the conversions in utf_string_conversions.h, but also fills in an 87// |adjustments| parameter that reflects the alterations done to the string. 88// It may be NULL. 89BASE_EXPORT bool UTF8ToUTF16WithAdjustments( 90 const char* src, 91 size_t src_len, 92 string16* output, 93 base::OffsetAdjuster::Adjustments* adjustments); 94BASE_EXPORT string16 UTF8ToUTF16WithAdjustments( 95 const base::StringPiece& utf8, 96 base::OffsetAdjuster::Adjustments* adjustments); 97// As above, but instead internally examines the adjustments and applies them 98// to |offsets_for_adjustment|. See comments by AdjustOffsets(). 99BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets( 100 const base::StringPiece& utf8, 101 std::vector<size_t>* offsets_for_adjustment); 102 103BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets( 104 const base::StringPiece16& utf16, 105 std::vector<size_t>* offsets_for_adjustment); 106 107// Limiting function callable by std::for_each which will replace any value 108// which is greater than |limit| with npos. Typically this is called with a 109// string length to clamp offsets into the string to [0, length] (as opposed to 110// [0, length); see comments above). 111template <typename T> 112struct LimitOffset { 113 explicit LimitOffset(size_t limit) 114 : limit_(limit) {} 115 116 void operator()(size_t& offset) { 117 if (offset > limit_) 118 offset = T::npos; 119 } 120 121 size_t limit_; 122}; 123 124} // namespace base 125 126#endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ 127