// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
6#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
7
8#include <string>
9#include <vector>
10
11#include "base/base_export.h"
12#include "base/strings/string16.h"
13#include "base/strings/string_piece.h"
14
15namespace base {
16
17// A helper class and associated data structures to adjust offsets into a
18// string in response to various adjustments one might do to that string
19// (e.g., eliminating a range).  For details on offsets, see the comments by
20// the AdjustOffsets() function below.
21class BASE_EXPORT OffsetAdjuster {
22 public:
23  struct BASE_EXPORT Adjustment {
24    Adjustment(size_t original_offset,
25               size_t original_length,
26               size_t output_length);
27
28    size_t original_offset;
29    size_t original_length;
30    size_t output_length;
31  };
32  typedef std::vector<Adjustment> Adjustments;
33
34  // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
35  // recorded in |adjustments|.
36  //
37  // Offsets represents insertion/selection points between characters: if |src|
38  // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
39  // end of the string.  Valid input offsets range from 0 to |src_len|.  On
40  // exit, each offset will have been modified to point at the same logical
41  // position in the output string.  If an offset cannot be successfully
42  // adjusted (e.g., because it points into the middle of a multibyte sequence),
43  // it will be set to string16::npos.
44  static void AdjustOffsets(const Adjustments& adjustments,
45                            std::vector<size_t>* offsets_for_adjustment);
46
47  // Adjusts the single |offset| to reflect the adjustments recorded in
48  // |adjustments|.
49  static void AdjustOffset(const Adjustments& adjustments,
50                           size_t* offset);
51
52  // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
53  // of the adjustments recorded in |adjustments|.  In other words, the offsets
54  // provided represent offsets into an adjusted string and the caller wants
55  // to know the offsets they correspond to in the original string.  If an
56  // offset cannot be successfully unadjusted (e.g., because it points into
57  // the middle of a multibyte sequence), it will be set to string16::npos.
58  static void UnadjustOffsets(const Adjustments& adjustments,
59                              std::vector<size_t>* offsets_for_unadjustment);
60
61  // Adjusts the single |offset| to reflect the reverse of the adjustments
62  // recorded in |adjustments|.
63  static void UnadjustOffset(const Adjustments& adjustments,
64                             size_t* offset);
65
66  // Combines two sequential sets of adjustments, storing the combined revised
67  // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
68  // string was altered in some way, with the alterations recorded as
69  // adjustments in |first_adjustments|.  Then suppose the resulting string is
70  // further altered, with the alterations recorded as adjustments scored in
71  // |adjustments_on_adjusted_string|, with the offsets recorded in these
72  // adjustments being with respect to the intermediate string.  This function
73  // combines the two sets of adjustments into one, storing the result in
74  // |adjustments_on_adjusted_string|, whose offsets are correct with respect
75  // to the original string.
76  //
77  // Assumes both parameters are sorted by increasing offset.
78  //
79  // WARNING: Only supports |first_adjustments| that involve collapsing ranges
80  // of text, not expanding ranges.
81  static void MergeSequentialAdjustments(
82      const Adjustments& first_adjustments,
83      Adjustments* adjustments_on_adjusted_string);
84};
85
86// Like the conversions in utf_string_conversions.h, but also fills in an
87// |adjustments| parameter that reflects the alterations done to the string.
88// It may be NULL.
89BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
90    const char* src,
91    size_t src_len,
92    string16* output,
93    base::OffsetAdjuster::Adjustments* adjustments);
94BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
95    const base::StringPiece& utf8,
96    base::OffsetAdjuster::Adjustments* adjustments);
97// As above, but instead internally examines the adjustments and applies them
98// to |offsets_for_adjustment|.  See comments by AdjustOffsets().
99BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
100    const base::StringPiece& utf8,
101    std::vector<size_t>* offsets_for_adjustment);
102
103BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
104    const base::StringPiece16& utf16,
105    std::vector<size_t>* offsets_for_adjustment);
106
// Limiting function callable by std::for_each which will replace any value
// which is greater than |limit| with npos.  Typically this is called with a
// string length to clamp offsets into the string to [0, length] (as opposed to
// [0, length); see the comments by OffsetAdjuster::AdjustOffsets()).
//
// |T| supplies the sentinel value: it must expose a static |npos| member that
// is assignable to a size_t (e.g. a string type).
template <typename T>
struct LimitOffset {
  explicit LimitOffset(size_t limit)
    : limit_(limit) {}

  // Overwrites |offset| with T::npos if it is greater than |limit_|; an
  // offset equal to |limit_| is left untouched.  Declared const because the
  // functor itself is never modified, so const instances can be invoked too.
  void operator()(size_t& offset) const {
    if (offset > limit_)
      offset = T::npos;
  }

  // Largest offset that is left unmodified (inclusive bound).
  size_t limit_;
};
123
124}  // namespace base
125
126#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
127