utf_offset_string_conversions.h revision ddb351dbec246cf1fab5ec20d2d5520909041de1
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
6#define BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
7#pragma once
8
9#include <string>
10#include <vector>
11
12#include "base/base_api.h"
13#include "base/string16.h"
14
// Forward declaration only: this header passes base::StringPiece by const
// reference in its declarations, so the complete type is not required here.
namespace base {
class StringPiece;
}  // namespace base
18
19// Like the conversions in utf_string_conversions.h, but also takes one or more
20// offsets (|offset[s]_for_adjustment|) into the source strings, each offset
21// will be adjusted to point at the same logical place in the result strings.
22// If this isn't possible because an offset points past the end of the source
23// strings or into the middle of a multibyte sequence, the offending offset will
24// be set to std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
25BASE_API bool UTF8ToWideAndAdjustOffset(const char* src,
26                                        size_t src_len,
27                                        std::wstring* output,
28                                        size_t* offset_for_adjustment);
29BASE_API bool UTF8ToWideAndAdjustOffsets(
30    const char* src,
31    size_t src_len,
32    std::wstring* output,
33    std::vector<size_t>* offsets_for_adjustment);
34
35BASE_API std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
36                                                size_t* offset_for_adjustment);
37BASE_API std::wstring UTF8ToWideAndAdjustOffsets(
38    const base::StringPiece& utf8,
39    std::vector<size_t>* offsets_for_adjustment);
40
41BASE_API bool UTF16ToWideAndAdjustOffset(const char16* src,
42                                         size_t src_len,
43                                         std::wstring* output,
44                                         size_t* offset_for_adjustment);
45BASE_API bool UTF16ToWideAndAdjustOffsets(
46    const char16* src,
47    size_t src_len,
48    std::wstring* output,
49    std::vector<size_t>* offsets_for_adjustment);
50
51BASE_API std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
52                                                 size_t* offset_for_adjustment);
53BASE_API std::wstring UTF16ToWideAndAdjustOffsets(
54    const string16& utf16,
55    std::vector<size_t>* offsets_for_adjustment);
56
// Limiting functor, callable by std::for_each, which replaces any offset that
// is equal to or greater than |limit| with T::npos. |T| only has to provide a
// static npos member (for example std::wstring); offsets below |limit| are
// left untouched.
template <typename T>
struct LimitOffset {
  explicit LimitOffset(size_t limit)
      : limit_(limit) {}

  // Rewrites |offset| in place. Declared const because the functor's own state
  // is never modified, which also allows calls through a const LimitOffset.
  void operator()(size_t& offset) const {
    if (offset >= limit_)
      offset = T::npos;
  }

  // First offset value that is considered out of range.
  size_t limit_;
};
71
// Adjustment functor which adjusts any offset that occurs after one or more
// modified substrings. To use, create any number of AdjustOffset::Adjustments,
// drop them into a vector, then apply AdjustOffset(adjustments) to every
// offset. Because operator() returns void and rewrites its argument in place,
// it is usable with std::for_each; it cannot serve as the operation of
// std::transform, which needs a return value to assign. Each Adjustment gives
// the original |location| of the encoded section and the |old_length| and
// |new_length| of the section before and after decoding.
struct AdjustOffset {
  // Helper structure which indicates where an encoded character occurred
  // and how long that encoding was.
  struct Adjustment {
    Adjustment(size_t location, size_t old_length, size_t new_length);

    size_t location;    // Original position of the encoded section.
    size_t old_length;  // Length of the section before decoding.
    size_t new_length;  // Length of the section after decoding.
  };

  typedef std::vector<Adjustment> Adjustments;

  // NOTE(review): |adjustments_| is a reference member, so the vector passed
  // here presumably has to outlive this functor -- confirm against the
  // out-of-line constructor definition.
  explicit AdjustOffset(const Adjustments& adjustments);

  // Adjusts |offset| in place (defined out of line).
  void operator()(size_t& offset);

  const Adjustments& adjustments_;
};
97
98#endif  // BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
99