// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <string>
10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include <vector>
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
12ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/base_api.h"
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string16.h"
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Forward declaration only: this header refers to base::StringPiece solely
// by const reference in function declarations, so the full definition is not
// required here.
namespace base {
class StringPiece;
}  // namespace base
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
19ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Like the conversions in utf_string_conversions.h, but also takes one or more
20ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// offsets (|offset[s]_for_adjustment|) into the source strings, each offset
21ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// will be adjusted to point at the same logical place in the result strings.
22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// If this isn't possible because an offset points past the end of the source
23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// strings or into the middle of a multibyte sequence, the offending offset will
24ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// be set to std::wstring::npos. |offset[s]_for_adjustment| may be NULL.
25ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF8ToWideAndAdjustOffset(const char* src,
26ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                        size_t src_len,
27ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                        std::wstring* output,
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                        size_t* offset_for_adjustment);
29ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF8ToWideAndAdjustOffsets(
30ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const char* src,
31ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    size_t src_len,
32ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::wstring* output,
33ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::vector<size_t>* offsets_for_adjustment);
34ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
35ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
36ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                                size_t* offset_for_adjustment);
37ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF8ToWideAndAdjustOffsets(
38ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const base::StringPiece& utf8,
39ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::vector<size_t>* offsets_for_adjustment);
40ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
41ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF16ToWideAndAdjustOffset(const char16* src,
42ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                         size_t src_len,
43ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                         std::wstring* output,
44ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                         size_t* offset_for_adjustment);
45ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API bool UTF16ToWideAndAdjustOffsets(
46ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const char16* src,
47ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    size_t src_len,
48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::wstring* output,
49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::vector<size_t>* offsets_for_adjustment);
50ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
51ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
52ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                                 size_t* offset_for_adjustment);
53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenBASE_API std::wstring UTF16ToWideAndAdjustOffsets(
54ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const string16& utf16,
55ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::vector<size_t>* offsets_for_adjustment);
56ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
// Limiting functor callable by std::for_each: replaces any offset that is
// equal to or greater than |limit| with T::npos and leaves smaller offsets
// untouched. |T| must expose a static |npos| member (e.g. std::wstring).
template <typename T>
struct LimitOffset {
  explicit LimitOffset(size_t limit)
    : limit_(limit) {}

  // Const because applying the limit never modifies the functor itself; this
  // also allows invocation through a const LimitOffset.
  void operator()(size_t& offset) const {
    if (offset >= limit_)
      offset = T::npos;
  }

  // Exclusive upper bound: offsets strictly below this value are kept.
  size_t limit_;
};
71ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
// Adjustment functor which will adjust any offset that occurs after one or
// more modified substrings. To use, create any number of
// AdjustOffset::Adjustments, drop them into a vector, then apply
// AdjustOffset(adjustments) to each offset. operator() returns void and
// rewrites the offset through its reference parameter, so it fits an in-place
// algorithm such as std::for_each rather than std::transform (which needs a
// value-returning operation). Each Adjustment gives the original |location|
// of the encoded section and the |old_length| and |new_length| of the section
// before and after decoding.
struct AdjustOffset {
  // Helper structure which indicates where an encoded character occurred
  // and how long that encoding was.
  struct Adjustment {
    Adjustment(size_t location, size_t old_length, size_t new_length);

    size_t location;    // Original position of the encoded section.
    size_t old_length;  // Length of the section before decoding.
    size_t new_length;  // Length of the section after decoding.
  };

  typedef std::vector<Adjustment> Adjustments;

  explicit AdjustOffset(const Adjustments& adjustments);

  // Adjusts |offset| in place; defined out of line.
  void operator()(size_t& offset);

  // Held by reference, not copied.
  // NOTE(review): presumably bound to the constructor argument (definition is
  // not in this header), in which case that vector must outlive this functor
  // and any copies of it -- confirm.
  const Adjustments& adjustments_;
};
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // BASE_UTF_OFFSET_STRING_CONVERSIONS_H_
99