utf_offset_string_conversions.h revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/base_export.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string16.h"
13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/string_piece.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace base {
162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Like the conversions in utf_string_conversions.h, but also takes one or more
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// offsets (|offset[s]_for_adjustment|) into the source strings, each offset
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// will be adjusted to point at the same logical place in the result strings.
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If this isn't possible because an offset points past the end of the source
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// strings or into the middle of a multibyte sequence, the offending offset will
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// be set to string16::npos. |offset[s]_for_adjustment| may be NULL.
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT bool UTF8ToUTF16AndAdjustOffset(const char* src,
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                            size_t src_len,
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                            string16* output,
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                            size_t* offset_for_adjustment);
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT bool UTF8ToUTF16AndAdjustOffsets(
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const char* src,
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t src_len,
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    string16* output,
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::vector<size_t>* offsets_for_adjustment);
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8,
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                size_t* offset_for_adjustment);
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::StringPiece& utf8,
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::vector<size_t>* offsets_for_adjustment);
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffset(
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::StringPiece16& utf16,
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t* offset_for_adjustment);
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const base::StringPiece16& utf16,
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::vector<size_t>* offsets_for_adjustment);
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Limiting functor, callable by std::for_each over a sequence of offsets,
// which replaces any value that is equal to or greater than |limit| with
// T::npos and leaves smaller values untouched.  |T| is the type that supplies
// npos (for example string16 or std::string).
template <typename T>
struct LimitOffset {
  explicit LimitOffset(size_t limit) : limit_(limit) {}

  // Const because the call only reads |limit_|; this lets the functor be
  // invoked through a const object or const reference as well.
  void operator()(size_t& offset) const {
    if (offset >= limit_)
      offset = T::npos;
  }

  // Offsets at or beyond this value are replaced with T::npos.
  size_t limit_;
};
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Stack object which, on destruction, will update a vector of offsets based on
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// any supplied adjustments.  To use, declare one of these, providing the
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// address of the offset vector to adjust.  Then Add() any number of Adjustments
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (each Adjustment gives the |original_offset| of a substring and the lengths
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// of the substring before and after transforming).  When the OffsetAdjuster
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// goes out of scope, all the offsets in the provided vector will be updated.
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BASE_EXPORT OffsetAdjuster {
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct BASE_EXPORT Adjustment {
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Adjustment(size_t original_offset,
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               size_t original_length,
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               size_t output_length);
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t original_offset;
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t original_length;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t output_length;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~OffsetAdjuster();
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Add(const Adjustment& adjustment);
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void AdjustOffset(std::vector<size_t>::iterator offset);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<size_t>* offsets_for_adjustment_;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<Adjustment> adjustments_;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}  // namespace base
922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
94