word_trimmer.cc revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "chrome/browser/spellchecker/word_trimmer.h"
65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include <algorithm>
85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include <vector>
95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "base/i18n/break_iterator.h"
115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)string16 TrimWords(
135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t* start,
145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t end,
155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const string16& text,
165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t keep) {
175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  if (*start > text.length() || *start > end)
185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return text;
195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  if (!iter.Init())
215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return text;
225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  // A circular buffer of the last |keep + 1| words seen before position |start|
235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  // in |text|.
245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  std::vector<size_t> word_offset(keep + 1, 0);
255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  size_t first = std::string::npos;
265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  size_t last = std::string::npos;
275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  while (iter.Advance()) {
285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (iter.IsWord()) {
295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)      word_offset[keep] = iter.prev();
305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)      if ((*start >= iter.prev() && *start < iter.pos()) ||
315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)          (end > iter.prev() && end <= iter.pos())) {
3206f816c7c76bc45a15e452ade8a34e8af077693eTorne (Richard Coles)        if (first == std::string::npos)
335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)          first = word_offset[0];
345d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)        last = iter.pos();
355d92fedcae5e801a8b224de090094f2d9df0b54aTorne (Richard Coles)      }
3653e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)      if (first == std::string::npos) {
3706f816c7c76bc45a15e452ade8a34e8af077693eTorne (Richard Coles)        std::rotate(word_offset.begin(),
381e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)                    word_offset.begin() + 1,
391e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)                    word_offset.end());
40e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch      }
41e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch      if (iter.prev() > end && keep) {
42e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch        last = iter.pos();
43e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch        keep--;
44e69819bd8e388ea4ad1636a19aa6b2eed4952191Ben Murdoch      }
455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
46c1847b1379d12d0e05df27436bf19a9b1bf12deaTorne (Richard Coles)  }
475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  if (first == std::string::npos)
485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return text;
495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  *start -= first;
505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)  return text.substr(first, last - first);
515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)