/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
17cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski#include "compile/PseudolocaleGenerator.h"
18ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski
19ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski#include <algorithm>
20ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski
21393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski#include "ResourceTable.h"
22393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski#include "ResourceValues.h"
23393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski#include "ValueVisitor.h"
24393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski#include "compile/Pseudolocalizer.h"
258049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski#include "util/Util.h"
26393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
27d5083f6f6b9bc76bbe64052bcec639eee752a321Adam Lesinskiusing android::StringPiece;
288049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskiusing android::StringPiece16;
29d5083f6f6b9bc76bbe64052bcec639eee752a321Adam Lesinski
30393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskinamespace aapt {
31393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
328049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski// The struct that represents both Span objects and UntranslatableSections.
338049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskistruct UnifiedSpan {
348049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Only present for Span objects. If not present, this was an UntranslatableSection.
358049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  Maybe<std::string> tag;
36cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
378049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // The UTF-16 index into the string where this span starts.
388049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  uint32_t first_char;
39cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
408049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // The UTF-16 index into the string where this span ends, inclusive.
418049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  uint32_t last_char;
428049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski};
43cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
448049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskiinline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
458049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  if (left.first_char < right.first_char) {
468049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    return true;
478049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  } else if (left.first_char > right.first_char) {
488049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    return false;
498049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  } else if (left.last_char < right.last_char) {
508049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    return true;
51cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
528049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  return false;
538049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski}
54cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
558049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskiinline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
568049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  return UnifiedSpan{*span.name, span.first_char, span.last_char};
578049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski}
588049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
598049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskiinline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
608049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  return UnifiedSpan{
618049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
628049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski}
63393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
648049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski// Merges the Span and UntranslatableSections of this StyledString into a single vector of
658049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski// UnifiedSpans. This will first check that the Spans are sorted in ascending order.
668049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskistatic std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
678049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Ensure the Spans are sorted and converted.
688049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::vector<UnifiedSpan> sorted_spans;
698049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  sorted_spans.reserve(string.value->spans.size());
708049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::transform(string.value->spans.begin(), string.value->spans.end(),
718049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
728049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
738049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Stable sort to ensure tag sequences like "<b><i>" are preserved.
748049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::stable_sort(sorted_spans.begin(), sorted_spans.end());
758049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
768049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Ensure the UntranslatableSections are sorted and converted.
778049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::vector<UnifiedSpan> sorted_untranslatable_sections;
788049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
798049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
808049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                 std::back_inserter(sorted_untranslatable_sections),
818049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                 UntranslatableSectionToUnifiedSpan);
828049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
838049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
848049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::vector<UnifiedSpan> merged_spans;
858049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
868049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  auto span_iter = sorted_spans.begin();
878049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  auto untranslatable_iter = sorted_untranslatable_sections.begin();
888049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  while (span_iter != sorted_spans.end() &&
898049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski         untranslatable_iter != sorted_untranslatable_sections.end()) {
908049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    if (*span_iter < *untranslatable_iter) {
918049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      merged_spans.push_back(std::move(*span_iter));
928049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      ++span_iter;
93cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski    } else {
948049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      merged_spans.push_back(std::move(*untranslatable_iter));
958049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      ++untranslatable_iter;
967542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    }
977542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski  }
987542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski
998049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  while (span_iter != sorted_spans.end()) {
1008049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    merged_spans.push_back(std::move(*span_iter));
1018049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    ++span_iter;
1028049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  }
1037542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski
1048049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  while (untranslatable_iter != sorted_untranslatable_sections.end()) {
1058049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    merged_spans.push_back(std::move(*untranslatable_iter));
1068049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    ++untranslatable_iter;
107cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
1088049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  return merged_spans;
1098049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski}
110393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
1118049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskistd::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
1128049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                                                         Pseudolocalizer::Method method,
1138049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                                                         StringPool* pool) {
1148049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  Pseudolocalizer localizer(method);
115393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
1168049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
1178049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // This will effectively subdivide the string into multiple sections that can be individually
1188049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // pseudolocalized, while keeping the span indices synchronized.
1198049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
120393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
1218049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
1228049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // runtime. So we will do all our processing in UTF-16, then convert back.
1238049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  const std::u16string text16 = util::Utf8ToUtf16(*string->value->str);
124393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
1258049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Convenient wrapper around the text that allows us to work with StringPieces.
1268049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  const StringPiece16 text(text16);
1278049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1288049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // The new string.
1298049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::string new_string = localizer.Start();
1308049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1318049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // The stack that keeps track of what nested Span we're in.
1328049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  std::vector<size_t> span_stack;
1338049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1348049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // The current position in the original text.
1358049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  uint32_t cursor = 0u;
1368049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1378049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // The current position in the new text.
1388049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
1398049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                                             new_string.size(), false);
140393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
1418049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
1428049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  bool translatable = true;
1438049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  size_t span_idx = 0u;
1448049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  while (span_idx < merged_spans.size() || !span_stack.empty()) {
1458049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
1468049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
1478049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1488049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    if (span != nullptr) {
1498049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      if (parent_span == nullptr || parent_span->last_char > span->first_char) {
1508049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        // There is no parent, or this span is the child of the parent.
1518049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        // Pseudolocalize all the text until this span.
1528049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
1538049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        cursor += substr.size();
1548049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1558049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        // Pseudolocalize the substring.
1568049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        std::string new_substr = util::Utf16ToUtf8(substr);
1578049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        if (translatable) {
1588049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski          new_substr = localizer.Text(new_substr);
1598049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        }
1608049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
1618049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                                           new_substr.size(), false);
1628049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        new_string += new_substr;
1638049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1648049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        // Rewrite the first_char.
1658049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        span->first_char = new_cursor;
1668049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        if (!span->tag) {
1678049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski          // An untranslatable section has begun!
1688049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski          translatable = false;
1698049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        }
1708049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        span_stack.push_back(span_idx);
1718049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        ++span_idx;
1728049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        continue;
1738049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      }
1747542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    }
1757542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski
1768049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    if (parent_span != nullptr) {
1778049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      // There is a parent, and either this span is not a child of it, or there are no more spans.
1788049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      // Pop this off the stack.
1798049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
1808049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      cursor += substr.size();
1818049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1828049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      // Pseudolocalize the substring.
1838049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      std::string new_substr = util::Utf16ToUtf8(substr);
1848049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      if (translatable) {
1858049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        new_substr = localizer.Text(new_substr);
1868049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      }
1878049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
1888049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                                         new_substr.size(), false);
1898049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      new_string += new_substr;
1908049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
1918049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      parent_span->last_char = new_cursor - 1;
1928049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      if (parent_span->tag) {
1938049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        // An end to an untranslatable section.
1948049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        translatable = true;
1958049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      }
1968049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      span_stack.pop_back();
1977542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    }
198cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
199cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
2008049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Finish the pseudolocalization at the end of the string.
2018049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
2028049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  new_string += localizer.End();
2038049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski
2048049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  StyleString localized;
2058049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  localized.str = std::move(new_string);
206393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
2078049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
2088049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  for (UnifiedSpan& span : merged_spans) {
2098049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    if (span.tag) {
2108049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski      localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
2118049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    }
2128049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  }
2137542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski  return util::make_unique<StyledString>(pool->MakeRef(localized));
214393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski}
215393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
216393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskinamespace {
217393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
218ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinskiclass Visitor : public RawValueVisitor {
219ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski public:
220cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  // Either value or item will be populated upon visiting the value.
221ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  std::unique_ptr<Value> value;
222ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  std::unique_ptr<Item> item;
223cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
224cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  Visitor(StringPool* pool, Pseudolocalizer::Method method)
225ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski      : pool_(pool), method_(method), localizer_(method) {}
226cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
227ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  void Visit(Plural* plural) override {
228cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski    std::unique_ptr<Plural> localized = util::make_unique<Plural>();
229cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski    for (size_t i = 0; i < plural->values.size(); i++) {
230ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski      Visitor sub_visitor(pool_, method_);
231cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      if (plural->values[i]) {
232ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski        plural->values[i]->Accept(&sub_visitor);
233ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski        if (sub_visitor.value) {
234ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski          localized->values[i] = std::move(sub_visitor.item);
235cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski        } else {
2368049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski          localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
237393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski        }
238cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      }
239393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski    }
240ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    localized->SetSource(plural->GetSource());
241ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    localized->SetWeak(true);
242ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    value = std::move(localized);
243cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
244cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
245ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  void Visit(String* string) override {
2467542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    const StringPiece original_string = *string->value;
2477542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    std::string result = localizer_.Start();
2487542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski
2497542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    // Pseudolocalize only the translatable sections.
2507542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    size_t start = 0u;
2517542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    for (const UntranslatableSection& section : string->untranslatable_sections) {
2527542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      // Pseudolocalize the content before the untranslatable section.
2537542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      const size_t len = section.start - start;
2547542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      if (len > 0u) {
2557542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski        result += localizer_.Text(original_string.substr(start, len));
2567542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      }
2577542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski
2587542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      // Copy the untranslatable content.
2597542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      result += original_string.substr(section.start, section.end - section.start);
2607542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      start = section.end;
2617542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    }
2627542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski
2637542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    // Pseudolocalize the content after the last untranslatable section.
2647542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    if (start != original_string.size()) {
2657542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      const size_t len = original_string.size() - start;
2667542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski      result += localizer_.Text(original_string.substr(start, len));
2677542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    }
2687542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    result += localizer_.End();
2697542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski
2708049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski    std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
271ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    localized->SetSource(string->GetSource());
272ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    localized->SetWeak(true);
273ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    item = std::move(localized);
274cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
275cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
276ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  void Visit(StyledString* string) override {
277ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    item = PseudolocalizeStyledString(string, method_, pool_);
2787542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski    item->SetSource(string->GetSource());
279ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    item->SetWeak(true);
280cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
281ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski
282ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski private:
283ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  DISALLOW_COPY_AND_ASSIGN(Visitor);
284ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski
285ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  StringPool* pool_;
286ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  Pseudolocalizer::Method method_;
287ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  Pseudolocalizer localizer_;
288393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski};
289393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
290ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam LesinskiConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
291393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski                                              Pseudolocalizer::Method m) {
292cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  ConfigDescription modified = base;
293cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  switch (m) {
294393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski    case Pseudolocalizer::Method::kAccent:
295cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.language[0] = 'e';
296cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.language[1] = 'n';
297cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.country[0] = 'X';
298cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.country[1] = 'A';
299cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      break;
300393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
301393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski    case Pseudolocalizer::Method::kBidi:
302cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.language[0] = 'a';
303cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.language[1] = 'r';
304cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.country[0] = 'X';
305cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      modified.country[1] = 'B';
306cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      break;
307393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski    default:
308cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      break;
309cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
310cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  return modified;
311393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski}
312393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
313ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinskivoid PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
314ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski                            ResourceConfigValue* original_value,
315cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski                            StringPool* pool, ResourceEntry* entry) {
316cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  Visitor visitor(pool, method);
317ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  original_value->value->Accept(&visitor);
318cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
319ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  std::unique_ptr<Value> localized_value;
320ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  if (visitor.value) {
321ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    localized_value = std::move(visitor.value);
322ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  } else if (visitor.item) {
323ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    localized_value = std::move(visitor.item);
324cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
325cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
326ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  if (!localized_value) {
327cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski    return;
328cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
329cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
330ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  ConfigDescription config_with_accent =
331ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski      ModifyConfigForPseudoLocale(original_value->config, method);
332cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
333ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  ResourceConfigValue* new_config_value =
334ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski      entry->FindOrCreateValue(config_with_accent, original_value->product);
335ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski  if (!new_config_value->value) {
336cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski    // Only use auto-generated pseudo-localization if none is defined.
337ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinski    new_config_value->value = std::move(localized_value);
338cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
339393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski}
340393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
3418049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
3428049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski// translatable.
343ce5e56e243d262a9b65459c3bd0bb9eaadd40628Adam Lesinskistatic bool IsPseudolocalizable(ResourceConfigValue* config_value) {
3448049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski  const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
345cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  if (diff & ConfigDescription::CONFIG_LOCALE) {
346cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski    return false;
347cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
3487542162cb1b1fd2ce8a26dd7f3fedc8de8160d38Adam Lesinski  return config_value->value->IsTranslatable();
349458b877488c12ea4336d8fc00a95d9c0298bd6d0Adam Lesinski}
350458b877488c12ea4336d8fc00a95d9c0298bd6d0Adam Lesinski
351cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski}  // namespace
352cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski
3538049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinskibool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
354cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  for (auto& package : table->packages) {
355cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski    for (auto& type : package->types) {
356cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      for (auto& entry : type->entries) {
3578049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski        std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
358cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski        for (ResourceConfigValue* value : values) {
3598049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
3608049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                                 entry.get());
3618049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
3628049f3da712ea9c3154b57ce2276c97e749d1f2cAdam Lesinski                                 entry.get());
363393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski        }
364cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski      }
365393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski    }
366cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  }
367cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski  return true;
368393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski}
369393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski
370cacb28f2d60858106e2819cc7d95a65e8bda890bAdam Lesinski}  // namespace aapt
371