1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "compile/PseudolocaleGenerator.h"
18
19#include <algorithm>
20
21#include "ResourceTable.h"
22#include "ResourceValues.h"
23#include "ValueVisitor.h"
24#include "compile/Pseudolocalizer.h"
25#include "util/Util.h"
26
27using android::StringPiece;
28using android::StringPiece16;
29
30namespace aapt {
31
// The struct that represents both Span objects and UntranslatableSections, so that both kinds of
// ranges can be sorted and processed through a single list.
struct UnifiedSpan {
  // Only present for Span objects. If not present, this was an UntranslatableSection.
  Maybe<std::string> tag;

  // The UTF-16 index into the string where this span starts.
  uint32_t first_char;

  // The UTF-16 index into the string where this span ends, inclusive.
  uint32_t last_char;
};
43
44inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
45  if (left.first_char < right.first_char) {
46    return true;
47  } else if (left.first_char > right.first_char) {
48    return false;
49  } else if (left.last_char < right.last_char) {
50    return true;
51  }
52  return false;
53}
54
55inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
56  return UnifiedSpan{*span.name, span.first_char, span.last_char};
57}
58
59inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
60  return UnifiedSpan{
61      {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
62}
63
64// Merges the Span and UntranslatableSections of this StyledString into a single vector of
65// UnifiedSpans. This will first check that the Spans are sorted in ascending order.
66static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
67  // Ensure the Spans are sorted and converted.
68  std::vector<UnifiedSpan> sorted_spans;
69  sorted_spans.reserve(string.value->spans.size());
70  std::transform(string.value->spans.begin(), string.value->spans.end(),
71                 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
72
73  // Stable sort to ensure tag sequences like "<b><i>" are preserved.
74  std::stable_sort(sorted_spans.begin(), sorted_spans.end());
75
76  // Ensure the UntranslatableSections are sorted and converted.
77  std::vector<UnifiedSpan> sorted_untranslatable_sections;
78  sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
79  std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
80                 std::back_inserter(sorted_untranslatable_sections),
81                 UntranslatableSectionToUnifiedSpan);
82  std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
83
84  std::vector<UnifiedSpan> merged_spans;
85  merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
86  auto span_iter = sorted_spans.begin();
87  auto untranslatable_iter = sorted_untranslatable_sections.begin();
88  while (span_iter != sorted_spans.end() &&
89         untranslatable_iter != sorted_untranslatable_sections.end()) {
90    if (*span_iter < *untranslatable_iter) {
91      merged_spans.push_back(std::move(*span_iter));
92      ++span_iter;
93    } else {
94      merged_spans.push_back(std::move(*untranslatable_iter));
95      ++untranslatable_iter;
96    }
97  }
98
99  while (span_iter != sorted_spans.end()) {
100    merged_spans.push_back(std::move(*span_iter));
101    ++span_iter;
102  }
103
104  while (untranslatable_iter != sorted_untranslatable_sections.end()) {
105    merged_spans.push_back(std::move(*untranslatable_iter));
106    ++untranslatable_iter;
107  }
108  return merged_spans;
109}
110
111std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
112                                                         Pseudolocalizer::Method method,
113                                                         StringPool* pool) {
114  Pseudolocalizer localizer(method);
115
116  // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
117  // This will effectively subdivide the string into multiple sections that can be individually
118  // pseudolocalized, while keeping the span indices synchronized.
119  std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
120
121  // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
122  // runtime. So we will do all our processing in UTF-16, then convert back.
123  const std::u16string text16 = util::Utf8ToUtf16(*string->value->str);
124
125  // Convenient wrapper around the text that allows us to work with StringPieces.
126  const StringPiece16 text(text16);
127
128  // The new string.
129  std::string new_string = localizer.Start();
130
131  // The stack that keeps track of what nested Span we're in.
132  std::vector<size_t> span_stack;
133
134  // The current position in the original text.
135  uint32_t cursor = 0u;
136
137  // The current position in the new text.
138  uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
139                                             new_string.size(), false);
140
141  // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
142  bool translatable = true;
143  size_t span_idx = 0u;
144  while (span_idx < merged_spans.size() || !span_stack.empty()) {
145    UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
146    UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
147
148    if (span != nullptr) {
149      if (parent_span == nullptr || parent_span->last_char > span->first_char) {
150        // There is no parent, or this span is the child of the parent.
151        // Pseudolocalize all the text until this span.
152        const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
153        cursor += substr.size();
154
155        // Pseudolocalize the substring.
156        std::string new_substr = util::Utf16ToUtf8(substr);
157        if (translatable) {
158          new_substr = localizer.Text(new_substr);
159        }
160        new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
161                                           new_substr.size(), false);
162        new_string += new_substr;
163
164        // Rewrite the first_char.
165        span->first_char = new_cursor;
166        if (!span->tag) {
167          // An untranslatable section has begun!
168          translatable = false;
169        }
170        span_stack.push_back(span_idx);
171        ++span_idx;
172        continue;
173      }
174    }
175
176    if (parent_span != nullptr) {
177      // There is a parent, and either this span is not a child of it, or there are no more spans.
178      // Pop this off the stack.
179      const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
180      cursor += substr.size();
181
182      // Pseudolocalize the substring.
183      std::string new_substr = util::Utf16ToUtf8(substr);
184      if (translatable) {
185        new_substr = localizer.Text(new_substr);
186      }
187      new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
188                                         new_substr.size(), false);
189      new_string += new_substr;
190
191      parent_span->last_char = new_cursor - 1;
192      if (parent_span->tag) {
193        // An end to an untranslatable section.
194        translatable = true;
195      }
196      span_stack.pop_back();
197    }
198  }
199
200  // Finish the pseudolocalization at the end of the string.
201  new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
202  new_string += localizer.End();
203
204  StyleString localized;
205  localized.str = std::move(new_string);
206
207  // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
208  for (UnifiedSpan& span : merged_spans) {
209    if (span.tag) {
210      localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
211    }
212  }
213  return util::make_unique<StyledString>(pool->MakeRef(localized));
214}
215
216namespace {
217
218class Visitor : public RawValueVisitor {
219 public:
220  // Either value or item will be populated upon visiting the value.
221  std::unique_ptr<Value> value;
222  std::unique_ptr<Item> item;
223
224  Visitor(StringPool* pool, Pseudolocalizer::Method method)
225      : pool_(pool), method_(method), localizer_(method) {}
226
227  void Visit(Plural* plural) override {
228    std::unique_ptr<Plural> localized = util::make_unique<Plural>();
229    for (size_t i = 0; i < plural->values.size(); i++) {
230      Visitor sub_visitor(pool_, method_);
231      if (plural->values[i]) {
232        plural->values[i]->Accept(&sub_visitor);
233        if (sub_visitor.value) {
234          localized->values[i] = std::move(sub_visitor.item);
235        } else {
236          localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
237        }
238      }
239    }
240    localized->SetSource(plural->GetSource());
241    localized->SetWeak(true);
242    value = std::move(localized);
243  }
244
245  void Visit(String* string) override {
246    const StringPiece original_string = *string->value;
247    std::string result = localizer_.Start();
248
249    // Pseudolocalize only the translatable sections.
250    size_t start = 0u;
251    for (const UntranslatableSection& section : string->untranslatable_sections) {
252      // Pseudolocalize the content before the untranslatable section.
253      const size_t len = section.start - start;
254      if (len > 0u) {
255        result += localizer_.Text(original_string.substr(start, len));
256      }
257
258      // Copy the untranslatable content.
259      result += original_string.substr(section.start, section.end - section.start);
260      start = section.end;
261    }
262
263    // Pseudolocalize the content after the last untranslatable section.
264    if (start != original_string.size()) {
265      const size_t len = original_string.size() - start;
266      result += localizer_.Text(original_string.substr(start, len));
267    }
268    result += localizer_.End();
269
270    std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
271    localized->SetSource(string->GetSource());
272    localized->SetWeak(true);
273    item = std::move(localized);
274  }
275
276  void Visit(StyledString* string) override {
277    item = PseudolocalizeStyledString(string, method_, pool_);
278    item->SetSource(string->GetSource());
279    item->SetWeak(true);
280  }
281
282 private:
283  DISALLOW_COPY_AND_ASSIGN(Visitor);
284
285  StringPool* pool_;
286  Pseudolocalizer::Method method_;
287  Pseudolocalizer localizer_;
288};
289
290ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
291                                              Pseudolocalizer::Method m) {
292  ConfigDescription modified = base;
293  switch (m) {
294    case Pseudolocalizer::Method::kAccent:
295      modified.language[0] = 'e';
296      modified.language[1] = 'n';
297      modified.country[0] = 'X';
298      modified.country[1] = 'A';
299      break;
300
301    case Pseudolocalizer::Method::kBidi:
302      modified.language[0] = 'a';
303      modified.language[1] = 'r';
304      modified.country[0] = 'X';
305      modified.country[1] = 'B';
306      break;
307    default:
308      break;
309  }
310  return modified;
311}
312
313void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
314                            ResourceConfigValue* original_value,
315                            StringPool* pool, ResourceEntry* entry) {
316  Visitor visitor(pool, method);
317  original_value->value->Accept(&visitor);
318
319  std::unique_ptr<Value> localized_value;
320  if (visitor.value) {
321    localized_value = std::move(visitor.value);
322  } else if (visitor.item) {
323    localized_value = std::move(visitor.item);
324  }
325
326  if (!localized_value) {
327    return;
328  }
329
330  ConfigDescription config_with_accent =
331      ModifyConfigForPseudoLocale(original_value->config, method);
332
333  ResourceConfigValue* new_config_value =
334      entry->FindOrCreateValue(config_with_accent, original_value->product);
335  if (!new_config_value->value) {
336    // Only use auto-generated pseudo-localization if none is defined.
337    new_config_value->value = std::move(localized_value);
338  }
339}
340
341// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
342// translatable.
343static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
344  const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
345  if (diff & ConfigDescription::CONFIG_LOCALE) {
346    return false;
347  }
348  return config_value->value->IsTranslatable();
349}
350
351}  // namespace
352
353bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
354  for (auto& package : table->packages) {
355    for (auto& type : package->types) {
356      for (auto& entry : type->entries) {
357        std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
358        for (ResourceConfigValue* value : values) {
359          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
360                                 entry.get());
361          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
362                                 entry.get());
363        }
364      }
365    }
366  }
367  return true;
368}
369
370}  // namespace aapt
371