PseudolocaleGenerator.cpp revision 7542162cb1b1fd2ce8a26dd7f3fedc8de8160d38
1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "compile/PseudolocaleGenerator.h"
18
19#include <algorithm>
20
21#include "ResourceTable.h"
22#include "ResourceValues.h"
23#include "ValueVisitor.h"
24#include "compile/Pseudolocalizer.h"
25
26using android::StringPiece;
27
28namespace aapt {
29
30std::unique_ptr<StyledString> PseudolocalizeStyledString(
31    StyledString* string, Pseudolocalizer::Method method, StringPool* pool) {
32  Pseudolocalizer localizer(method);
33
34  const StringPiece original_text = *string->value->str;
35
36  StyleString localized;
37
38  // Copy the spans. We will update their offsets when we localize.
39  localized.spans.reserve(string->value->spans.size());
40  for (const StringPool::Span& span : string->value->spans) {
41    localized.spans.push_back(
42        Span{*span.name, span.first_char, span.last_char});
43  }
44
45  // The ranges are all represented with a single value. This is the start of
46  // one range and end of another.
47  struct Range {
48    size_t start;
49
50    // If set to true, toggles the state of translatability.
51    bool toggle_translatability;
52
53    // Once the new string is localized, these are the pointers to the spans to adjust.
54    // Since this struct represents the start of one range and end of another,
55    // we have the two pointers respectively.
56    uint32_t* update_start;
57    uint32_t* update_end;
58  };
59
60  auto cmp = [](const Range& r, size_t index) -> bool {
61    return r.start < index;
62  };
63
64  // Construct the ranges. The ranges are represented like so: [0, 2, 5, 7]
65  // The ranges are the spaces in between. In this example, with a total string
66  // length of 9, the vector represents: (0,1], (2,4], (5,6], (7,9]
67  //
68  std::vector<Range> ranges;
69  ranges.push_back(Range{0, false, nullptr, nullptr});
70  ranges.push_back(Range{original_text.size() - 1, false, nullptr, nullptr});
71  for (size_t i = 0; i < string->value->spans.size(); i++) {
72    const StringPool::Span& span = string->value->spans[i];
73
74    // Insert or update the Range marker for the start of this span.
75    auto iter =
76        std::lower_bound(ranges.begin(), ranges.end(), span.first_char, cmp);
77    if (iter != ranges.end() && iter->start == span.first_char) {
78      iter->update_start = &localized.spans[i].first_char;
79    } else {
80      ranges.insert(iter, Range{span.first_char, false, &localized.spans[i].first_char, nullptr});
81    }
82
83    // Insert or update the Range marker for the end of this span.
84    iter = std::lower_bound(ranges.begin(), ranges.end(), span.last_char, cmp);
85    if (iter != ranges.end() && iter->start == span.last_char) {
86      iter->update_end = &localized.spans[i].last_char;
87    } else {
88      ranges.insert(iter, Range{span.last_char, false, nullptr, &localized.spans[i].last_char});
89    }
90  }
91
92  // Parts of the string may be untranslatable. Merge those ranges
93  // in as well, so that we have continuous sections of text to
94  // feed into the pseudolocalizer.
95  // We do this by marking the beginning of a range as either toggling
96  // the translatability state or not.
97  for (const UntranslatableSection& section : string->untranslatable_sections) {
98    auto iter = std::lower_bound(ranges.begin(), ranges.end(), section.start, cmp);
99    if (iter != ranges.end() && iter->start == section.start) {
100      // An existing span starts (or ends) here. We just need to mark that
101      // the translatability should toggle here. If translatability was
102      // already being toggled, then that means we have two adjacent ranges of untranslatable
103      // text, so remove the toggle and only toggle at the end of this range,
104      // effectively merging these ranges.
105      iter->toggle_translatability = !iter->toggle_translatability;
106    } else {
107      // Insert a new range that specifies to toggle the translatability.
108      iter = ranges.insert(iter, Range{section.start, true, nullptr, nullptr});
109    }
110
111    // Update/create an end to the untranslatable section.
112    iter = std::lower_bound(iter, ranges.end(), section.end, cmp);
113    if (iter != ranges.end() && iter->start == section.end) {
114      iter->toggle_translatability = true;
115    } else {
116      iter = ranges.insert(iter, Range{section.end, true, nullptr, nullptr});
117    }
118  }
119
120  localized.str += localizer.Start();
121
122  // Iterate over the ranges and localize each section.
123  // The text starts as translatable, and each time a range has toggle_translatability
124  // set to true, we toggle whether to translate or not.
125  // This assumes no untranslatable ranges overlap.
126  bool translatable = true;
127  for (size_t i = 0; i < ranges.size(); i++) {
128    const size_t start = ranges[i].start;
129    size_t len = original_text.size() - start;
130    if (i + 1 < ranges.size()) {
131      len = ranges[i + 1].start - start;
132    }
133
134    if (ranges[i].update_start) {
135      *ranges[i].update_start = localized.str.size();
136    }
137
138    if (ranges[i].update_end) {
139      *ranges[i].update_end = localized.str.size();
140    }
141
142    if (ranges[i].toggle_translatability) {
143      translatable = !translatable;
144    }
145
146    if (translatable) {
147      localized.str += localizer.Text(original_text.substr(start, len));
148    } else {
149      localized.str += original_text.substr(start, len);
150    }
151  }
152
153  localized.str += localizer.End();
154
155  return util::make_unique<StyledString>(pool->MakeRef(localized));
156}
157
158namespace {
159
160class Visitor : public RawValueVisitor {
161 public:
162  // Either value or item will be populated upon visiting the value.
163  std::unique_ptr<Value> value;
164  std::unique_ptr<Item> item;
165
166  Visitor(StringPool* pool, Pseudolocalizer::Method method)
167      : pool_(pool), method_(method), localizer_(method) {}
168
169  void Visit(Plural* plural) override {
170    std::unique_ptr<Plural> localized = util::make_unique<Plural>();
171    for (size_t i = 0; i < plural->values.size(); i++) {
172      Visitor sub_visitor(pool_, method_);
173      if (plural->values[i]) {
174        plural->values[i]->Accept(&sub_visitor);
175        if (sub_visitor.value) {
176          localized->values[i] = std::move(sub_visitor.item);
177        } else {
178          localized->values[i] =
179              std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
180        }
181      }
182    }
183    localized->SetSource(plural->GetSource());
184    localized->SetWeak(true);
185    value = std::move(localized);
186  }
187
188  void Visit(String* string) override {
189    const StringPiece original_string = *string->value;
190    std::string result = localizer_.Start();
191
192    // Pseudolocalize only the translatable sections.
193    size_t start = 0u;
194    for (const UntranslatableSection& section : string->untranslatable_sections) {
195      // Pseudolocalize the content before the untranslatable section.
196      const size_t len = section.start - start;
197      if (len > 0u) {
198        result += localizer_.Text(original_string.substr(start, len));
199      }
200
201      // Copy the untranslatable content.
202      result += original_string.substr(section.start, section.end - section.start);
203      start = section.end;
204    }
205
206    // Pseudolocalize the content after the last untranslatable section.
207    if (start != original_string.size()) {
208      const size_t len = original_string.size() - start;
209      result += localizer_.Text(original_string.substr(start, len));
210    }
211    result += localizer_.End();
212
213    std::unique_ptr<String> localized =
214        util::make_unique<String>(pool_->MakeRef(result));
215    localized->SetSource(string->GetSource());
216    localized->SetWeak(true);
217    item = std::move(localized);
218  }
219
220  void Visit(StyledString* string) override {
221    item = PseudolocalizeStyledString(string, method_, pool_);
222    item->SetSource(string->GetSource());
223    item->SetWeak(true);
224  }
225
226 private:
227  DISALLOW_COPY_AND_ASSIGN(Visitor);
228
229  StringPool* pool_;
230  Pseudolocalizer::Method method_;
231  Pseudolocalizer localizer_;
232};
233
234ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
235                                              Pseudolocalizer::Method m) {
236  ConfigDescription modified = base;
237  switch (m) {
238    case Pseudolocalizer::Method::kAccent:
239      modified.language[0] = 'e';
240      modified.language[1] = 'n';
241      modified.country[0] = 'X';
242      modified.country[1] = 'A';
243      break;
244
245    case Pseudolocalizer::Method::kBidi:
246      modified.language[0] = 'a';
247      modified.language[1] = 'r';
248      modified.country[0] = 'X';
249      modified.country[1] = 'B';
250      break;
251    default:
252      break;
253  }
254  return modified;
255}
256
257void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
258                            ResourceConfigValue* original_value,
259                            StringPool* pool, ResourceEntry* entry) {
260  Visitor visitor(pool, method);
261  original_value->value->Accept(&visitor);
262
263  std::unique_ptr<Value> localized_value;
264  if (visitor.value) {
265    localized_value = std::move(visitor.value);
266  } else if (visitor.item) {
267    localized_value = std::move(visitor.item);
268  }
269
270  if (!localized_value) {
271    return;
272  }
273
274  ConfigDescription config_with_accent =
275      ModifyConfigForPseudoLocale(original_value->config, method);
276
277  ResourceConfigValue* new_config_value =
278      entry->FindOrCreateValue(config_with_accent, original_value->product);
279  if (!new_config_value->value) {
280    // Only use auto-generated pseudo-localization if none is defined.
281    new_config_value->value = std::move(localized_value);
282  }
283}
284
285/**
286 * A value is pseudolocalizable if it does not define a locale (or is the
287 * default locale)
288 * and is translatable.
289 */
290static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
291  const int diff =
292      config_value->config.diff(ConfigDescription::DefaultConfig());
293  if (diff & ConfigDescription::CONFIG_LOCALE) {
294    return false;
295  }
296  return config_value->value->IsTranslatable();
297}
298
299}  // namespace
300
301bool PseudolocaleGenerator::Consume(IAaptContext* context,
302                                    ResourceTable* table) {
303  for (auto& package : table->packages) {
304    for (auto& type : package->types) {
305      for (auto& entry : type->entries) {
306        std::vector<ResourceConfigValue*> values =
307            entry->FindValuesIf(IsPseudolocalizable);
308
309        for (ResourceConfigValue* value : values) {
310          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value,
311                                 &table->string_pool, entry.get());
312          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value,
313                                 &table->string_pool, entry.get());
314        }
315      }
316    }
317  }
318  return true;
319}
320
321}  // namespace aapt
322