// Copyright (C) 2014 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 15c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com#include "post_box_matchers.h" 16c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 17c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com#include <cstddef> 18c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com#include <map> 19c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com#include <string> 20c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com#include <utility> 21c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com#include <vector> 22c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 239f47fe3ed525accac995b095d408a825673a2ee1roubert@google.com#include <re2/re2.h> 249f47fe3ed525accac995b095d408a825673a2ee1roubert@google.com 25947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include "language.h" 26c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com#include "rule.h" 27ea5e77397b59d94af3591644c3dc23dc6f9ba22eroubert@google.com#include "util/re2ptr.h" 28ea5e77397b59d94af3591644c3dc23dc6f9ba22eroubert@google.com 29c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.comnamespace i18n { 30c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.comnamespace addressinput { 31c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 32c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.comnamespace { 33c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 34c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.comstd::map<std::string, const RE2ptr*> InitMatchers() { 35c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com static const struct { 36c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com const char* const language; 37c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com const RE2ptr ptr; 38c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com } kMatchers[] = { 39c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "ar", 
40c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com /* "صندوق بريد|ص[-. ]ب" */ 41c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com new RE2("\xD8\xB5\xD9\x86\xD8\xAF\xD9\x88\xD9\x82 " 42c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com "\xD8\xA8\xD8\xB1\xD9\x8A\xD8\xAF|\xD8\xB5[-. ]\xD8\xA8") }, 43c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 44c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "cs", new RE2("(?i)p\\.? ?p\\.? \\d") }, 45c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "da", new RE2("(?i)Postboks") }, 46c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "de", new RE2("(?i)Postfach") }, 47c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 48c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "el", 49c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com /* "T\\.? ?Θ\\.? \\d{2}" */ 50c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com new RE2("(?i)T\\.? ?\xCE\x98\\.? \\d{2}") }, 51c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 52c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "en", new RE2("Private Bag|Post(?:al)? Box") }, 53c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "es", new RE2("(?i)(?:Apartado|Casillas) de correos?") }, 54c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "fi", new RE2("(?i)Postilokero|P\\.?L\\.? \\d") }, 55c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "hr", new RE2("(?i)p\\.? ?p\\.? \\d") }, 56c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 57c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "hu", 58c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com /* "Postafi(?:[oó]|ó)k|Pf\\.? \\d" */ 59c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com new RE2("(?i)Postafi(?:[o\xC3\xB3]|o\xCC\x81)k|Pf\\.? 
\\d") }, 60c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 61c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "fr", 62c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com /* "Bo(?:[iî]|î)te Postale|BP \\d|CEDEX \\d" */ 63c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com new RE2("(?i)Bo(?:[i\xC3\xAE]|i\xCC\x82)te Postale|BP \\d|CEDEX \\d") }, 64c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 65c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "ja", 66c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com /* "私書箱\\d{1,5}号" */ 67c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com new RE2("(?i)\xE7\xA7\x81\xE6\x9B\xB8\xE7\xAE\xB1\\d{1,5}\xE5\x8F\xB7") }, 68c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 69c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "nl", new RE2("(?i)Postbus") }, 70c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "no", new RE2("(?i)Postboks") }, 71c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "pl", new RE2("(?i)Skr(?:\\.?|ytka) poczt(?:\\.?|owa)") }, 72c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "pt", new RE2("(?i)Apartado") }, 73c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 74c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "ru", 75c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com /* "абонентский ящик|[аa]\\\" */ 76c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com new RE2("(?i)\xD0\xB0\xD0\xB1\xD0\xBE\xD0\xBD\xD0\xB5\xD0\xBD\xD1\x82\xD1" 77c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com "\x81\xD0\xBA\xD0\xB8\xD0\xB9 \xD1\x8F\xD1\x89\xD0\xB8\xD0\xBA|" 78c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com "[\xD0\xB0""a]\\\"\xD1\x8F (?:(?:\xE2\x84\x96|#|N) ?)?\\d") }, 79c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 80c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "sv", new RE2("(?i)Box \\d") }, 
81c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 82c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "zh", 83c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com /* "郵政信箱.{1,5}號|郵局第.{1,10}號信箱" */ 84c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com new RE2("(?i)\xE9\x83\xB5\xE6\x94\xBF\xE4\xBF\xA1\xE7\xAE\xB1.{1,5}" 85c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com "\xE8\x99\x9F|\xE9\x83\xB5\xE5\xB1\x80\xE7\xAC\xAC.{1,10}" 86c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com "\xE8\x99\x9F\xE4\xBF\xA1\xE7\xAE\xB1") }, 87c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 88c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com { "und", new RE2("P\\.? ?O\\.? Box") } 89c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com }; 90c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 91c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com std::map<std::string, const RE2ptr*> matchers; 92c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 93c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com for (size_t i = 0; i < sizeof kMatchers / sizeof *kMatchers; ++i) { 94c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com matchers.insert(std::make_pair(kMatchers[i].language, &kMatchers[i].ptr)); 95c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com } 96c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 97c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com return matchers; 98c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com} 99c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 100000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com} // namespace 101c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 102c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com// static 103c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.comstd::vector<const RE2ptr*> PostBoxMatchers::GetMatchers( 
104c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com const Rule& country_rule) { 105c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com static const std::map<std::string, const RE2ptr*> kMatchers(InitMatchers()); 106c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 107c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com // Always add any expressions defined for "und" (English-like defaults). 108c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com std::vector<std::string> languages(1, "und"); 109c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com for (std::vector<std::string>::const_iterator 110c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com it = country_rule.GetLanguages().begin(); 111c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com it != country_rule.GetLanguages().end(); ++it) { 112947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org Language language(*it); 113947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org languages.push_back(language.base); 114c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com } 115c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 116c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com std::vector<const RE2ptr*> result; 117c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 118c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com for (std::vector<std::string>::const_iterator 119c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com it = languages.begin(); 120c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com it != languages.end(); ++it) { 121c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com std::map<std::string, const RE2ptr*>::const_iterator 122c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com jt = kMatchers.find(*it); 123c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com if (jt != kMatchers.end()) { 124c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 
result.push_back(jt->second); 125c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com } 126c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com } 127c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 128c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com return result; 129c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com} 130c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com 131c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com} // namespace addressinput 132c3c546166647eeda301711d0ee83d4f7187f3a9croubert@google.com} // namespace i18n 133