1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
// This file contains UTF8 strings that we want as char arrays.  To avoid
// differences between compilers, we use a script to convert the UTF8
// strings into numeric literals (\x##).
8
9#include "components/autofill/core/browser/autofill_regex_constants.h"
10
11namespace autofill {
12
13/////////////////////////////////////////////////////////////////////////////
14// address_field.cc
15/////////////////////////////////////////////////////////////////////////////
// Regex alternation: the word "attention" or its abbreviation "attn".
const char kAttentionIgnoredRe[] =
    "attention"
    "|attn";
// Regex alternation of region-like words, grouped by the locale each
// fragment is tagged with.
const char kRegionIgnoredRe[] =
    // Untagged (English) terms.
    "province|region|other"
    // es
    "|provincia"
    // pt-BR, pt-PT
    "|bairro|suburb";
// Regex alternation of company/organization words, one fragment per locale.
const char kCompanyRe[] =
    // Untagged (English) terms, both US and UK spellings of "organization".
    "company|business|organization|organisation"
    // de-DE
    "|firma|firmenname"
    // es
    "|empresa"
    // fr-FR (with and without accents)
    "|societe|société"
    // it-IT
    "|ragione.?sociale"
    // ja-JP
    "|会社"
    // ru
    "|название.?компании"
    // zh-CN
    "|单位|公司"
    // ko-KR
    "|회사|직장";
// Regex alternation for the first address line, one fragment per locale.
const char kAddressLine1Re[] =
    // Untagged (English) terms.
    "address.*line|address1|addr1|street"
    // de-DE
    "|strasse|straße|hausnummer|housenumber"
    // en-GB
    "|house.?name"
    // es (with and without accent)
    "|direccion|dirección"
    // fr-FR
    "|adresse"
    // it-IT
    "|indirizzo"
    // ja-JP
    "|住所1"
    // pt-BR, pt-PT
    "|morada|endereço"
    // ru
    "|Адрес"
    // zh-CN
    "|地址"
    // ko-KR
    "|주소.?1";
// Regex alternation of the bare word for "address" in several locales
// (no line-number suffix on any alternative).
const char kAddressLine1LabelRe[] =
    // Untagged (English) term.
    "address"
    // fr-FR
    "|adresse"
    // it-IT
    "|indirizzo"
    // ja-JP
    "|住所"
    // zh-CN
    "|地址"
    // ko-KR
    "|주소";
// Regex alternation for the second address line, one fragment per locale.
const char kAddressLine2Re[] =
    // Untagged (English) terms.
    "address.*line2|address2|addr2|street|suite|unit"
    // de-DE
    "|adresszusatz|ergänzende.?angaben"
    // es
    "|direccion2|colonia|adicional"
    // fr-FR
    "|addresssuppl|complementnom|appartement"
    // it-IT
    "|indirizzo2"
    // ja-JP
    "|住所2"
    // pt-BR, pt-PT
    "|complemento|addrcomplement"
    // ru
    "|Улица"
    // zh-CN
    "|地址2"
    // ko-KR
    "|주소.?2";
// Regex alternation of the bare word for "address" in several locales.
// Unlike kAddressLine1LabelRe in this file's original form, no ja-JP
// alternative is listed here.
const char kAddressLine2LabelRe[] =
    // Untagged (English) term.
    "address"
    // fr-FR
    "|adresse"
    // it-IT
    "|indirizzo"
    // zh-CN
    "|地址"
    // ko-KR
    "|주소";
// Regex alternation for address lines numbered 3 through 9, plus a few
// locale-specific extra-line words.
const char kAddressLinesExtraRe[] =
    // Untagged (English) terms; [3-9] accepts a single digit 3..9.
    "address.*line[3-9]|address[3-9]|addr[3-9]|street|line[3-9]"
    // es
    "|municipio"
    // fr-FR
    "|batiment|residence"
    // it-IT
    "|indirizzo[3-9]";
// Regex alternation of country words, one fragment per locale.
const char kCountryRe[] =
    // Untagged (English) terms.
    "country|countries|location"
    // es (with and without accent)
    "|país|pais"
    // ja-JP
    "|国"
    // zh-CN
    "|国家"
    // ko-KR
    "|국가|나라";
// Regex alternation of postal-code words, one fragment per locale.  The
// short abbreviations (cp, cdp, cap, cep) are wrapped in \b word boundaries
// so they only match as whole words.
const char kZipCodeRe[] =
    // Untagged (English) terms.
    "zip|postal|post.*code|pcode"
    // en-IN
    "|pin.?code"
    // de-DE
    "|postleitzahl"
    // es
    "|\\bcp\\b"
    // fr-FR
    "|\\bcdp\\b"
    // it-IT
    "|\\bcap\\b"
    // ja-JP
    "|郵便番号"
    // pt-BR, pt-PT
    "|codigo|codpos|\\bcep\\b"
    // ru
    "|Почтовый.?Индекс"
    // zh-CN
    "|邮政编码|邮编"
    // zh-TW
    "|郵遞區號"
    // ko-KR
    "|우편.?번호";
// Regex alternation for the second part of a split postal code.  The
// "^-$" alternative matches text that consists of a single hyphen only.
const char kZip4Re[] =
    // Untagged (English) terms.
    "zip|^-$|post2"
    // pt-BR, pt-PT
    "|codpos2";
// Regex alternation of city/town words, one fragment per locale.
const char kCityRe[] =
    // Untagged (English) terms.
    "city|town"
    // de-DE ("ort" only as a whole word)
    "|\\bort\\b|stadt"
    // en-AU
    "|suburb"
    // es
    "|ciudad|provincia|localidad|poblacion"
    // fr-FR
    "|ville|commune"
    // it-IT
    "|localita"
    // ja-JP
    "|市区町村"
    // pt-BR, pt-PT
    "|cidade"
    // ru
    "|Город"
    // zh-CN
    "|市"
    // zh-TW
    "|分區"
    // ko-KR
    "|^시[^도·・]|시[·・]?군[·・]?구";
// Regex alternation of state/province words, one fragment per locale.
// The leading negative lookbehind keeps "state" from matching when it is
// directly preceded by "united ".
const char kStateRe[] =
    // Untagged (English) terms.
    "(?<!united )state|county|region|province"
    // de-DE
    "|land"
    // en-UK ("county" is listed a second time here)
    "|county|principality"
    // ja-JP
    "|都道府県"
    // pt-BR, pt-PT
    "|estado|provincia"
    // ru
    "|область"
    // zh-CN
    "|省"
    // zh-TW
    "|地區"
    // ko-KR
    "|^시[·・]?도";
117
118/////////////////////////////////////////////////////////////////////////////
119// credit_card_field.cc
120/////////////////////////////////////////////////////////////////////////////
// Regex alternation of cardholder-name phrases, one fragment per locale.
const char kNameOnCardRe[] =
    // Untagged (English) terms; "on" must appear as a whole word between
    // "name" and "card".
    "card.?holder|name.*\\bon\\b.*card|cc.?name|cc.?full.?name|owner"
    // de-DE
    "|karteninhaber"
    // es
    "|nombre.*tarjeta"
    // fr-FR
    "|nom.*carte"
    // it-IT
    "|nome.*cart"
    // ja-JP
    "|名前"
    // ru
    "|Имя.*карты"
    // zh-CN
    "|信用卡开户名|开户名|持卡人姓名"
    // zh-TW (same text as the last zh-CN alternative)
    "|持卡人姓名";
// Single-word pattern: the literal text "name".
const char kNameOnCardContextualRe[] = "name";
// Regex alternation of card-number phrases, one fragment per locale.
const char kCardNumberRe[] =
    // Untagged (English) terms.
    "card.?number|card.?#|card.?no|cc.?num|acct.?num"
    // de-DE
    "|nummer"
    // es (with and without accent)
    "|credito|numero|número"
    // fr-FR
    "|numéro"
    // ja-JP
    "|カード番号"
    // ru
    "|Номер.*карты"
    // zh-CN
    "|信用卡号|信用卡号码"
    // zh-TW
    "|信用卡卡號"
    // ko-KR
    "|카드";
// Regex alternation of card verification-code phrases and abbreviations.
// "cid" is wrapped in \b word boundaries so it only matches as a whole word.
const char kCardCvcRe[] =
    "verification|card identification|security code"
    "|cvn|cvv|cvc|csc"
    "|\\bcid\\b";
// Regex alternation of card-type phrases; ".?" allows one optional
// separator character between the two words of each phrase.
const char kCardTypeRe[] =
    "card.?type"
    "|cc.?type"
    "|payment.?method";
147
148// "Expiration date" is the most common label here, but some pages have
149// "Expires", "exp. date" or "exp. month" and "exp. year".  We also look
150// for the field names ccmonth and ccyear, which appear on at least 4 of
151// our test pages.
152
153// On at least one page (The China Shop2.html) we find only the labels
154// "month" and "year".  So for now we match these words directly; we'll
155// see if this turns out to be too general.
156
157// Toolbar Bug 51451: indeed, simply matching "month" is too general for
158//   https://rps.fidelity.com/ftgw/rps/RtlCust/CreatePIN/Init.
159// Instead, we match only words beginning with "month".
// Regex alternation of expiration-month phrases, one fragment per locale.
const char kExpirationMonthRe[] =
    // Untagged (English) terms.
    "expir|exp.*mo|exp.*date|ccmonth|cardmonth"
    // de-DE
    "|gueltig|gültig|monat"
    // es
    "|fecha"
    // fr-FR
    "|date.*exp"
    // it-IT
    "|scadenza"
    // ja-JP
    "|有効期限"
    // pt-BR, pt-PT
    "|validade"
    // ru
    "|Срок действия карты"
    // zh-CN
    "|月";
// Regex alternation of expiration-year phrases, one fragment per locale.
// The "^/" alternative matches text that begins with a slash.
const char kExpirationYearRe[] =
    // Untagged (English) terms.
    "exp|^/|year"
    // de-DE.  The German word for "year" is "Jahr"; this alternative was
    // previously misspelled "yahr" and so could not match German year fields.
    "|ablaufdatum|gueltig|gültig|jahr"
    // es
    "|fecha"
    // it-IT
    "|scadenza"
    // ja-JP
    "|有効期限"
    // pt-BR, pt-PT
    "|validade"
    // ru
    "|Срок действия карты"
    // zh-CN
    "|年|有效期";
179
180// This regex is a little bit nasty, but it is simply requiring exactly two
181// adjacent y's.
// Pattern for an expiration-date text that carries a two-letter "yy" year
// placeholder: after "exp...date...", a non-'y' character, then "yy", then
// either another non-'y' character or the end of the text.
const char kExpirationDate2DigitYearRe[] =
    "exp.*date.*"
    "[^y]yy"
    "([^y]|$)";
// Regex alternation of expiration-date phrases, one fragment per locale.
const char kExpirationDateRe[] =
    // Untagged (English) terms.
    "expir|exp.*date"
    // de-DE
    "|gueltig|gültig"
    // es
    "|fecha"
    // fr-FR
    "|date.*exp"
    // it-IT
    "|scadenza"
    // ja-JP
    "|有効期限"
    // pt-BR, pt-PT
    "|validade"
    // ru
    "|Срок действия карты";
// Pattern anchored at the start of the text: matches text beginning with
// "card".
const char kCardIgnoredRe[] = "^card";
// Pattern for "gift card", allowing one optional character between the
// two words.
const char kGiftCardRe[] = "gift.?card";
197
198
199/////////////////////////////////////////////////////////////////////////////
200// email_field.cc
201/////////////////////////////////////////////////////////////////////////////
// Regex alternation of e-mail words, one fragment per locale.
const char kEmailRe[] =
    // Untagged (English) term; ".?" allows one optional separator.
    "e.?mail"
    // fr
    "|courriel"
    // ja-JP
    "|メールアドレス"
    // ru
    "|Электронной.?Почты"
    // zh-CN
    "|邮件|邮箱"
    // zh-TW
    "|電郵地址"
    // ko-KR: one of three e-mail words, optionally followed by "주소".
    "|(이메일|전자.?우편|[Ee]-?mail)(.?주소)?";
210
211
212/////////////////////////////////////////////////////////////////////////////
213// name_field.cc
214/////////////////////////////////////////////////////////////////////////////
// Regex alternation of name-like phrases (user names, IDs, titles,
// prefixes/suffixes), one fragment per locale.
const char kNameIgnoredRe[] =
    // Untagged (English) terms.
    "user.?name|user.?id|nickname|maiden name|title|prefix|suffix"
    // de-DE
    "|vollständiger.?name"
    // zh-CN
    "|用户名"
    // ko-KR
    "|(사용자.?)?아이디|사용자.?ID";
// Regex alternation of full-name phrases, one fragment per locale.
const char kNameRe[] =
    // Untagged (English) terms.
    "^name|full.?name|your.?name|customer.?name|firstandlastname"
    "|bill.?name|ship.?name"
    // es
    "|nombre.*y.*apellidos"
    // fr-FR (anchored at the start of the text)
    "|^nom"
    // ja-JP
    "|お名前|氏名"
    // pt-BR, pt-PT (anchored at the start of the text)
    "|^nome"
    // zh-CN
    "|姓名"
    // ko-KR
    "|성명";
// Regex alternation of start-anchored name words in three locales.
const char kNameSpecificRe[] =
    // Untagged (English) term.
    "^name"
    // fr-FR
    "|^nom"
    // pt-BR, pt-PT
    "|^nome";
// Regex alternation of first-name words, one fragment per locale.
const char kFirstNameRe[] =
    // Untagged (English) terms; "first$" requires "first" at the end.
    "first.*name|initials|fname|first$"
    // de-DE
    "|vorname"
    // es
    "|nombre"
    // fr-FR (with and without accent)
    "|forename|prénom|prenom"
    // ja-JP
    "|名"
    // pt-BR, pt-PT
    "|nome"
    // ru
    "|Имя"
    // ko-KR
    "|이름";
// Regex alternation of middle-initial phrases: the spelled-out form, the
// dotted abbreviation "m.i.", "mi" at the end of the text, and "mi" as a
// whole word.
const char kMiddleInitialRe[] =
    "middle.*initial"
    "|m\\.i\\."
    "|mi$"
    "|\\bmi\\b";
// Regex alternation of middle-name phrases.
const char kMiddleNameRe[] =
    // Untagged (English) terms; "middle$" requires "middle" at the end.
    "middle.*name|mname|middle$"
    // es
    "|apellido.?materno|lastlastname";
// Regex alternation of last-name words, one fragment per locale.
const char kLastNameRe[] =
    // Untagged (English) terms; "last$" requires "last" at the end.
    "last.*name|lname|surname|last$|secondname"
    // de-DE
    "|nachname"
    // es
    "|apellido"
    // fr-FR
    "|famille|^nom"
    // it-IT
    "|cognome"
    // ja-JP
    "|姓"
    // pt-BR, pt-PT
    "|morada|apelidos|surename|sobrenome"
    // ru
    "|Фамилия"
    // ko-KR: "성" optionally followed by one character other than "명".
    "|성[^명]?";
256
257/////////////////////////////////////////////////////////////////////////////
258// phone_field.cc
259/////////////////////////////////////////////////////////////////////////////
// Regex alternation of telephone words, one fragment per locale.
const char kPhoneRe[] =
    // Untagged (English) terms.
    "phone|mobile"
    // de-DE
    "|telefonnummer"
    // es (with and without accent)
    "|telefono|teléfono"
    // fr-FR
    "|telfixe"
    // ja-JP
    "|電話"
    // pt-BR, pt-PT
    "|telefone|telemovel"
    // ru
    "|телефон"
    // zh-CN
    "|电话"
    // ko-KR: one of four phone words, optionally followed by "번호".
    "|(전화|핸드폰|휴대폰|휴대전화)(.?번호)?";
// Regex alternation of country-code spellings.
const char kCountryCodeRe[] =
    "country.*code"
    "|ccode"
    "|_cc";
// Pattern matching text that consists of a single opening parenthesis
// and nothing else.
const char kAreaCodeNotextRe[] = "^\\($";
// Regex alternation of area-code words.
const char kAreaCodeRe[] =
    // Untagged (English) terms.
    "area.*code|acode|area"
    // ko-KR
    "|지역.?번호";
// Regex alternation matching text that is exactly one hyphen or exactly
// one closing parenthesis.
const char kPhonePrefixSeparatorRe[] =
    "^-$"
    "|^\\)$";
// Pattern matching text that is exactly one hyphen.
const char kPhoneSuffixSeparatorRe[] = "^-$";
// Regex alternation of phone-prefix words, one fragment per locale.
const char kPhonePrefixRe[] =
    // Untagged (English) terms.
    "prefix|exchange"
    // fr-FR
    "|preselection"
    // pt-BR, pt-PT
    "|ddd";
// Single-word pattern: the literal text "suffix".
const char kPhoneSuffixRe[] = "suffix";
// Regex alternation of phone-extension words.  "ext" must sit at a word
// boundary on at least one side; "extension" matches anywhere.
const char kPhoneExtensionRe[] =
    // Untagged (English) terms.
    "\\bext|ext\\b|extension"
    // pt-BR, pt-PT
    "|ramal";
290
291}  // namespace autofill
292