1/*
2 * This file is part of the WebKit project.
3 *
4 * Copyright (C) 2009 Michelangelo De Simone <micdesim@gmail.com>
5 * Copyright (C) 2010 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB.  If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "core/html/forms/EmailInputType.h"
26
27#include "bindings/core/v8/ScriptRegexp.h"
28#include "core/InputTypeNames.h"
29#include "core/html/HTMLInputElement.h"
30#include "core/html/parser/HTMLParserIdioms.h"
31#include "core/page/Chrome.h"
32#include "core/page/ChromeClient.h"
33#include "platform/text/PlatformLocale.h"
34#include "public/platform/Platform.h"
35#include "wtf/PassOwnPtr.h"
36#include "wtf/text/StringBuilder.h"
37#include <unicode/idna.h>
38#include <unicode/unistr.h>
39
40namespace blink {
41
42using blink::WebLocalizedString;
43
// http://www.whatwg.org/specs/web-apps/current-work/multipage/states-of-the-type-attribute.html#valid-e-mail-address
//
// |localPartCharacters| lists, in lower case, every character accepted in the
// local part of an address; it mirrors the first character class of
// |emailPattern| and is used to locate the offending character when building
// a validation message.
static const char localPartCharacters[] = "abcdefghijklmnopqrstuvwxyz0123456789!#$%&'*+/=?^_`{|}~.-";
// Regular expression an address has to match in full to be considered valid.
// Only the overall match offset and length are consumed, so every group is
// non-capturing.
static const char emailPattern[] =
    "[a-z0-9!#$%&'*+/=?^_`{|}~.-]+" // local part
    "@"
    "[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?" // domain part
    "(?:\\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*";
51
// RFC5321 says the maximum total length of a domain name is 255 octets.
// convertEmailAddressToASCII() compares the length of the converted domain
// against this limit and leaves the address untouched when it is exceeded.
static const int32_t maximumDomainNameLength = 255;
// Use the same option as in url/url_canon_icu.cc
// This value is handed to icu::IDNA::createUTS46Instance() when the shared
// converter is created.
static const int32_t idnaConversionOption = UIDNA_CHECK_BIDI;
56
57static String convertEmailAddressToASCII(const String& address)
58{
59    if (address.containsOnlyASCII())
60        return address;
61
62    size_t atPosition = address.find('@');
63    if (atPosition == kNotFound)
64        return address;
65
66    // UnicodeString ctor for copy-on-write does not work reliably (in debug
67    // build.) TODO(jshin): In an unlikely case this is a perf-issue, treat
68    // 8bit and non-8bit strings separately.
69    icu::UnicodeString idnDomainName(address.charactersWithNullTermination().data() + atPosition + 1, address.length() - atPosition - 1);
70    icu::UnicodeString domainName;
71
72    // Leak |idna| at the end.
73    UErrorCode errorCode = U_ZERO_ERROR;
74    static icu::IDNA *idna = icu::IDNA::createUTS46Instance(idnaConversionOption, errorCode);
75    ASSERT(idna);
76    icu::IDNAInfo idnaInfo;
77    idna->nameToASCII(idnDomainName, domainName, idnaInfo, errorCode);
78    if (U_FAILURE(errorCode) || idnaInfo.hasErrors() || domainName.length() > maximumDomainNameLength)
79        return address;
80
81    StringBuilder builder;
82    builder.append(address, 0, atPosition + 1);
83    builder.append(domainName.getBuffer(), domainName.length());
84    return builder.toString();
85}
86
87String EmailInputType::convertEmailAddressToUnicode(const String& address) const
88{
89    if (!address.containsOnlyASCII())
90        return address;
91
92    size_t atPosition = address.find('@');
93    if (atPosition == kNotFound)
94        return address;
95
96    if (address.find("xn--", atPosition + 1) == kNotFound)
97        return address;
98
99    if (!chrome())
100        return address;
101
102    String languages = chrome()->client().acceptLanguages();
103    String unicodeHost = blink::Platform::current()->convertIDNToUnicode(address.substring(atPosition + 1), languages);
104    StringBuilder builder;
105    builder.append(address, 0, atPosition + 1);
106    builder.append(unicodeHost);
107    return builder.toString();
108}
109
110static bool isInvalidLocalPartCharacter(UChar ch)
111{
112    if (!isASCII(ch))
113        return true;
114    DEFINE_STATIC_LOCAL(const String, validCharacters, (localPartCharacters));
115    return validCharacters.find(toASCIILower(ch)) == kNotFound;
116}
117
118static bool isInvalidDomainCharacter(UChar ch)
119{
120    if (!isASCII(ch))
121        return true;
122    return !isASCIILower(ch) && !isASCIIUpper(ch) && !isASCIIDigit(ch) && ch != '.' && ch != '-';
123}
124
125static bool checkValidDotUsage(const String& domain)
126{
127    if (domain.isEmpty())
128        return true;
129    if (domain[0] == '.' || domain[domain.length() - 1] == '.')
130        return false;
131    return domain.find("..") == kNotFound;
132}
133
134static bool isValidEmailAddress(const String& address)
135{
136    int addressLength = address.length();
137    if (!addressLength)
138        return false;
139
140    DEFINE_STATIC_LOCAL(const ScriptRegexp, regExp, (emailPattern, TextCaseInsensitive));
141
142    int matchLength;
143    int matchOffset = regExp.match(address, 0, &matchLength);
144
145    return !matchOffset && matchLength == addressLength;
146}
147
148PassRefPtrWillBeRawPtr<InputType> EmailInputType::create(HTMLInputElement& element)
149{
150    return adoptRefWillBeNoop(new EmailInputType(element));
151}
152
153void EmailInputType::countUsage()
154{
155    countUsageIfVisible(UseCounter::InputTypeEmail);
156    bool hasMaxLength = element().fastHasAttribute(HTMLNames::maxlengthAttr);
157    if (hasMaxLength)
158        countUsageIfVisible(UseCounter::InputTypeEmailMaxLength);
159    if (element().multiple()) {
160        countUsageIfVisible(UseCounter::InputTypeEmailMultiple);
161        if (hasMaxLength)
162            countUsageIfVisible(UseCounter::InputTypeEmailMultipleMaxLength);
163    }
164}
165
// Returns the form control type name for this input type, which is always
// InputTypeNames::email.
const AtomicString& EmailInputType::formControlType() const
{
    return InputTypeNames::email;
}
170
171// The return value is an invalid email address string if the specified string
172// contains an invalid email address. Otherwise, null string is returned.
173// If an empty string is returned, it means empty address is specified.
174// e.g. "foo@example.com,,bar@example.com" for multiple case.
175String EmailInputType::findInvalidAddress(const String& value) const
176{
177    if (value.isEmpty())
178        return String();
179    if (!element().multiple())
180        return isValidEmailAddress(value) ? String() : value;
181    Vector<String> addresses;
182    value.split(',', true, addresses);
183    for (unsigned i = 0; i < addresses.size(); ++i) {
184        String stripped = stripLeadingAndTrailingHTMLSpaces(addresses[i]);
185        if (!isValidEmailAddress(stripped))
186            return stripped;
187    }
188    return String();
189}
190
191bool EmailInputType::typeMismatchFor(const String& value) const
192{
193    return !findInvalidAddress(value).isNull();
194}
195
196bool EmailInputType::typeMismatch() const
197{
198    return typeMismatchFor(element().value());
199}
200
201String EmailInputType::typeMismatchText() const
202{
203    String invalidAddress = findInvalidAddress(element().value());
204    ASSERT(!invalidAddress.isNull());
205    if (invalidAddress.isEmpty())
206        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmailEmpty);
207    String atSign = String("@");
208    size_t atIndex = invalidAddress.find('@');
209    if (atIndex == kNotFound)
210        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmailNoAtSign, atSign, invalidAddress);
211    // We check validity against an ASCII value because of difficulty to check
212    // invalid characters. However we should show Unicode value.
213    String unicodeAddress = convertEmailAddressToUnicode(invalidAddress);
214    String localPart = invalidAddress.left(atIndex);
215    String domain = invalidAddress.substring(atIndex + 1);
216    if (localPart.isEmpty())
217        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmailEmptyLocal, atSign, unicodeAddress);
218    if (domain.isEmpty())
219        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmailEmptyDomain, atSign, unicodeAddress);
220    size_t invalidCharIndex = localPart.find(isInvalidLocalPartCharacter);
221    if (invalidCharIndex != kNotFound) {
222        unsigned charLength = U_IS_LEAD(localPart[invalidCharIndex]) ? 2 : 1;
223        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmailInvalidLocal, atSign, localPart.substring(invalidCharIndex, charLength));
224    }
225    invalidCharIndex = domain.find(isInvalidDomainCharacter);
226    if (invalidCharIndex != kNotFound) {
227        unsigned charLength = U_IS_LEAD(domain[invalidCharIndex]) ? 2 : 1;
228        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmailInvalidDomain, atSign, domain.substring(invalidCharIndex, charLength));
229    }
230    if (!checkValidDotUsage(domain)) {
231        size_t atIndexInUnicode = unicodeAddress.find('@');
232        ASSERT(atIndexInUnicode != kNotFound);
233        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmailInvalidDots, String("."), unicodeAddress.substring(atIndexInUnicode + 1));
234    }
235    if (element().multiple())
236        return locale().queryString(WebLocalizedString::ValidationTypeMismatchForMultipleEmail);
237    return locale().queryString(WebLocalizedString::ValidationTypeMismatchForEmail);
238}
239
// Reports whether this input type supports the selection API; for email
// inputs this is unconditionally false.
bool EmailInputType::supportsSelectionAPI() const
{
    return false;
}
244
245String EmailInputType::sanitizeValue(const String& proposedValue) const
246{
247    String noLineBreakValue = proposedValue.removeCharacters(isHTMLLineBreak);
248    if (!element().multiple())
249        return stripLeadingAndTrailingHTMLSpaces(noLineBreakValue);
250    Vector<String> addresses;
251    noLineBreakValue.split(',', true, addresses);
252    StringBuilder strippedValue;
253    for (size_t i = 0; i < addresses.size(); ++i) {
254        if (i > 0)
255            strippedValue.append(',');
256        strippedValue.append(stripLeadingAndTrailingHTMLSpaces(addresses[i]));
257    }
258    return strippedValue.toString();
259}
260
261String EmailInputType::convertFromVisibleValue(const String& visibleValue) const
262{
263    String sanitizedValue = sanitizeValue(visibleValue);
264    if (!element().multiple())
265        return convertEmailAddressToASCII(sanitizedValue);
266    Vector<String> addresses;
267    sanitizedValue.split(',', true, addresses);
268    StringBuilder builder;
269    builder.reserveCapacity(sanitizedValue.length());
270    for (size_t i = 0; i < addresses.size(); ++i) {
271        if (i > 0)
272            builder.append(',');
273        builder.append(convertEmailAddressToASCII(addresses[i]));
274    }
275    return builder.toString();
276}
277
278String EmailInputType::visibleValue() const
279{
280    String value = element().value();
281    if (!element().multiple())
282        return convertEmailAddressToUnicode(value);
283
284    Vector<String> addresses;
285    value.split(',', true, addresses);
286    StringBuilder builder;
287    builder.reserveCapacity(value.length());
288    for (size_t i = 0; i < addresses.size(); ++i) {
289        if (i > 0)
290            builder.append(',');
291        builder.append(convertEmailAddressToUnicode(addresses[i]));
292    }
293    return builder.toString();
294}
295
296} // namespace blink
297