1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "content/renderer/android/email_detector.h" 6 7#include "base/logging.h" 8#include "base/memory/scoped_ptr.h" 9#include "base/strings/utf_string_conversions.h" 10#include "content/public/renderer/android_content_detection_prefixes.h" 11#include "net/base/escape.h" 12#include "third_party/icu/source/i18n/unicode/regex.h" 13 14namespace { 15 16// Maximum length of an email address. 17const size_t kMaximumEmailLength = 254; 18 19// Regex to match email addresses. 20// This is more specific than RFC 2822 (uncommon special characters are 21// disallowed) in order to avoid false positives. 22// Delimiters are word boundaries to allow punctuation, quote marks etc. around 23// the address. 24const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b"; 25 26} // anonymous namespace 27 28namespace content { 29 30EmailDetector::EmailDetector() { 31} 32 33size_t EmailDetector::GetMaximumContentLength() { 34 return kMaximumEmailLength; 35} 36 37GURL EmailDetector::GetIntentURL(const std::string& content_text) { 38 if (content_text.empty()) 39 return GURL(); 40 41 return GURL(kEmailPrefix + 42 net::EscapeQueryParamValue(content_text, true)); 43} 44 45bool EmailDetector::FindContent(const base::string16::const_iterator& begin, 46 const base::string16::const_iterator& end, 47 size_t* start_pos, 48 size_t* end_pos, 49 std::string* content_text) { 50 base::string16 utf16_input = base::string16(begin, end); 51 icu::UnicodeString pattern(kEmailRegex); 52 icu::UnicodeString input(utf16_input.data(), utf16_input.length()); 53 UErrorCode status = U_ZERO_ERROR; 54 scoped_ptr<icu::RegexMatcher> matcher( 55 new icu::RegexMatcher(pattern, 56 input, 57 UREGEX_CASE_INSENSITIVE, 58 status)); 59 if (matcher->find()) { 60 *start_pos = matcher->start(status); 61 DCHECK(U_SUCCESS(status)); 62 *end_pos = matcher->end(status); 63 DCHECK(U_SUCCESS(status)); 64 icu::UnicodeString content_ustr(matcher->group(status)); 65 DCHECK(U_SUCCESS(status)); 66 base::UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(), 67 content_text); 68 return true; 69 } 70 71 return false; 72} 73 74} // namespace content 75