19066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project/* 29066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Copyright (C) 2007 The Android Open Source Project 39066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 49066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License"); 59066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * you may not use this file except in compliance with the License. 69066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * You may obtain a copy of the License at 79066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 89066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 99066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 109066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Unless required by applicable law or agreed to in writing, software 119066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 129066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 139066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * See the License for the specific language governing permissions and 149066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * limitations under the License. 159066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */ 169066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 172269d1572e5fcfb725ea55f5764d8c3280d69f6dDianne Hackbornpackage android.util; 189066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 199066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Projectimport java.util.regex.Matcher; 209066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Projectimport java.util.regex.Pattern; 219066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 229066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project/** 23ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor * Commonly used regular expression patterns. 249066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */ 25ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnorpublic class Patterns { 269066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project /** 2751c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * Regular expression to match all IANA top-level domains. 2852fc810f73e0d8e005281e80a981e1ceed855850Shimeng (Simon) Wang * List accurate as of 2011/07/18. List taken from: 299066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * http://data.iana.org/TLD/tlds-alpha-by-domain.txt 3052fc810f73e0d8e005281e80a981e1ceed855850Shimeng (Simon) Wang * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py 319066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */ 3251c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang public static final String TOP_LEVEL_DOMAIN_STR = 333ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])" 343ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(biz|b[abdefghijmnorstvwyz])" 353ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(cat|com|coop|c[acdfghiklmnoruvxyz])" 363ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|d[ejkmoz]" 373ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(edu|e[cegrstu])" 383ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|f[ijkmor]" 393ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(gov|g[abdefghilmnpqrstuwy])" 403ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|h[kmnrtu]" 413ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(info|int|i[delmnoqrst])" 423ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(jobs|j[emop])" 433ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|k[eghimnprwyz]" 443ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|l[abcikrstuvy]" 453ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])" 463ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(name|net|n[acefgilopruz])" 473ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(org|om)" 483ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(pro|p[aefghklmnrstwy])" 493ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|qa" 503ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|r[eosuw]" 513ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|s[abcdeghijklmnortuvyz]" 523ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(tel|travel|t[cdfghjklmnoprtvwz])" 533ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|u[agksyz]" 543ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|v[aceginu]" 553ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|w[fs]" 5652fc810f73e0d8e005281e80a981e1ceed855850Shimeng (Simon) Wang + "|(\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae|\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435|\u0440\u0444|\u0441\u0440\u0431|\u05d8\u05e2\u05e1\u05d8|\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc|\u0625\u062e\u062a\u0628\u0627\u0631|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u092a\u0930\u0940\u0915\u094d\u0937\u093e|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u30c6\u30b9\u30c8|\u4e2d\u56fd|\u4e2d\u570b|\u53f0\u6e7e|\u53f0\u7063|\u65b0\u52a0\u5761|\u6d4b\u8bd5|\u6e2c\u8a66|\u9999\u6e2f|\ud14c\uc2a4\ud2b8|\ud55c\uad6d|xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-80akhbyknj4f|xn\\-\\-90a3ac|xn\\-\\-9t4b11yi5a|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-deba0ad|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-g6w251d|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-j6w193g|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-s9brj9c|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zckzah|xxx)" 57db990751ef8e535ea5cb3d527e36936e119095e8Shimeng (Simon) Wang + "|y[et]" 5851c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang + "|z[amw])"; 5951c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang 6051c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang /** 6151c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * Regular expression pattern to match all IANA top-level domains. 6251c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang */ 6351c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang public static final Pattern TOP_LEVEL_DOMAIN = 6451c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang Pattern.compile(TOP_LEVEL_DOMAIN_STR); 659066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 669066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project /** 6751c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * Regular expression to match all IANA top-level domains for WEB_URL. 6852fc810f73e0d8e005281e80a981e1ceed855850Shimeng (Simon) Wang * List accurate as of 2011/07/18. List taken from: 699066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * http://data.iana.org/TLD/tlds-alpha-by-domain.txt 7052fc810f73e0d8e005281e80a981e1ceed855850Shimeng (Simon) Wang * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py 719066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */ 7251c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL = 7351c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang "(?:" 743ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])" 753ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:biz|b[abdefghijmnorstvwyz])" 763ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])" 773ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|d[ejkmoz]" 783ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:edu|e[cegrstu])" 793ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|f[ijkmor]" 803ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:gov|g[abdefghilmnpqrstuwy])" 813ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|h[kmnrtu]" 823ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:info|int|i[delmnoqrst])" 833ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:jobs|j[emop])" 843ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|k[eghimnprwyz]" 853ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|l[abcikrstuvy]" 863ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:mil|mobi|museum|m[acdeghklmnopqrstuvwxyz])" 873ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:name|net|n[acefgilopruz])" 883ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:org|om)" 893ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:pro|p[aefghklmnrstwy])" 903ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|qa" 913ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|r[eosuw]" 923ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|s[abcdeghijklmnortuvyz]" 933ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:tel|travel|t[cdfghjklmnoprtvwz])" 943ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|u[agksyz]" 953ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|v[aceginu]" 963ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|w[fs]" 9752fc810f73e0d8e005281e80a981e1ceed855850Shimeng (Simon) Wang + "|(?:\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae|\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435|\u0440\u0444|\u0441\u0440\u0431|\u05d8\u05e2\u05e1\u05d8|\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc|\u0625\u062e\u062a\u0628\u0627\u0631|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633|\u0633\u0648\u0631\u064a\u0629|\u0641\u0644\u0633\u0637\u064a\u0646|\u0642\u0637\u0631|\u0645\u0635\u0631|\u092a\u0930\u0940\u0915\u094d\u0937\u093e|\u092d\u093e\u0930\u0924|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd|\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e44\u0e17\u0e22|\u30c6\u30b9\u30c8|\u4e2d\u56fd|\u4e2d\u570b|\u53f0\u6e7e|\u53f0\u7063|\u65b0\u52a0\u5761|\u6d4b\u8bd5|\u6e2c\u8a66|\u9999\u6e2f|\ud14c\uc2a4\ud2b8|\ud55c\uad6d|xn\\-\\-0zwm56d|xn\\-\\-11b5bs3a9aj6g|xn\\-\\-3e0b707e|xn\\-\\-45brj9c|xn\\-\\-80akhbyknj4f|xn\\-\\-90a3ac|xn\\-\\-9t4b11yi5a|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-deba0ad|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-g6w251d|xn\\-\\-gecrj9c|xn\\-\\-h2brj9c|xn\\-\\-hgbk6aj7f53bba|xn\\-\\-hlcj6aya9esc7a|xn\\-\\-j6w193g|xn\\-\\-jxalpdlp|xn\\-\\-kgbechtv|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-s9brj9c|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zckzah|xxx)" 98db990751ef8e535ea5cb3d527e36936e119095e8Shimeng (Simon) Wang + "|y[et]" 9951c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang + "|z[amw]))"; 10051c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang 10151c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang /** 10251c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * Good characters for Internationalized Resource Identifiers (IRI). 10351c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * This comprises most common used Unicode characters allowed in IRI 10451c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * as detailed in RFC 3987. 10551c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * Specifically, those two byte Unicode characters are not included. 10651c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang */ 10751c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang public static final String GOOD_IRI_CHAR = 10851c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"; 10951c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang 11051c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang /** 11151c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * Regular expression pattern to match most part of RFC 3987 11251c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * Internationalized URLs, aka IRIs. Commonly used Unicode characters are 11351c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang * added. 11451c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang */ 11551c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang public static final Pattern WEB_URL = Pattern.compile( 11651c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)" 11751c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_" 11851c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?" 11951c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang + "((?:(?:[" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,64}\\.)+" // named host 12051c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang + TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL 1213ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|(?:(?:25[0-5]|2[0-4]" // or ip address 1223ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]" 1233ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]" 1243ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}" 1253ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "|[1-9][0-9]|[0-9])))" 1263ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "(?:\\:\\d{1,5})?)" // plus option port number 12740064d3098e17eb447e2b9ee4512b691f6b8c269Shimeng (Simon) Wang + "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params 1283ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?" 1293ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang + "(?:\\b|$)"); // and finally, a word boundary or end of 1303ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang // input. This is to stop foo.sure from 1313ed6fbd9e141f20ca382306aa6a355cd544158d1Shimeng (Simon) Wang // matching as foo.su 1329066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 133ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor public static final Pattern IP_ADDRESS 1349066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project = Pattern.compile( 1359066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]" 1369066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]" 1379066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}" 1389066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project + "|[1-9][0-9]|[0-9]))"); 1399066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 140ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor public static final Pattern DOMAIN_NAME 1419066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project = Pattern.compile( 14251c02dbf75fede9b2829af5b821f10e0bc2af124Shimeng (Simon) Wang "(((([" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]*)*[" + GOOD_IRI_CHAR + "]\\.)+" 143ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor + TOP_LEVEL_DOMAIN + ")|" 144ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor + IP_ADDRESS + ")"); 1459066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 146ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor public static final Pattern EMAIL_ADDRESS 1479066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project = Pattern.compile( 14830d203050e832c7e3b7ca32cea615cb0825b5a2fFred Quintana "[a-zA-Z0-9\\+\\.\\_\\%\\-\\+]{1,256}" + 1499066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project "\\@" + 150ba87e3e6c985e7175152993b5efcc7dd2f0e1c93The Android Open Source Project "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" + 1519066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project "(" + 1529066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project "\\." + 153ba87e3e6c985e7175152993b5efcc7dd2f0e1c93The Android Open Source Project "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" + 1549066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project ")+" 1559066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project ); 1569066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 1579066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project /** 1589066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * This pattern is intended for searching for things that look like they 1599066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * might be phone numbers in arbitrary text, not for validating whether 1609066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * something is in fact a phone number. It will miss many things that 1619066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * are legitimate phone numbers. 1629066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 1639066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * <p> The pattern matches the following: 1649066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * <ul> 1659066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes 1669066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * may follow. 1679066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * <li>Optionally, sets of digits in parentheses, separated by spaces, dots, or dashes. 1689066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * <li>A string starting and ending with a digit, containing digits, spaces, dots, and/or dashes. 1699066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * </ul> 1709066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */ 171ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor public static final Pattern PHONE 172a37fc575a4ac40f3ff2e9a9f2108b577a649db73Bin Zhu = Pattern.compile( // sdd = space, dot, or dash 173a37fc575a4ac40f3ff2e9a9f2108b577a649db73Bin Zhu "(\\+[0-9]+[\\- \\.]*)?" // +<digits><sdd>* 174a37fc575a4ac40f3ff2e9a9f2108b577a649db73Bin Zhu + "(\\([0-9]+\\)[\\- \\.]*)?" // (<digits>)<sdd>* 175a37fc575a4ac40f3ff2e9a9f2108b577a649db73Bin Zhu + "([0-9][0-9\\- \\.]+[0-9])"); // <digit><digit|sdd>+<digit> 1769066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 1779066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project /** 1789066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Convenience method to take all of the non-null matching groups in a 1799066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * regex Matcher and return them as a concatenated string. 1809066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 1819066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * @param matcher The Matcher object from which grouped text will 1829066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * be extracted 1839066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 1849066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * @return A String comprising all of the non-null matched 1859066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * groups concatenated together 1869066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */ 1879066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project public static final String concatGroups(Matcher matcher) { 1889066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project StringBuilder b = new StringBuilder(); 1899066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project final int numGroups = matcher.groupCount(); 1909066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 1919066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project for (int i = 1; i <= numGroups; i++) { 1929066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project String s = matcher.group(i); 1939066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 1949066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project System.err.println("Group(" + i + ") : " + s); 1959066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 1969066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project if (s != null) { 1979066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project b.append(s); 1989066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project } 1999066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project } 2009066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 2019066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project return b.toString(); 2029066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project } 2039066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 2049066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project /** 2059066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Convenience method to return only the digits and plus signs 2069066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * in the matching string. 2079066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 2089066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * @param matcher The Matcher object from which digits and plus will 2099066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * be extracted 2109066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * 2119066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * @return A String comprising all of the digits and plus in 2129066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * the match 2139066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */ 2149066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project public static final String digitsAndPlusOnly(Matcher matcher) { 2159066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project StringBuilder buffer = new StringBuilder(); 2169066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project String matchingRegion = matcher.group(); 2179066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 2189066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project for (int i = 0, size = matchingRegion.length(); i < size; i++) { 2199066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project char character = matchingRegion.charAt(i); 2209066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project 2219066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project if (character == '+' || Character.isDigit(character)) { 2229066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project buffer.append(character); 2239066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project } 2249066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project } 2259066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project return buffer.toString(); 2269066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project } 227ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor 228ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor /** 229ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor * Do not create this static utility class. 230ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor */ 231ded0e6447ed6e0f200dbca13e43c6cf4efc16a1dDan Egnor private Patterns() {} 2329066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project} 233