/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package androidx.core.util;

import static androidx.annotation.RestrictTo.Scope.LIBRARY_GROUP;

import androidx.annotation.RestrictTo;

import java.util.regex.Pattern;

/**
 * Commonly used regular expression patterns.
 *
 * <p>The constants are built by string concatenation in declaration order, so a constant must be
 * declared before any constant that splices it in. Where a {@link Pattern} is concatenated into
 * a larger expression, its {@code toString()} (the source regex) is what gets embedded.
 */
public final class PatternsCompat {
    /**
     *  Regular expression to match all IANA top-level domains.
     *
     *  List accurate as of 2015/11/24.  List taken from:
     *  http://data.iana.org/TLD/tlds-alpha-by-domain.txt
     *  This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
     */
    static final String IANA_TOP_LEVEL_DOMAINS =
        "(?:"
        + "(?:aaa|aarp|abb|abbott|abogado|academy|accenture|accountant|accountants|aco|active"
        + "|actor|ads|adult|aeg|aero|afl|agency|aig|airforce|airtel|allfinanz|alsace|amica|amsterdam"
        + "|android|apartments|app|apple|aquarelle|aramco|archi|army|arpa|arte|asia|associates"
        + "|attorney|auction|audio|auto|autos|axa|azure|a[cdefgilmoqrstuwxz])"
        + "|(?:band|bank|bar|barcelona|barclaycard|barclays|bargains|bauhaus|bayern|bbc|bbva"
        + "|bcn|beats|beer|bentley|berlin|best|bet|bharti|bible|bid|bike|bing|bingo|bio|biz|black"
        + "|blackfriday|bloomberg|blue|bms|bmw|bnl|bnpparibas|boats|bom|bond|boo|boots|boutique"
        + "|bradesco|bridgestone|broadway|broker|brother|brussels|budapest|build|builders|business"
        + "|buzz|bzh|b[abdefghijmnorstvwyz])"
        + "|(?:cab|cafe|cal|camera|camp|cancerresearch|canon|capetown|capital|car|caravan|cards"
        + "|care|career|careers|cars|cartier|casa|cash|casino|cat|catering|cba|cbn|ceb|center|ceo"
        + "|cern|cfa|cfd|chanel|channel|chat|cheap|chloe|christmas|chrome|church|cipriani|cisco"
        + "|citic|city|cityeats|claims|cleaning|click|clinic|clothing|cloud|club|clubmed|coach"
        + "|codes|coffee|college|cologne|com|commbank|community|company|computer|comsec|condos"
        + "|construction|consulting|contractors|cooking|cool|coop|corsica|country|coupons|courses"
        + "|credit|creditcard|creditunion|cricket|crown|crs|cruises|csc|cuisinella|cymru|cyou|c[acdfghiklmnoruvwxyz])"
        + "|(?:dabur|dad|dance|date|dating|datsun|day|dclk|deals|degree|delivery|dell|delta"
        + "|democrat|dental|dentist|desi|design|dev|diamonds|diet|digital|direct|directory|discount"
        + "|dnp|docs|dog|doha|domains|doosan|download|drive|durban|dvag|d[ejkmoz])"
        + "|(?:earth|eat|edu|education|email|emerck|energy|engineer|engineering|enterprises"
        + "|epson|equipment|erni|esq|estate|eurovision|eus|events|everbank|exchange|expert|exposed"
        + "|express|e[cegrstu])"
        + "|(?:fage|fail|fairwinds|faith|family|fan|fans|farm|fashion|feedback|ferrero|film"
        + "|final|finance|financial|firmdale|fish|fishing|fit|fitness|flights|florist|flowers|flsmidth"
        + "|fly|foo|football|forex|forsale|forum|foundation|frl|frogans|fund|furniture|futbol|fyi"
        + "|f[ijkmor])"
        + "|(?:gal|gallery|game|garden|gbiz|gdn|gea|gent|genting|ggee|gift|gifts|gives|giving"
        + "|glass|gle|global|globo|gmail|gmo|gmx|gold|goldpoint|golf|goo|goog|google|gop|gov|grainger"
        + "|graphics|gratis|green|gripe|group|gucci|guge|guide|guitars|guru|g[abdefghilmnpqrstuwy])"
        + "|(?:hamburg|hangout|haus|healthcare|help|here|hermes|hiphop|hitachi|hiv|hockey|holdings"
        + "|holiday|homedepot|homes|honda|horse|host|hosting|hoteles|hotmail|house|how|hsbc|hyundai"
        + "|h[kmnrtu])"
        + "|(?:ibm|icbc|ice|icu|ifm|iinet|immo|immobilien|industries|infiniti|info|ing|ink|institute"
        + "|insure|int|international|investments|ipiranga|irish|ist|istanbul|itau|iwc|i[delmnoqrst])"
        + "|(?:jaguar|java|jcb|jetzt|jewelry|jlc|jll|jobs|joburg|jprs|juegos|j[emop])"
        + "|(?:kaufen|kddi|kia|kim|kinder|kitchen|kiwi|koeln|komatsu|krd|kred|kyoto|k[eghimnprwyz])"
        + "|(?:lacaixa|lancaster|land|landrover|lasalle|lat|latrobe|law|lawyer|lds|lease|leclerc"
        + "|legal|lexus|lgbt|liaison|lidl|life|lifestyle|lighting|limited|limo|linde|link|live"
        + "|lixil|loan|loans|lol|london|lotte|lotto|love|ltd|ltda|lupin|luxe|luxury|l[abcikrstuvy])"
        + "|(?:madrid|maif|maison|man|management|mango|market|marketing|markets|marriott|mba"
        + "|media|meet|melbourne|meme|memorial|men|menu|meo|miami|microsoft|mil|mini|mma|mobi|moda"
        + "|moe|moi|mom|monash|money|montblanc|mormon|mortgage|moscow|motorcycles|mov|movie|movistar"
        + "|mtn|mtpc|mtr|museum|mutuelle|m[acdeghklmnopqrstuvwxyz])"
        + "|(?:nadex|nagoya|name|navy|nec|net|netbank|network|neustar|new|news|nexus|ngo|nhk"
        + "|nico|ninja|nissan|nokia|nra|nrw|ntt|nyc|n[acefgilopruz])"
        + "|(?:obi|office|okinawa|omega|one|ong|onl|online|ooo|oracle|orange|org|organic|osaka"
        + "|otsuka|ovh|om)"
        + "|(?:page|panerai|paris|partners|parts|party|pet|pharmacy|philips|photo|photography"
        + "|photos|physio|piaget|pics|pictet|pictures|ping|pink|pizza|place|play|playstation|plumbing"
        + "|plus|pohl|poker|porn|post|praxi|press|pro|prod|productions|prof|properties|property"
        + "|protection|pub|p[aefghklmnrstwy])"
        + "|(?:qpon|quebec|qa)"
        + "|(?:racing|realtor|realty|recipes|red|redstone|rehab|reise|reisen|reit|ren|rent|rentals"
        + "|repair|report|republican|rest|restaurant|review|reviews|rich|ricoh|rio|rip|rocher|rocks"
        + "|rodeo|rsvp|ruhr|run|rwe|ryukyu|r[eosuw])"
        + "|(?:saarland|sakura|sale|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|saxo"
        + "|sbs|sca|scb|schmidt|scholarships|school|schule|schwarz|science|scor|scot|seat|security"
        + "|seek|sener|services|seven|sew|sex|sexy|shiksha|shoes|show|shriram|singles|site|ski"
        + "|sky|skype|sncf|soccer|social|software|sohu|solar|solutions|sony|soy|space|spiegel|spreadbetting"
        + "|srl|stada|starhub|statoil|stc|stcgroup|stockholm|studio|study|style|sucks|supplies"
        + "|supply|support|surf|surgery|suzuki|swatch|swiss|sydney|systems|s[abcdeghijklmnortuvxyz])"
        + "|(?:tab|taipei|tatamotors|tatar|tattoo|tax|taxi|team|tech|technology|tel|telefonica"
        + "|temasek|tennis|thd|theater|theatre|tickets|tienda|tips|tires|tirol|today|tokyo|tools"
        + "|top|toray|toshiba|tours|town|toyota|toys|trade|trading|training|travel|trust|tui|t[cdfghjklmnortvwz])"
        + "|(?:ubs|university|uno|uol|u[agksyz])"
        + "|(?:vacations|vana|vegas|ventures|versicherung|vet|viajes|video|villas|vin|virgin"
        + "|vision|vista|vistaprint|viva|vlaanderen|vodka|vote|voting|voto|voyage|v[aceginu])"
        + "|(?:wales|walter|wang|watch|webcam|website|wed|wedding|weir|whoswho|wien|wiki|williamhill"
        + "|win|windows|wine|wme|work|works|world|wtc|wtf|w[fs])"
        + "|(?:\u03b5\u03bb|\u0431\u0435\u043b|\u0434\u0435\u0442\u0438|\u043a\u043e\u043c|\u043c\u043a\u0434"
        + "|\u043c\u043e\u043d|\u043c\u043e\u0441\u043a\u0432\u0430|\u043e\u043d\u043b\u0430\u0439\u043d"
        + "|\u043e\u0440\u0433|\u0440\u0443\u0441|\u0440\u0444|\u0441\u0430\u0439\u0442|\u0441\u0440\u0431"
        + "|\u0443\u043a\u0440|\u049b\u0430\u0437|\u0570\u0561\u0575|\u05e7\u05d5\u05dd|\u0627\u0631\u0627\u0645\u0643\u0648"
        + "|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629"
        + "|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0627\u06cc\u0631\u0627\u0646"
        + "|\u0628\u0627\u0632\u0627\u0631|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633"
        + "|\u0633\u0648\u062f\u0627\u0646|\u0633\u0648\u0631\u064a\u0629|\u0634\u0628\u0643\u0629"
        + "|\u0639\u0631\u0627\u0642|\u0639\u0645\u0627\u0646|\u0641\u0644\u0633\u0637\u064a\u0646"
        + "|\u0642\u0637\u0631|\u0643\u0648\u0645|\u0645\u0635\u0631|\u0645\u0644\u064a\u0633\u064a\u0627"
        + "|\u0645\u0648\u0642\u0639|\u0915\u0949\u092e|\u0928\u0947\u091f|\u092d\u093e\u0930\u0924"
        + "|\u0938\u0902\u0917\u0920\u0928|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4"
        + "|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd"
        + "|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e04\u0e2d\u0e21|\u0e44\u0e17\u0e22"
        + "|\u10d2\u10d4|\u307f\u3093\u306a|\u30b0\u30fc\u30b0\u30eb|\u30b3\u30e0|\u4e16\u754c"
        + "|\u4e2d\u4fe1|\u4e2d\u56fd|\u4e2d\u570b|\u4e2d\u6587\u7f51|\u4f01\u4e1a|\u4f5b\u5c71"
        + "|\u4fe1\u606f|\u5065\u5eb7|\u516b\u5366|\u516c\u53f8|\u516c\u76ca|\u53f0\u6e7e|\u53f0\u7063"
        + "|\u5546\u57ce|\u5546\u5e97|\u5546\u6807|\u5728\u7ebf|\u5927\u62ff|\u5a31\u4e50|\u5de5\u884c"
        + "|\u5e7f\u4e1c|\u6148\u5584|\u6211\u7231\u4f60|\u624b\u673a|\u653f\u52a1|\u653f\u5e9c"
        + "|\u65b0\u52a0\u5761|\u65b0\u95fb|\u65f6\u5c1a|\u673a\u6784|\u6de1\u9a6c\u9521|\u6e38\u620f"
        + "|\u70b9\u770b|\u79fb\u52a8|\u7ec4\u7ec7\u673a\u6784|\u7f51\u5740|\u7f51\u5e97|\u7f51\u7edc"
        + "|\u8c37\u6b4c|\u96c6\u56e2|\u98de\u5229\u6d66|\u9910\u5385|\u9999\u6e2f|\ub2f7\ub137"
        + "|\ub2f7\ucef4|\uc0bc\uc131|\ud55c\uad6d|xbox"
        + "|xerox|xin|xn\\-\\-11b4c3d|xn\\-\\-1qqw23a|xn\\-\\-30rr7y|xn\\-\\-3bst00m|xn\\-\\-3ds443g"
        + "|xn\\-\\-3e0b707e|xn\\-\\-3pxu8k|xn\\-\\-42c2d9a|xn\\-\\-45brj9c|xn\\-\\-45q11c|xn\\-\\-4gbrim"
        + "|xn\\-\\-55qw42g|xn\\-\\-55qx5d|xn\\-\\-6frz82g|xn\\-\\-6qq986b3xl|xn\\-\\-80adxhks"
        + "|xn\\-\\-80ao21a|xn\\-\\-80asehdb|xn\\-\\-80aswg|xn\\-\\-90a3ac|xn\\-\\-90ais|xn\\-\\-9dbq2a"
        + "|xn\\-\\-9et52u|xn\\-\\-b4w605ferd|xn\\-\\-c1avg|xn\\-\\-c2br7g|xn\\-\\-cg4bki|xn\\-\\-clchc0ea0b2g2a9gcd"
        + "|xn\\-\\-czr694b|xn\\-\\-czrs0t|xn\\-\\-czru2d|xn\\-\\-d1acj3b|xn\\-\\-d1alf|xn\\-\\-efvy88h"
        + "|xn\\-\\-estv75g|xn\\-\\-fhbei|xn\\-\\-fiq228c5hs|xn\\-\\-fiq64b|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s"
        + "|xn\\-\\-fjq720a|xn\\-\\-flw351e|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-gecrj9c"
        + "|xn\\-\\-h2brj9c|xn\\-\\-hxt814e|xn\\-\\-i1b6b1a6a2e|xn\\-\\-imr513n|xn\\-\\-io0a7i"
        + "|xn\\-\\-j1aef|xn\\-\\-j1amh|xn\\-\\-j6w193g|xn\\-\\-kcrx77d1x4a|xn\\-\\-kprw13d|xn\\-\\-kpry57d"
        + "|xn\\-\\-kput3i|xn\\-\\-l1acc|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgb9awbf|xn\\-\\-mgba3a3ejt"
        + "|xn\\-\\-mgba3a4f16a|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbab2bd|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e"
        + "|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-mgbpl2fh|xn\\-\\-mgbtx2b|xn\\-\\-mgbx4cd0ab"
        + "|xn\\-\\-mk1bu44c|xn\\-\\-mxtq1m|xn\\-\\-ngbc5azd|xn\\-\\-node|xn\\-\\-nqv7f|xn\\-\\-nqv7fs00ema"
        + "|xn\\-\\-nyqy26a|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1acf|xn\\-\\-p1ai|xn\\-\\-pgbs0dh"
        + "|xn\\-\\-pssy2u|xn\\-\\-q9jyb4c|xn\\-\\-qcka1pmc|xn\\-\\-qxam|xn\\-\\-rhqv96g|xn\\-\\-s9brj9c"
        + "|xn\\-\\-ses554g|xn\\-\\-t60b56a|xn\\-\\-tckwe|xn\\-\\-unup4y|xn\\-\\-vermgensberater\\-ctb"
        + "|xn\\-\\-vermgensberatung\\-pwb|xn\\-\\-vhquv|xn\\-\\-vuq861b|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a"
        + "|xn\\-\\-xhq521b|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-y9a3aq|xn\\-\\-yfro4i67o"
        + "|xn\\-\\-ygbi2ammx|xn\\-\\-zfr164b|xperia|xxx|xyz)"
        + "|(?:yachts|yamaxun|yandex|yodobashi|yoga|yokohama|youtube|y[et])"
        + "|(?:zara|zip|zone|zuerich|z[amw]))";

    /**
     * Regular expression pattern to match dotted-decimal IPv4 addresses whose four fields are
     * each limited to the range 0-255.
     */
    public static final Pattern IP_ADDRESS
        = Pattern.compile(
            "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
            + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
            + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
            + "|[1-9][0-9]|[0-9]))");

    /**
     * Valid UCS characters defined in RFC 3987. Excludes space characters.
     */
    private static final String UCS_CHAR = "[" +
            "\u00A0-\uD7FF" +
            "\uF900-\uFDCF" +
            "\uFDF0-\uFFEF" +
            "\uD800\uDC00-\uD83F\uDFFD" +
            "\uD840\uDC00-\uD87F\uDFFD" +
            "\uD880\uDC00-\uD8BF\uDFFD" +
            "\uD8C0\uDC00-\uD8FF\uDFFD" +
            "\uD900\uDC00-\uD93F\uDFFD" +
            "\uD940\uDC00-\uD97F\uDFFD" +
            "\uD980\uDC00-\uD9BF\uDFFD" +
            "\uD9C0\uDC00-\uD9FF\uDFFD" +
            "\uDA00\uDC00-\uDA3F\uDFFD" +
            "\uDA40\uDC00-\uDA7F\uDFFD" +
            "\uDA80\uDC00-\uDABF\uDFFD" +
            "\uDAC0\uDC00-\uDAFF\uDFFD" +
            "\uDB00\uDC00-\uDB3F\uDFFD" +
            "\uDB44\uDC00-\uDB7F\uDFFD" +
            "&&[^\u00A0[\u2000-\u200A]\u2028\u2029\u202F\u3000]]";

    /**
     * Valid characters for IRI label defined in RFC 3987.
     */
    private static final String LABEL_CHAR = "a-zA-Z0-9" + UCS_CHAR;

    /**
     * Valid characters for IRI TLD defined in RFC 3987.
     */
    private static final String TLD_CHAR = "a-zA-Z" + UCS_CHAR;

    /**
     * RFC 1035 Section 2.3.4 limits the labels to a maximum 63 octets.
     */
    private static final String IRI_LABEL =
            "[" + LABEL_CHAR + "](?:[" + LABEL_CHAR + "_\\-]{0,61}[" + LABEL_CHAR + "]){0,1}";

    /**
     * RFC 3492 references RFC 1034 and limits Punycode algorithm output to 63 characters.
     */
    private static final String PUNYCODE_TLD = "xn\\-\\-[\\w\\-]{0,58}\\w";

    /**
     * Relaxed TLD: either a punycode TLD or 2 to 63 {@link #TLD_CHAR} characters. It is not
     * checked against the IANA list.
     */
    private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")";

    /**
     * One or more dot-terminated IRI labels followed by a relaxed {@link #TLD}.
     */
    private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD;

    /**
     * Regular expression pattern to match either a host name with a relaxed TLD or an
     * {@link #IP_ADDRESS}.
     */
    public static final Pattern DOMAIN_NAME
        = Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");

    /**
     * Supported URL schemes (http, https or rtsp, matched case-insensitively) followed by "://".
     */
    private static final String PROTOCOL = "(?i:http|https|rtsp)://";

    /* A word boundary or end of input.  This is to stop foo.sure from matching as foo.su */
    private static final String WORD_BOUNDARY = "(?:\\b|$|^)";

    /**
     * User-info part of a URL: 1 to 64 user characters, an optional colon-separated part of
     * 1 to 25 characters, and the terminating "@". Percent-encoded octets are accepted in both
     * parts.
     */
    private static final String USER_INFO = "(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
            + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
            + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@";

    /**
     * A colon followed by one to five digits.
     */
    private static final String PORT_NUMBER = "\\:\\d{1,5}";

    /**
     * Path, query and fragment part of a URL: starts with "/" or "?" and continues with label
     * characters, the listed punctuation, or percent-encoded octets.
     */
    private static final String PATH_AND_QUERY = "[/\\?](?:(?:[" + LABEL_CHAR
            + ";/\\?:@&=#~"  // plus optional query params
            + "\\-\\.\\+!\\*'\\(\\),_\\$])|(?:%[a-fA-F0-9]{2}))*";

    /**
     *  Regular expression pattern to match most part of RFC 3987
     *  Internationalized URLs, aka IRIs.
     */
    public static final Pattern WEB_URL = Pattern.compile("("
            + "("
            + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")?"
            + "(?:" + DOMAIN_NAME + ")"
            + "(?:" + PORT_NUMBER + ")?"
            + ")"
            + "(" + PATH_AND_QUERY + ")?"
            + WORD_BOUNDARY
            + ")");

    /**
     * Regular expression that matches known TLDs and punycode TLDs
     */
    private static final String STRICT_TLD = "(?:" +
            IANA_TOP_LEVEL_DOMAINS + "|" + PUNYCODE_TLD + ")";

    /**
     * Regular expression that matches host names using {@link #STRICT_TLD}
     */
    private static final String STRICT_HOST_NAME = "(?:(?:" + IRI_LABEL + "\\.)+"
            + STRICT_TLD + ")";

    /**
     * Regular expression that matches domain names using either {@link #STRICT_HOST_NAME} or
     * {@link #IP_ADDRESS}.
     *
     * <p>Kept as a plain string rather than a compiled {@link Pattern}: it is only ever spliced
     * into {@link #WEB_URL_WITHOUT_PROTOCOL}, so compiling the large TLD alternation on its own
     * during class initialization would be wasted work. The resulting expression text is the
     * same either way.
     */
    private static final String STRICT_DOMAIN_NAME
            = "(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + ")";

    /**
     * Regular expression that matches domain names without a TLD
     */
    private static final String RELAXED_DOMAIN_NAME =
            "(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + ")";

    /**
     * Regular expression to match strings that do not start with a supported protocol. The TLDs
     * are expected to be one of the known TLDs.
     */
    private static final String WEB_URL_WITHOUT_PROTOCOL = "("
            + WORD_BOUNDARY
            + "(?<!:\\/\\/)"
            + "("
            + "(?:" + STRICT_DOMAIN_NAME + ")"
            + "(?:" + PORT_NUMBER + ")?"
            + ")"
            + "(?:" + PATH_AND_QUERY + ")?"
            + WORD_BOUNDARY
            + ")";

    /**
     * Regular expression to match strings that start with a supported protocol. Rules for domain
     * names and TLDs are more relaxed. TLDs are optional.
     */
    private static final String WEB_URL_WITH_PROTOCOL = "("
            + WORD_BOUNDARY
            + "(?:"
            + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")"
            + "(?:" + RELAXED_DOMAIN_NAME + ")?"
            + "(?:" + PORT_NUMBER + ")?"
            + ")"
            + "(?:" + PATH_AND_QUERY + ")?"
            + WORD_BOUNDARY
            + ")";

    /**
     * Regular expression pattern to match IRIs. If a string starts with a supported protocol
     * (http, https or rtsp, followed by "://") the expression tries to match the URL structure
     * with a relaxed rule for TLDs. If the string does not start with a supported protocol the
     * TLDs are expected to be one of the known TLDs.
     *
     * @hide
     */
    @RestrictTo(LIBRARY_GROUP)
    public static final Pattern AUTOLINK_WEB_URL = Pattern.compile(
            "(" + WEB_URL_WITH_PROTOCOL + "|" + WEB_URL_WITHOUT_PROTOCOL + ")");

    /**
     * Regular expression for valid email characters. Does not include some of the valid characters
     * defined in RFC5321: #&~!^`{}/=$*?|
     */
    private static final String EMAIL_CHAR = LABEL_CHAR + "\\+\\-_%'";

    /**
     * Regular expression for local part of an email address. RFC5321 section 4.5.3.1.1 limits
     * the local part to be at most 64 octets.
     */
    private static final String EMAIL_ADDRESS_LOCAL_PART =
            "[" + EMAIL_CHAR + "]" + "(?:[" + EMAIL_CHAR + "\\.]{0,62}[" + EMAIL_CHAR + "])?";

    /**
     * Regular expression for the domain part of an email address. RFC5321 section 4.5.3.1.2 limits
     * the domain to be at most 255 octets.
     */
    private static final String EMAIL_ADDRESS_DOMAIN =
            "(?=.{1,255}(?:\\s|$|^))" + HOST_NAME;

    /**
     * Regular expression pattern to match email addresses. It excludes double quoted local parts
     * and the special characters #&~!^`{}/=$*?| that are included in RFC5321.
     * @hide
     */
    @RestrictTo(LIBRARY_GROUP)
    public static final Pattern AUTOLINK_EMAIL_ADDRESS = Pattern.compile("(" + WORD_BOUNDARY +
            "(?:" + EMAIL_ADDRESS_LOCAL_PART + "@" + EMAIL_ADDRESS_DOMAIN + ")" +
            WORD_BOUNDARY + ")"
    );

    /**
     * Regular expression pattern to match ASCII-only email addresses: a local part of 1 to 256
     * characters, "@", and a domain made of a first label followed by one or more
     * dot-separated labels.
     */
    public static final Pattern EMAIL_ADDRESS
        = Pattern.compile(
            "[a-zA-Z0-9\\+\\.\\_\\%\\-\\+]{1,256}" +
            "\\@" +
            "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" +
            "(" +
                "\\." +
                "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" +
            ")+"
        );

    /**
     * Do not create this static utility class.
     */
    private PatternsCompat() {}
}