1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.text.util; 18 19import android.widget.MultiAutoCompleteTextView; 20 21import java.util.ArrayList; 22import java.util.Collection; 23 24/** 25 * This class works as a Tokenizer for MultiAutoCompleteTextView for 26 * address list fields, and also provides a method for converting 27 * a string of addresses (such as might be typed into such a field) 28 * into a series of Rfc822Tokens. 29 */ 30public class Rfc822Tokenizer implements MultiAutoCompleteTextView.Tokenizer { 31 32 /** 33 * This constructor will try to take a string like 34 * "Foo Bar (something) <foo\@google.com>, 35 * blah\@google.com (something)" 36 * and convert it into one or more Rfc822Tokens, output into the supplied 37 * collection. 38 * 39 * It does *not* decode MIME encoded-words; charset conversion 40 * must already have taken place if necessary. 41 * It will try to be tolerant of broken syntax instead of 42 * returning an error. 43 * 44 */ 45 public static void tokenize(CharSequence text, Collection<Rfc822Token> out) { 46 StringBuilder name = new StringBuilder(); 47 StringBuilder address = new StringBuilder(); 48 StringBuilder comment = new StringBuilder(); 49 50 int i = 0; 51 int cursor = text.length(); 52 53 while (i < cursor) { 54 char c = text.charAt(i); 55 56 if (c == ',' || c == ';') { 57 i++; 58 59 while (i < cursor && text.charAt(i) == ' ') { 60 i++; 61 } 62 63 crunch(name); 64 65 if (address.length() > 0) { 66 out.add(new Rfc822Token(name.toString(), 67 address.toString(), 68 comment.toString())); 69 } else if (name.length() > 0) { 70 out.add(new Rfc822Token(null, 71 name.toString(), 72 comment.toString())); 73 } 74 75 name.setLength(0); 76 address.setLength(0); 77 comment.setLength(0); 78 } else if (c == '"') { 79 i++; 80 81 while (i < cursor) { 82 c = text.charAt(i); 83 84 if (c == '"') { 85 i++; 86 break; 87 } else if (c == '\\' && i + 1 < cursor) { 88 name.append(text.charAt(i + 1)); 89 i += 2; 90 } else { 91 name.append(c); 92 i++; 93 } 94 } 95 } else if (c == '(') { 96 int level = 1; 97 i++; 98 99 while (i < cursor && level > 0) { 100 c = text.charAt(i); 101 102 if (c == ')') { 103 if (level > 1) { 104 comment.append(c); 105 } 106 107 level--; 108 i++; 109 } else if (c == '(') { 110 comment.append(c); 111 level++; 112 i++; 113 } else if (c == '\\' && i + 1 < cursor) { 114 comment.append(text.charAt(i + 1)); 115 i += 2; 116 } else { 117 comment.append(c); 118 i++; 119 } 120 } 121 } else if (c == '<') { 122 i++; 123 124 while (i < cursor) { 125 c = text.charAt(i); 126 127 if (c == '>') { 128 i++; 129 break; 130 } else { 131 address.append(c); 132 i++; 133 } 134 } 135 } else if (c == ' ') { 136 name.append('\0'); 137 i++; 138 } else { 139 name.append(c); 140 i++; 141 } 142 } 143 144 crunch(name); 145 146 if (address.length() > 0) { 147 out.add(new Rfc822Token(name.toString(), 148 address.toString(), 149 comment.toString())); 150 } else if (name.length() > 0) { 151 out.add(new Rfc822Token(null, 152 name.toString(), 153 comment.toString())); 154 } 155 } 156 157 /** 158 * This method will try to take a string like 159 * "Foo Bar (something) <foo\@google.com>, 160 * blah\@google.com (something)" 161 * and convert it into one or more Rfc822Tokens. 162 * It does *not* decode MIME encoded-words; charset conversion 163 * must already have taken place if necessary. 164 * It will try to be tolerant of broken syntax instead of 165 * returning an error. 166 */ 167 public static Rfc822Token[] tokenize(CharSequence text) { 168 ArrayList<Rfc822Token> out = new ArrayList<Rfc822Token>(); 169 tokenize(text, out); 170 return out.toArray(new Rfc822Token[out.size()]); 171 } 172 173 private static void crunch(StringBuilder sb) { 174 int i = 0; 175 int len = sb.length(); 176 177 while (i < len) { 178 char c = sb.charAt(i); 179 180 if (c == '\0') { 181 if (i == 0 || i == len - 1 || 182 sb.charAt(i - 1) == ' ' || 183 sb.charAt(i - 1) == '\0' || 184 sb.charAt(i + 1) == ' ' || 185 sb.charAt(i + 1) == '\0') { 186 sb.deleteCharAt(i); 187 len--; 188 } else { 189 i++; 190 } 191 } else { 192 i++; 193 } 194 } 195 196 for (i = 0; i < len; i++) { 197 if (sb.charAt(i) == '\0') { 198 sb.setCharAt(i, ' '); 199 } 200 } 201 } 202 203 /** 204 * {@inheritDoc} 205 */ 206 public int findTokenStart(CharSequence text, int cursor) { 207 /* 208 * It's hard to search backward, so search forward until 209 * we reach the cursor. 210 */ 211 212 int best = 0; 213 int i = 0; 214 215 while (i < cursor) { 216 i = findTokenEnd(text, i); 217 218 if (i < cursor) { 219 i++; // Skip terminating punctuation 220 221 while (i < cursor && text.charAt(i) == ' ') { 222 i++; 223 } 224 225 if (i < cursor) { 226 best = i; 227 } 228 } 229 } 230 231 return best; 232 } 233 234 /** 235 * {@inheritDoc} 236 */ 237 public int findTokenEnd(CharSequence text, int cursor) { 238 int len = text.length(); 239 int i = cursor; 240 241 while (i < len) { 242 char c = text.charAt(i); 243 244 if (c == ',' || c == ';') { 245 return i; 246 } else if (c == '"') { 247 i++; 248 249 while (i < len) { 250 c = text.charAt(i); 251 252 if (c == '"') { 253 i++; 254 break; 255 } else if (c == '\\') { 256 i += 2; 257 } else { 258 i++; 259 } 260 } 261 } else if (c == '(') { 262 int level = 1; 263 i++; 264 265 while (i < len && level > 0) { 266 c = text.charAt(i); 267 268 if (c == ')') { 269 level--; 270 i++; 271 } else if (c == '(') { 272 level++; 273 i++; 274 } else if (c == '\\') { 275 i += 2; 276 } else { 277 i++; 278 } 279 } 280 } else if (c == '<') { 281 i++; 282 283 while (i < len) { 284 c = text.charAt(i); 285 286 if (c == '>') { 287 i++; 288 break; 289 } else { 290 i++; 291 } 292 } 293 } else { 294 i++; 295 } 296 } 297 298 return i; 299 } 300 301 /** 302 * Terminates the specified address with a comma and space. 303 * This assumes that the specified text already has valid syntax. 304 * The Adapter subclass's convertToString() method must make that 305 * guarantee. 306 */ 307 public CharSequence terminateToken(CharSequence text) { 308 return text + ", "; 309 } 310} 311