1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.text.util;
18
19import android.widget.MultiAutoCompleteTextView;
20
21import java.util.ArrayList;
22import java.util.Collection;
23
24/**
25 * This class works as a Tokenizer for MultiAutoCompleteTextView for
26 * address list fields, and also provides a method for converting
27 * a string of addresses (such as might be typed into such a field)
28 * into a series of Rfc822Tokens.
29 */
30public class Rfc822Tokenizer implements MultiAutoCompleteTextView.Tokenizer {
31
32    /**
33     * This constructor will try to take a string like
34     * "Foo Bar (something) <foo\@google.com>,
35     * blah\@google.com (something)"
36     * and convert it into one or more Rfc822Tokens, output into the supplied
37     * collection.
38     *
39     * It does *not* decode MIME encoded-words; charset conversion
40     * must already have taken place if necessary.
41     * It will try to be tolerant of broken syntax instead of
42     * returning an error.
43     *
44     */
45    public static void tokenize(CharSequence text, Collection<Rfc822Token> out) {
46        StringBuilder name = new StringBuilder();
47        StringBuilder address = new StringBuilder();
48        StringBuilder comment = new StringBuilder();
49
50        int i = 0;
51        int cursor = text.length();
52
53        while (i < cursor) {
54            char c = text.charAt(i);
55
56            if (c == ',' || c == ';') {
57                i++;
58
59                while (i < cursor && text.charAt(i) == ' ') {
60                    i++;
61                }
62
63                crunch(name);
64
65                if (address.length() > 0) {
66                    out.add(new Rfc822Token(name.toString(),
67                                            address.toString(),
68                                            comment.toString()));
69                } else if (name.length() > 0) {
70                    out.add(new Rfc822Token(null,
71                                            name.toString(),
72                                            comment.toString()));
73                }
74
75                name.setLength(0);
76                address.setLength(0);
77                comment.setLength(0);
78            } else if (c == '"') {
79                i++;
80
81                while (i < cursor) {
82                    c = text.charAt(i);
83
84                    if (c == '"') {
85                        i++;
86                        break;
87                    } else if (c == '\\') {
88                        if (i + 1 < cursor) {
89                            name.append(text.charAt(i + 1));
90                        }
91                        i += 2;
92                    } else {
93                        name.append(c);
94                        i++;
95                    }
96                }
97            } else if (c == '(') {
98                int level = 1;
99                i++;
100
101                while (i < cursor && level > 0) {
102                    c = text.charAt(i);
103
104                    if (c == ')') {
105                        if (level > 1) {
106                            comment.append(c);
107                        }
108
109                        level--;
110                        i++;
111                    } else if (c == '(') {
112                        comment.append(c);
113                        level++;
114                        i++;
115                    } else if (c == '\\') {
116                        if (i + 1 < cursor) {
117                            comment.append(text.charAt(i + 1));
118                        }
119                        i += 2;
120                    } else {
121                        comment.append(c);
122                        i++;
123                    }
124                }
125            } else if (c == '<') {
126                i++;
127
128                while (i < cursor) {
129                    c = text.charAt(i);
130
131                    if (c == '>') {
132                        i++;
133                        break;
134                    } else {
135                        address.append(c);
136                        i++;
137                    }
138                }
139            } else if (c == ' ') {
140                name.append('\0');
141                i++;
142            } else {
143                name.append(c);
144                i++;
145            }
146        }
147
148        crunch(name);
149
150        if (address.length() > 0) {
151            out.add(new Rfc822Token(name.toString(),
152                                    address.toString(),
153                                    comment.toString()));
154        } else if (name.length() > 0) {
155            out.add(new Rfc822Token(null,
156                                    name.toString(),
157                                    comment.toString()));
158        }
159    }
160
161    /**
162     * This method will try to take a string like
163     * "Foo Bar (something) &lt;foo\@google.com&gt;,
164     * blah\@google.com (something)"
165     * and convert it into one or more Rfc822Tokens.
166     * It does *not* decode MIME encoded-words; charset conversion
167     * must already have taken place if necessary.
168     * It will try to be tolerant of broken syntax instead of
169     * returning an error.
170     */
171    public static Rfc822Token[] tokenize(CharSequence text) {
172        ArrayList<Rfc822Token> out = new ArrayList<Rfc822Token>();
173        tokenize(text, out);
174        return out.toArray(new Rfc822Token[out.size()]);
175    }
176
177    private static void crunch(StringBuilder sb) {
178        int i = 0;
179        int len = sb.length();
180
181        while (i < len) {
182            char c = sb.charAt(i);
183
184            if (c == '\0') {
185                if (i == 0 || i == len - 1 ||
186                    sb.charAt(i - 1) == ' ' ||
187                    sb.charAt(i - 1) == '\0' ||
188                    sb.charAt(i + 1) == ' ' ||
189                    sb.charAt(i + 1) == '\0') {
190                    sb.deleteCharAt(i);
191                    len--;
192                } else {
193                    i++;
194                }
195            } else {
196                i++;
197            }
198        }
199
200        for (i = 0; i < len; i++) {
201            if (sb.charAt(i) == '\0') {
202                sb.setCharAt(i, ' ');
203            }
204        }
205    }
206
207    /**
208     * {@inheritDoc}
209     */
210    public int findTokenStart(CharSequence text, int cursor) {
211        /*
212         * It's hard to search backward, so search forward until
213         * we reach the cursor.
214         */
215
216        int best = 0;
217        int i = 0;
218
219        while (i < cursor) {
220            i = findTokenEnd(text, i);
221
222            if (i < cursor) {
223                i++; // Skip terminating punctuation
224
225                while (i < cursor && text.charAt(i) == ' ') {
226                    i++;
227                }
228
229                if (i < cursor) {
230                    best = i;
231                }
232            }
233        }
234
235        return best;
236    }
237
238    /**
239     * {@inheritDoc}
240     */
241    public int findTokenEnd(CharSequence text, int cursor) {
242        int len = text.length();
243        int i = cursor;
244
245        while (i < len) {
246            char c = text.charAt(i);
247
248            if (c == ',' || c == ';') {
249                return i;
250            } else if (c == '"') {
251                i++;
252
253                while (i < len) {
254                    c = text.charAt(i);
255
256                    if (c == '"') {
257                        i++;
258                        break;
259                    } else if (c == '\\' && i + 1 < len) {
260                        i += 2;
261                    } else {
262                        i++;
263                    }
264                }
265            } else if (c == '(') {
266                int level = 1;
267                i++;
268
269                while (i < len && level > 0) {
270                    c = text.charAt(i);
271
272                    if (c == ')') {
273                        level--;
274                        i++;
275                    } else if (c == '(') {
276                        level++;
277                        i++;
278                    } else if (c == '\\' && i + 1 < len) {
279                        i += 2;
280                    } else {
281                        i++;
282                    }
283                }
284            } else if (c == '<') {
285                i++;
286
287                while (i < len) {
288                    c = text.charAt(i);
289
290                    if (c == '>') {
291                        i++;
292                        break;
293                    } else {
294                        i++;
295                    }
296                }
297            } else {
298                i++;
299            }
300        }
301
302        return i;
303    }
304
305    /**
306     * Terminates the specified address with a comma and space.
307     * This assumes that the specified text already has valid syntax.
308     * The Adapter subclass's convertToString() method must make that
309     * guarantee.
310     */
311    public CharSequence terminateToken(CharSequence text) {
312        return text + ", ";
313    }
314}
315