1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.text.util;
18
19import android.widget.MultiAutoCompleteTextView;
20
21import java.util.ArrayList;
22import java.util.Collection;
23
24/**
25 * This class works as a Tokenizer for MultiAutoCompleteTextView for
26 * address list fields, and also provides a method for converting
27 * a string of addresses (such as might be typed into such a field)
28 * into a series of Rfc822Tokens.
29 */
30public class Rfc822Tokenizer implements MultiAutoCompleteTextView.Tokenizer {
31
32    /**
33     * This constructor will try to take a string like
34     * "Foo Bar (something) <foo\@google.com>,
35     * blah\@google.com (something)"
36     * and convert it into one or more Rfc822Tokens, output into the supplied
37     * collection.
38     *
39     * It does *not* decode MIME encoded-words; charset conversion
40     * must already have taken place if necessary.
41     * It will try to be tolerant of broken syntax instead of
42     * returning an error.
43     *
44     */
45    public static void tokenize(CharSequence text, Collection<Rfc822Token> out) {
46        StringBuilder name = new StringBuilder();
47        StringBuilder address = new StringBuilder();
48        StringBuilder comment = new StringBuilder();
49
50        int i = 0;
51        int cursor = text.length();
52
53        while (i < cursor) {
54            char c = text.charAt(i);
55
56            if (c == ',' || c == ';') {
57                i++;
58
59                while (i < cursor && text.charAt(i) == ' ') {
60                    i++;
61                }
62
63                crunch(name);
64
65                if (address.length() > 0) {
66                    out.add(new Rfc822Token(name.toString(),
67                                            address.toString(),
68                                            comment.toString()));
69                } else if (name.length() > 0) {
70                    out.add(new Rfc822Token(null,
71                                            name.toString(),
72                                            comment.toString()));
73                }
74
75                name.setLength(0);
76                address.setLength(0);
77                comment.setLength(0);
78            } else if (c == '"') {
79                i++;
80
81                while (i < cursor) {
82                    c = text.charAt(i);
83
84                    if (c == '"') {
85                        i++;
86                        break;
87                    } else if (c == '\\' && i + 1 < cursor) {
88                        name.append(text.charAt(i + 1));
89                        i += 2;
90                    } else {
91                        name.append(c);
92                        i++;
93                    }
94                }
95            } else if (c == '(') {
96                int level = 1;
97                i++;
98
99                while (i < cursor && level > 0) {
100                    c = text.charAt(i);
101
102                    if (c == ')') {
103                        if (level > 1) {
104                            comment.append(c);
105                        }
106
107                        level--;
108                        i++;
109                    } else if (c == '(') {
110                        comment.append(c);
111                        level++;
112                        i++;
113                    } else if (c == '\\' && i + 1 < cursor) {
114                        comment.append(text.charAt(i + 1));
115                        i += 2;
116                    } else {
117                        comment.append(c);
118                        i++;
119                    }
120                }
121            } else if (c == '<') {
122                i++;
123
124                while (i < cursor) {
125                    c = text.charAt(i);
126
127                    if (c == '>') {
128                        i++;
129                        break;
130                    } else {
131                        address.append(c);
132                        i++;
133                    }
134                }
135            } else if (c == ' ') {
136                name.append('\0');
137                i++;
138            } else {
139                name.append(c);
140                i++;
141            }
142        }
143
144        crunch(name);
145
146        if (address.length() > 0) {
147            out.add(new Rfc822Token(name.toString(),
148                                    address.toString(),
149                                    comment.toString()));
150        } else if (name.length() > 0) {
151            out.add(new Rfc822Token(null,
152                                    name.toString(),
153                                    comment.toString()));
154        }
155    }
156
157    /**
158     * This method will try to take a string like
159     * "Foo Bar (something) &lt;foo\@google.com&gt;,
160     * blah\@google.com (something)"
161     * and convert it into one or more Rfc822Tokens.
162     * It does *not* decode MIME encoded-words; charset conversion
163     * must already have taken place if necessary.
164     * It will try to be tolerant of broken syntax instead of
165     * returning an error.
166     */
167    public static Rfc822Token[] tokenize(CharSequence text) {
168        ArrayList<Rfc822Token> out = new ArrayList<Rfc822Token>();
169        tokenize(text, out);
170        return out.toArray(new Rfc822Token[out.size()]);
171    }
172
173    private static void crunch(StringBuilder sb) {
174        int i = 0;
175        int len = sb.length();
176
177        while (i < len) {
178            char c = sb.charAt(i);
179
180            if (c == '\0') {
181                if (i == 0 || i == len - 1 ||
182                    sb.charAt(i - 1) == ' ' ||
183                    sb.charAt(i - 1) == '\0' ||
184                    sb.charAt(i + 1) == ' ' ||
185                    sb.charAt(i + 1) == '\0') {
186                    sb.deleteCharAt(i);
187                    len--;
188                } else {
189                    i++;
190                }
191            } else {
192                i++;
193            }
194        }
195
196        for (i = 0; i < len; i++) {
197            if (sb.charAt(i) == '\0') {
198                sb.setCharAt(i, ' ');
199            }
200        }
201    }
202
203    /**
204     * {@inheritDoc}
205     */
206    public int findTokenStart(CharSequence text, int cursor) {
207        /*
208         * It's hard to search backward, so search forward until
209         * we reach the cursor.
210         */
211
212        int best = 0;
213        int i = 0;
214
215        while (i < cursor) {
216            i = findTokenEnd(text, i);
217
218            if (i < cursor) {
219                i++; // Skip terminating punctuation
220
221                while (i < cursor && text.charAt(i) == ' ') {
222                    i++;
223                }
224
225                if (i < cursor) {
226                    best = i;
227                }
228            }
229        }
230
231        return best;
232    }
233
234    /**
235     * {@inheritDoc}
236     */
237    public int findTokenEnd(CharSequence text, int cursor) {
238        int len = text.length();
239        int i = cursor;
240
241        while (i < len) {
242            char c = text.charAt(i);
243
244            if (c == ',' || c == ';') {
245                return i;
246            } else if (c == '"') {
247                i++;
248
249                while (i < len) {
250                    c = text.charAt(i);
251
252                    if (c == '"') {
253                        i++;
254                        break;
255                    } else if (c == '\\') {
256                        i += 2;
257                    } else {
258                        i++;
259                    }
260                }
261            } else if (c == '(') {
262                int level = 1;
263                i++;
264
265                while (i < len && level > 0) {
266                    c = text.charAt(i);
267
268                    if (c == ')') {
269                        level--;
270                        i++;
271                    } else if (c == '(') {
272                        level++;
273                        i++;
274                    } else if (c == '\\') {
275                        i += 2;
276                    } else {
277                        i++;
278                    }
279                }
280            } else if (c == '<') {
281                i++;
282
283                while (i < len) {
284                    c = text.charAt(i);
285
286                    if (c == '>') {
287                        i++;
288                        break;
289                    } else {
290                        i++;
291                    }
292                }
293            } else {
294                i++;
295            }
296        }
297
298        return i;
299    }
300
301    /**
302     * Terminates the specified address with a comma and space.
303     * This assumes that the specified text already has valid syntax.
304     * The Adapter subclass's convertToString() method must make that
305     * guarantee.
306     */
307    public CharSequence terminateToken(CharSequence text) {
308        return text + ", ";
309    }
310}
311