Rfc822Tokenizer.java revision 9066cfe9886ac131c34d59ed0e2d287b0e3c0087
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.text.util;
18
19import android.widget.MultiAutoCompleteTextView;
20
21import java.util.ArrayList;
22
23/**
24 * This class works as a Tokenizer for MultiAutoCompleteTextView for
25 * address list fields, and also provides a method for converting
26 * a string of addresses (such as might be typed into such a field)
27 * into a series of Rfc822Tokens.
28 */
29public class Rfc822Tokenizer implements MultiAutoCompleteTextView.Tokenizer {
30    /**
31     * This constructor will try to take a string like
32     * "Foo Bar (something) <foo\@google.com>,
33     * blah\@google.com (something)"
34     * and convert it into one or more Rfc822Tokens.
35     * It does *not* decode MIME encoded-words; charset conversion
36     * must already have taken place if necessary.
37     * It will try to be tolerant of broken syntax instead of
38     * returning an error.
39     */
40    public static Rfc822Token[] tokenize(CharSequence text) {
41        ArrayList<Rfc822Token> out = new ArrayList<Rfc822Token>();
42        StringBuilder name = new StringBuilder();
43        StringBuilder address = new StringBuilder();
44        StringBuilder comment = new StringBuilder();
45
46        int i = 0;
47        int cursor = text.length();
48
49        while (i < cursor) {
50            char c = text.charAt(i);
51
52            if (c == ',' || c == ';') {
53                i++;
54
55                while (i < cursor && text.charAt(i) == ' ') {
56                    i++;
57                }
58
59                crunch(name);
60
61                if (address.length() > 0) {
62                    out.add(new Rfc822Token(name.toString(),
63                                            address.toString(),
64                                            comment.toString()));
65                } else if (name.length() > 0) {
66                    out.add(new Rfc822Token(null,
67                                            name.toString(),
68                                            comment.toString()));
69                }
70
71                name.setLength(0);
72                address.setLength(0);
73                address.setLength(0);
74            } else if (c == '"') {
75                i++;
76
77                while (i < cursor) {
78                    c = text.charAt(i);
79
80                    if (c == '"') {
81                        i++;
82                        break;
83                    } else if (c == '\\') {
84                        name.append(text.charAt(i + 1));
85                        i += 2;
86                    } else {
87                        name.append(c);
88                        i++;
89                    }
90                }
91            } else if (c == '(') {
92                int level = 1;
93                i++;
94
95                while (i < cursor && level > 0) {
96                    c = text.charAt(i);
97
98                    if (c == ')') {
99                        if (level > 1) {
100                            comment.append(c);
101                        }
102
103                        level--;
104                        i++;
105                    } else if (c == '(') {
106                        comment.append(c);
107                        level++;
108                        i++;
109                    } else if (c == '\\') {
110                        comment.append(text.charAt(i + 1));
111                        i += 2;
112                    } else {
113                        comment.append(c);
114                        i++;
115                    }
116                }
117            } else if (c == '<') {
118                i++;
119
120                while (i < cursor) {
121                    c = text.charAt(i);
122
123                    if (c == '>') {
124                        i++;
125                        break;
126                    } else {
127                        address.append(c);
128                        i++;
129                    }
130                }
131            } else if (c == ' ') {
132                name.append('\0');
133                i++;
134            } else {
135                name.append(c);
136                i++;
137            }
138        }
139
140        crunch(name);
141
142        if (address.length() > 0) {
143            out.add(new Rfc822Token(name.toString(),
144                                    address.toString(),
145                                    comment.toString()));
146        } else if (name.length() > 0) {
147            out.add(new Rfc822Token(null,
148                                    name.toString(),
149                                    comment.toString()));
150        }
151
152        return out.toArray(new Rfc822Token[out.size()]);
153    }
154
155    private static void crunch(StringBuilder sb) {
156        int i = 0;
157        int len = sb.length();
158
159        while (i < len) {
160            char c = sb.charAt(i);
161
162            if (c == '\0') {
163                if (i == 0 || i == len - 1 ||
164                    sb.charAt(i - 1) == ' ' ||
165                    sb.charAt(i - 1) == '\0' ||
166                    sb.charAt(i + 1) == ' ' ||
167                    sb.charAt(i + 1) == '\0') {
168                    sb.deleteCharAt(i);
169                    len--;
170                } else {
171                    i++;
172                }
173            } else {
174                i++;
175            }
176        }
177
178        for (i = 0; i < len; i++) {
179            if (sb.charAt(i) == '\0') {
180                sb.setCharAt(i, ' ');
181            }
182        }
183    }
184
185    /**
186     * {@inheritDoc}
187     */
188    public int findTokenStart(CharSequence text, int cursor) {
189        /*
190         * It's hard to search backward, so search forward until
191         * we reach the cursor.
192         */
193
194        int best = 0;
195        int i = 0;
196
197        while (i < cursor) {
198            i = findTokenEnd(text, i);
199
200            if (i < cursor) {
201                i++; // Skip terminating punctuation
202
203                while (i < cursor && text.charAt(i) == ' ') {
204                    i++;
205                }
206
207                if (i < cursor) {
208                    best = i;
209                }
210            }
211        }
212
213        return best;
214    }
215
216    /**
217     * {@inheritDoc}
218     */
219    public int findTokenEnd(CharSequence text, int cursor) {
220        int len = text.length();
221        int i = cursor;
222
223        while (i < len) {
224            char c = text.charAt(i);
225
226            if (c == ',' || c == ';') {
227                return i;
228            } else if (c == '"') {
229                i++;
230
231                while (i < len) {
232                    c = text.charAt(i);
233
234                    if (c == '"') {
235                        i++;
236                        break;
237                    } else if (c == '\\') {
238                        i += 2;
239                    } else {
240                        i++;
241                    }
242                }
243            } else if (c == '(') {
244                int level = 1;
245                i++;
246
247                while (i < len && level > 0) {
248                    c = text.charAt(i);
249
250                    if (c == ')') {
251                        level--;
252                        i++;
253                    } else if (c == '(') {
254                        level++;
255                        i++;
256                    } else if (c == '\\') {
257                        i += 2;
258                    } else {
259                        i++;
260                    }
261                }
262            } else if (c == '<') {
263                i++;
264
265                while (i < len) {
266                    c = text.charAt(i);
267
268                    if (c == '>') {
269                        i++;
270                        break;
271                    } else {
272                        i++;
273                    }
274                }
275            } else {
276                i++;
277            }
278        }
279
280        return i;
281    }
282
283    /**
284     * Terminates the specified address with a comma and space.
285     * This assumes that the specified text already has valid syntax.
286     * The Adapter subclass's convertToString() method must make that
287     * guarantee.
288     */
289    public CharSequence terminateToken(CharSequence text) {
290        return text + ", ";
291    }
292}
293