VCardParserImpl_V21.java revision 677ef21613a9d35053ec098444832ce4125a847e
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package com.android.vcard;
17
import android.text.TextUtils;
import android.util.Log;

import com.android.vcard.exception.VCardAgentNotSupportedException;
import com.android.vcard.exception.VCardException;
import com.android.vcard.exception.VCardInvalidCommentLineException;
import com.android.vcard.exception.VCardInvalidLineException;
import com.android.vcard.exception.VCardNestedException;
import com.android.vcard.exception.VCardVersionException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
37
38/**
39 * <p>
40 * Basic implementation achieving vCard parsing. Based on vCard 2.1.
41 * </p>
42 * @hide
43 */
44/* package */ class VCardParserImpl_V21 {
45    private static final String LOG_TAG = "VCardParserImpl_V21";
46
47    private static final class EmptyInterpreter implements VCardInterpreter {
48        @Override
49        public void end() {
50        }
51        @Override
52        public void endEntry() {
53        }
54        @Override
55        public void endProperty() {
56        }
57        @Override
58        public void propertyGroup(String group) {
59        }
60        @Override
61        public void propertyName(String name) {
62        }
63        @Override
64        public void propertyParamType(String type) {
65        }
66        @Override
67        public void propertyParamValue(String value) {
68        }
69        @Override
70        public void propertyValues(List<String> values) {
71        }
72        @Override
73        public void start() {
74        }
75        @Override
76        public void startEntry() {
77        }
78        @Override
79        public void startProperty() {
80        }
81    }
82
83    protected static final class CustomBufferedReader extends BufferedReader {
84        private long mTime;
85
86        /**
87         * Needed since "next line" may be null due to end of line.
88         */
89        private boolean mNextLineIsValid;
90        private String mNextLine;
91
92        public CustomBufferedReader(Reader in) {
93            super(in);
94        }
95
96        @Override
97        public String readLine() throws IOException {
98            if (mNextLineIsValid) {
99                final String ret = mNextLine;
100                mNextLine = null;
101                mNextLineIsValid = false;
102                return ret;
103            }
104
105            final long start = System.currentTimeMillis();
106            final String line = super.readLine();
107            final long end = System.currentTimeMillis();
108            mTime += end - start;
109            return line;
110        }
111
112        /**
113         * Read one line, but make this object store it in its queue.
114         */
115        public String peekLine() throws IOException {
116            if (!mNextLineIsValid) {
117                final long start = System.currentTimeMillis();
118                final String line = super.readLine();
119                final long end = System.currentTimeMillis();
120                mTime += end - start;
121
122                mNextLine = line;
123                mNextLineIsValid = true;
124            }
125
126            return mNextLine;
127        }
128
129        public long getTotalmillisecond() {
130            return mTime;
131        }
132    }
133
134    private static final String DEFAULT_ENCODING = "8BIT";
135
136    protected boolean mCanceled;
137    protected VCardInterpreter mInterpreter;
138
139    protected final String mIntermediateCharset;
140
141    /**
142     * <p>
143     * The encoding type for deconding byte streams. This member variable is
144     * reset to a default encoding every time when a new item comes.
145     * </p>
146     * <p>
147     * "Encoding" in vCard is different from "Charset". It is mainly used for
148     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
149     * "QUOTED-PRINTABLE" are known examples.
150     * </p>
151     */
152    protected String mCurrentEncoding;
153
154    /**
155     * <p>
156     * The reader object to be used internally.
157     * </p>
158     * <p>
159     * Developers should not directly read a line from this object. Use
160     * getLine() unless there some reason.
161     * </p>
162     */
163    protected CustomBufferedReader mReader;
164
165    /**
166     * <p>
167     * Set for storing unkonwn TYPE attributes, which is not acceptable in vCard
168     * specification, but happens to be seen in real world vCard.
169     * </p>
170     * <p>
171     * We just accept those invalid types after emitting a warning for each of it.
172     * </p>
173     */
174    protected final Set<String> mUnknownTypeSet = new HashSet<String>();
175
176    /**
177     * <p>
178     * Set for storing unkonwn VALUE attributes, which is not acceptable in
179     * vCard specification, but happens to be seen in real world vCard.
180     * </p>
181     * <p>
182     * We just accept those invalid types after emitting a warning for each of it.
183     * </p>
184     */
185    protected final Set<String> mUnknownValueSet = new HashSet<String>();
186
187
188    // In some cases, vCard is nested. Currently, we only consider the most
189    // interior vCard data.
190    // See v21_foma_1.vcf in test directory for more information.
191    // TODO: Don't ignore by using count, but read all of information outside vCard.
192    private int mNestCount;
193
194    // Used only for parsing END:VCARD.
195    private String mPreviousLine;
196
197    // For measuring performance.
198    private long mTimeTotal;
199    private long mTimeReadStartRecord;
200    private long mTimeReadEndRecord;
201    private long mTimeStartProperty;
202    private long mTimeEndProperty;
203    private long mTimeParseItems;
204    private long mTimeParseLineAndHandleGroup;
205    private long mTimeParsePropertyValues;
206    private long mTimeParseAdrOrgN;
207    private long mTimeHandleMiscPropertyValue;
208    private long mTimeHandleQuotedPrintable;
209    private long mTimeHandleBase64;
210
211    public VCardParserImpl_V21() {
212        this(VCardConfig.VCARD_TYPE_DEFAULT);
213    }
214
215    public VCardParserImpl_V21(int vcardType) {
216        if ((vcardType & VCardConfig.FLAG_TORELATE_NEST) != 0) {
217            mNestCount = 1;
218        }
219
220        mIntermediateCharset =  VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
221    }
222
223    /**
224     * <p>
225     * Parses the file at the given position.
226     * </p>
227     */
228    // <pre class="prettyprint">vcard_file = [wsls] vcard [wsls]</pre>
229    protected void parseVCardFile() throws IOException, VCardException {
230        boolean readingFirstFile = true;
231        while (true) {
232            if (mCanceled) {
233                Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
234                break;
235            }
236            if (!parseOneVCard(readingFirstFile)) {
237                break;
238            }
239            readingFirstFile = false;
240        }
241
242        if (mNestCount > 0) {
243            boolean useCache = true;
244            for (int i = 0; i < mNestCount; i++) {
245                readEndVCard(useCache, true);
246                useCache = false;
247            }
248        }
249    }
250
251    /**
252     * @return true when a given property name is a valid property name.
253     */
254    protected boolean isValidPropertyName(final String propertyName) {
255        if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
256                propertyName.startsWith("X-"))
257                && !mUnknownTypeSet.contains(propertyName)) {
258            mUnknownTypeSet.add(propertyName);
259            Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
260        }
261        return true;
262    }
263
264    /**
265     * @return String. It may be null, or its length may be 0
266     * @throws IOException
267     */
268    protected String getLine() throws IOException {
269        return mReader.readLine();
270    }
271
272    protected String peekLine() throws IOException {
273        return mReader.peekLine();
274    }
275
276    /**
277     * @return String with it's length > 0
278     * @throws IOException
279     * @throws VCardException when the stream reached end of line
280     */
281    protected String getNonEmptyLine() throws IOException, VCardException {
282        String line;
283        while (true) {
284            line = getLine();
285            if (line == null) {
286                throw new VCardException("Reached end of buffer.");
287            } else if (line.trim().length() > 0) {
288                return line;
289            }
290        }
291    }
292
293    /*
294     * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
295     *         items *CRLF
296     *         "END" [ws] ":" [ws] "VCARD"
297     */
298    private boolean parseOneVCard(boolean firstRead) throws IOException, VCardException {
299        boolean allowGarbage = false;
300        if (firstRead) {
301            if (mNestCount > 0) {
302                for (int i = 0; i < mNestCount; i++) {
303                    if (!readBeginVCard(allowGarbage)) {
304                        return false;
305                    }
306                    allowGarbage = true;
307                }
308            }
309        }
310
311        if (!readBeginVCard(allowGarbage)) {
312            return false;
313        }
314        final long beforeStartEntry = System.currentTimeMillis();
315        mInterpreter.startEntry();
316        mTimeReadStartRecord += System.currentTimeMillis() - beforeStartEntry;
317
318        final long beforeParseItems = System.currentTimeMillis();
319        parseItems();
320        mTimeParseItems += System.currentTimeMillis() - beforeParseItems;
321
322        readEndVCard(true, false);
323
324        final long beforeEndEntry = System.currentTimeMillis();
325        mInterpreter.endEntry();
326        mTimeReadEndRecord += System.currentTimeMillis() - beforeEndEntry;
327        return true;
328    }
329
330    /**
331     * @return True when successful. False when reaching the end of line
332     * @throws IOException
333     * @throws VCardException
334     */
335    protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
336        String line;
337        do {
338            while (true) {
339                line = getLine();
340                if (line == null) {
341                    return false;
342                } else if (line.trim().length() > 0) {
343                    break;
344                }
345            }
346            final String[] strArray = line.split(":", 2);
347            final int length = strArray.length;
348
349            // Although vCard 2.1/3.0 specification does not allow lower cases,
350            // we found vCard file emitted by some external vCard expoter have such
351            // invalid Strings.
352            // So we allow it.
353            // e.g.
354            // BEGIN:vCard
355            if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
356                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
357                return true;
358            } else if (!allowGarbage) {
359                if (mNestCount > 0) {
360                    mPreviousLine = line;
361                    return false;
362                } else {
363                    throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
364                            + "(Instead, \"" + line + "\" came)");
365                }
366            }
367        } while (allowGarbage);
368
369        throw new VCardException("Reached where must not be reached.");
370    }
371
372    /**
373     * <p>
374     * The arguments useCache and allowGarbase are usually true and false
375     * accordingly when this function is called outside this function itself.
376     * </p>
377     *
378     * @param useCache When true, line is obtained from mPreviousline.
379     *            Otherwise, getLine() is used.
380     * @param allowGarbage When true, ignore non "END:VCARD" line.
381     * @throws IOException
382     * @throws VCardException
383     */
384    protected void readEndVCard(boolean useCache, boolean allowGarbage) throws IOException,
385            VCardException {
386        String line;
387        do {
388            if (useCache) {
389                // Though vCard specification does not allow lower cases,
390                // some data may have them, so we allow it.
391                line = mPreviousLine;
392            } else {
393                while (true) {
394                    line = getLine();
395                    if (line == null) {
396                        throw new VCardException("Expected END:VCARD was not found.");
397                    } else if (line.trim().length() > 0) {
398                        break;
399                    }
400                }
401            }
402
403            String[] strArray = line.split(":", 2);
404            if (strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END")
405                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
406                return;
407            } else if (!allowGarbage) {
408                throw new VCardException("END:VCARD != \"" + mPreviousLine + "\"");
409            }
410            useCache = false;
411        } while (allowGarbage);
412    }
413
414    /*
415     * items = *CRLF item / item
416     */
417    protected void parseItems() throws IOException, VCardException {
418        boolean ended = false;
419
420        final long beforeBeginProperty = System.currentTimeMillis();
421        mInterpreter.startProperty();
422        mTimeStartProperty += System.currentTimeMillis() - beforeBeginProperty;
423        ended = parseItem();
424        if (!ended) {
425            final long beforeEndProperty = System.currentTimeMillis();
426            mInterpreter.endProperty();
427            mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
428        }
429
430        while (!ended) {
431            final long beforeStartProperty = System.currentTimeMillis();
432            mInterpreter.startProperty();
433            mTimeStartProperty += System.currentTimeMillis() - beforeStartProperty;
434            try {
435                ended = parseItem();
436            } catch (VCardInvalidCommentLineException e) {
437                Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
438                ended = false;
439            }
440
441            if (!ended) {
442                final long beforeEndProperty = System.currentTimeMillis();
443                mInterpreter.endProperty();
444                mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
445            }
446        }
447    }
448
449    /*
450     * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
451     * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
452     * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
453     * "AGENT" [params] ":" vcard CRLF
454     */
455    protected boolean parseItem() throws IOException, VCardException {
456        mCurrentEncoding = DEFAULT_ENCODING;
457
458        final String line = getNonEmptyLine();
459        long start = System.currentTimeMillis();
460
461        String[] propertyNameAndValue = separateLineAndHandleGroup(line);
462        if (propertyNameAndValue == null) {
463            return true;
464        }
465        if (propertyNameAndValue.length != 2) {
466            throw new VCardInvalidLineException("Invalid line \"" + line + "\"");
467        }
468        String propertyName = propertyNameAndValue[0].toUpperCase();
469        String propertyValue = propertyNameAndValue[1];
470
471        mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start;
472
473        if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) {
474            start = System.currentTimeMillis();
475            handleMultiplePropertyValue(propertyName, propertyValue);
476            mTimeParseAdrOrgN += System.currentTimeMillis() - start;
477            return false;
478        } else if (propertyName.equals("AGENT")) {
479            handleAgent(propertyValue);
480            return false;
481        } else if (isValidPropertyName(propertyName)) {
482            if (propertyName.equals("BEGIN")) {
483                if (propertyValue.equals("VCARD")) {
484                    throw new VCardNestedException("This vCard has nested vCard data in it.");
485                } else {
486                    throw new VCardException("Unknown BEGIN type: " + propertyValue);
487                }
488            } else if (propertyName.equals("VERSION") && !propertyValue.equals(getVersionString())) {
489                throw new VCardVersionException("Incompatible version: " + propertyValue + " != "
490                        + getVersionString());
491            }
492            start = System.currentTimeMillis();
493            handlePropertyValue(propertyName, propertyValue);
494            mTimeParsePropertyValues += System.currentTimeMillis() - start;
495            return false;
496        }
497
498        throw new VCardException("Unknown property name: \"" + propertyName + "\"");
499    }
500
501    // For performance reason, the states for group and property name are merged into one.
502    static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
503    static private final int STATE_PARAMS = 1;
504    // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
505    static private final int STATE_PARAMS_IN_DQUOTE = 2;
506
507    protected String[] separateLineAndHandleGroup(String line) throws VCardException {
508        final String[] propertyNameAndValue = new String[2];
509        final int length = line.length();
510        if (length > 0 && line.charAt(0) == '#') {
511            throw new VCardInvalidCommentLineException();
512        }
513
514        int state = STATE_GROUP_OR_PROPERTY_NAME;
515        int nameIndex = 0;
516
517        // This loop is developed so that we don't have to take care of bottle neck here.
518        // Refactor carefully when you need to do so.
519        for (int i = 0; i < length; i++) {
520            final char ch = line.charAt(i);
521            switch (state) {
522                case STATE_GROUP_OR_PROPERTY_NAME: {
523                    if (ch == ':') {  // End of a property name.
524                        final String propertyName = line.substring(nameIndex, i);
525                        if (propertyName.equalsIgnoreCase("END")) {
526                            mPreviousLine = line;
527                            return null;
528                        }
529                        mInterpreter.propertyName(propertyName);
530                        propertyNameAndValue[0] = propertyName;
531                        if (i < length - 1) {
532                            propertyNameAndValue[1] = line.substring(i + 1);
533                        } else {
534                            propertyNameAndValue[1] = "";
535                        }
536                        return propertyNameAndValue;
537                    } else if (ch == '.') {  // Each group is followed by the dot.
538                        final String groupName = line.substring(nameIndex, i);
539                        if (groupName.length() == 0) {
540                            Log.w(LOG_TAG, "Empty group found. Ignoring.");
541                        } else {
542                            mInterpreter.propertyGroup(groupName);
543                        }
544                        nameIndex = i + 1;  // Next should be another group or a property name.
545                    } else if (ch == ';') {  // End of property name and beginneng of parameters.
546                        final String propertyName = line.substring(nameIndex, i);
547                        if (propertyName.equalsIgnoreCase("END")) {
548                            mPreviousLine = line;
549                            return null;
550                        }
551                        mInterpreter.propertyName(propertyName);
552                        propertyNameAndValue[0] = propertyName;
553                        nameIndex = i + 1;
554                        state = STATE_PARAMS;  // Start parameter parsing.
555                    }
556                    // TODO: comma support (in vCard 3.0 and 4.0).
557                    break;
558                }
559                case STATE_PARAMS: {
560                    if (ch == '"') {
561                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
562                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
563                                    "Silently allow it");
564                        }
565                        state = STATE_PARAMS_IN_DQUOTE;
566                    } else if (ch == ';') {  // Starts another param.
567                        handleParams(line.substring(nameIndex, i));
568                        nameIndex = i + 1;
569                    } else if (ch == ':') {  // End of param and beginenning of values.
570                        handleParams(line.substring(nameIndex, i));
571                        if (i < length - 1) {
572                            propertyNameAndValue[1] = line.substring(i + 1);
573                        } else {
574                            propertyNameAndValue[1] = "";
575                        }
576                        return propertyNameAndValue;
577                    }
578                    break;
579                }
580                case STATE_PARAMS_IN_DQUOTE: {
581                    if (ch == '"') {
582                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
583                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
584                                    "Silently allow it");
585                        }
586                        state = STATE_PARAMS;
587                    }
588                    break;
589                }
590            }
591        }
592
593        throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
594    }
595
596    /*
597     * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
598     * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
599     * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
600     * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
601     * [ws] word / knowntype
602     */
603    protected void handleParams(String params) throws VCardException {
604        final String[] strArray = params.split("=", 2);
605        if (strArray.length == 2) {
606            final String paramName = strArray[0].trim().toUpperCase();
607            String paramValue = strArray[1].trim();
608            if (paramName.equals("TYPE")) {
609                handleType(paramValue);
610            } else if (paramName.equals("VALUE")) {
611                handleValue(paramValue);
612            } else if (paramName.equals("ENCODING")) {
613                handleEncoding(paramValue);
614            } else if (paramName.equals("CHARSET")) {
615                handleCharset(paramValue);
616            } else if (paramName.equals("LANGUAGE")) {
617                handleLanguage(paramValue);
618            } else if (paramName.startsWith("X-")) {
619                handleAnyParam(paramName, paramValue);
620            } else {
621                throw new VCardException("Unknown type \"" + paramName + "\"");
622            }
623        } else {
624            handleParamWithoutName(strArray[0]);
625        }
626    }
627
628    /**
629     * vCard 3.0 parser implementation may throw VCardException.
630     */
631    @SuppressWarnings("unused")
632    protected void handleParamWithoutName(final String paramValue) throws VCardException {
633        handleType(paramValue);
634    }
635
636    /*
637     * ptypeval = knowntype / "X-" word
638     */
639    protected void handleType(final String ptypeval) {
640        if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
641                || ptypeval.startsWith("X-"))
642                && !mUnknownTypeSet.contains(ptypeval)) {
643            mUnknownTypeSet.add(ptypeval);
644            Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
645        }
646        mInterpreter.propertyParamType("TYPE");
647        mInterpreter.propertyParamValue(ptypeval);
648    }
649
650    /*
651     * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
652     */
653    protected void handleValue(final String pvalueval) {
654        if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
655                || pvalueval.startsWith("X-")
656                || mUnknownValueSet.contains(pvalueval))) {
657            mUnknownValueSet.add(pvalueval);
658            Log.w(LOG_TAG, String.format(
659                    "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
660        }
661        mInterpreter.propertyParamType("VALUE");
662        mInterpreter.propertyParamValue(pvalueval);
663    }
664
665    /*
666     * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
667     */
668    protected void handleEncoding(String pencodingval) throws VCardException {
669        if (getAvailableEncodingSet().contains(pencodingval) ||
670                pencodingval.startsWith("X-")) {
671            mInterpreter.propertyParamType("ENCODING");
672            mInterpreter.propertyParamValue(pencodingval);
673            mCurrentEncoding = pencodingval;
674        } else {
675            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
676        }
677    }
678
679    /**
680     * <p>
681     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
682     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
683     * We allow any charset.
684     * </p>
685     */
686    protected void handleCharset(String charsetval) {
687        mInterpreter.propertyParamType("CHARSET");
688        mInterpreter.propertyParamValue(charsetval);
689    }
690
691    /**
692     * See also Section 7.1 of RFC 1521
693     */
694    protected void handleLanguage(String langval) throws VCardException {
695        String[] strArray = langval.split("-");
696        if (strArray.length != 2) {
697            throw new VCardException("Invalid Language: \"" + langval + "\"");
698        }
699        String tmp = strArray[0];
700        int length = tmp.length();
701        for (int i = 0; i < length; i++) {
702            if (!isAsciiLetter(tmp.charAt(i))) {
703                throw new VCardException("Invalid Language: \"" + langval + "\"");
704            }
705        }
706        tmp = strArray[1];
707        length = tmp.length();
708        for (int i = 0; i < length; i++) {
709            if (!isAsciiLetter(tmp.charAt(i))) {
710                throw new VCardException("Invalid Language: \"" + langval + "\"");
711            }
712        }
713        mInterpreter.propertyParamType(VCardConstants.PARAM_LANGUAGE);
714        mInterpreter.propertyParamValue(langval);
715    }
716
717    private boolean isAsciiLetter(char ch) {
718        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
719            return true;
720        }
721        return false;
722    }
723
724    /**
725     * Mainly for "X-" type. This accepts any kind of type without check.
726     */
727    protected void handleAnyParam(String paramName, String paramValue) {
728        mInterpreter.propertyParamType(paramName);
729        mInterpreter.propertyParamValue(paramValue);
730    }
731
732    protected void handlePropertyValue(String propertyName, String propertyValue)
733            throws IOException, VCardException {
734        final String upperEncoding = mCurrentEncoding.toUpperCase();
735        if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
736            final long start = System.currentTimeMillis();
737            final String result = getQuotedPrintable(propertyValue);
738            final ArrayList<String> v = new ArrayList<String>();
739            v.add(result);
740            mInterpreter.propertyValues(v);
741            mTimeHandleQuotedPrintable += System.currentTimeMillis() - start;
742        } else if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
743                || upperEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
744            final long start = System.currentTimeMillis();
745            // It is very rare, but some BASE64 data may be so big that
746            // OutOfMemoryError occurs. To ignore such cases, use try-catch.
747            try {
748                final ArrayList<String> arrayList = new ArrayList<String>();
749                arrayList.add(getBase64(propertyValue));
750                mInterpreter.propertyValues(arrayList);
751            } catch (OutOfMemoryError error) {
752                Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
753                mInterpreter.propertyValues(null);
754            }
755            mTimeHandleBase64 += System.currentTimeMillis() - start;
756        } else {
757            if (!(upperEncoding.equals("7BIT") || upperEncoding.equals("8BIT") ||
758                    upperEncoding.startsWith("X-"))) {
759                Log.w(LOG_TAG,
760                        String.format("The encoding \"%s\" is unsupported by vCard %s",
761                                mCurrentEncoding, getVersionString()));
762            }
763
764            // Some device uses line folding defined in RFC 2425, which is not allowed
765            // in vCard 2.1 (while needed in vCard 3.0).
766            //
767            // e.g.
768            // BEGIN:VCARD
769            // VERSION:2.1
770            // N:;Omega;;;
771            // EMAIL;INTERNET:"Omega"
772            //   <omega@example.com>
773            // FN:Omega
774            // END:VCARD
775            //
776            // The vCard above assumes that email address should become:
777            // "Omega" <omega@example.com>
778            //
779            // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
780            //
781            // For more information about line folding,
782            // see "5.8.1. Line delimiting and folding" in RFC 2425.
783            //
784            // We take care of this case more formally in vCard 3.0, so we only need to
785            // do this in vCard 2.1.
786            if (getVersion() == VCardConfig.VERSION_21) {
787                StringBuilder builder = null;
788                while (true) {
789                    final String nextLine = peekLine();
790                    // We don't need to care too much about this exceptional case,
791                    // but we should not wrongly eat up "END:VCARD", since it critically
792                    // breaks this parser's state machine.
793                    // Thus we roughly look over the next line and confirm it is at least not
794                    // "END:VCARD". This extra fee is worth paying. This is exceptional
795                    // anyway.
796                    if (!TextUtils.isEmpty(nextLine) &&
797                            nextLine.charAt(0) == ' ' &&
798                            !"END:VCARD".contains(nextLine.toUpperCase())) {
799                        getLine();  // Drop the next line.
800
801                        if (builder == null) {
802                            builder = new StringBuilder();
803                            builder.append(propertyValue);
804                        }
805                        builder.append(nextLine.substring(1));
806                    } else {
807                        break;
808                    }
809                }
810                if (builder != null) {
811                    propertyValue = builder.toString();
812                }
813            }
814
815            final long start = System.currentTimeMillis();
816            ArrayList<String> v = new ArrayList<String>();
817            v.add(maybeUnescapeText(propertyValue));
818            mInterpreter.propertyValues(v);
819            mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start;
820        }
821    }
822
823    /**
824     * <p>
825     * Parses and returns Quoted-Printable.
826     * </p>
827     *
828     * @param firstString The string following a parameter name and attributes.
829     *            Example: "string" in
830     *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
831     * @return whole Quoted-Printable string, including a given argument and
832     *         following lines. Excludes the last empty line following to Quoted
833     *         Printable lines.
834     * @throws IOException
835     * @throws VCardException
836     */
837    private String getQuotedPrintable(String firstString) throws IOException, VCardException {
838        // Specifically, there may be some padding between = and CRLF.
839        // See the following:
840        //
841        // qp-line := *(qp-segment transport-padding CRLF)
842        // qp-part transport-padding
843        // qp-segment := qp-section *(SPACE / TAB) "="
844        // ; Maximum length of 76 characters
845        //
846        // e.g. (from RFC 2045)
847        // Now's the time =
848        // for all folk to come=
849        // to the aid of their country.
850        if (firstString.trim().endsWith("=")) {
851            // remove "transport-padding"
852            int pos = firstString.length() - 1;
853            while (firstString.charAt(pos) != '=') {
854            }
855            StringBuilder builder = new StringBuilder();
856            builder.append(firstString.substring(0, pos + 1));
857            builder.append("\r\n");
858            String line;
859            while (true) {
860                line = getLine();
861                if (line == null) {
862                    throw new VCardException("File ended during parsing a Quoted-Printable String");
863                }
864                if (line.trim().endsWith("=")) {
865                    // remove "transport-padding"
866                    pos = line.length() - 1;
867                    while (line.charAt(pos) != '=') {
868                    }
869                    builder.append(line.substring(0, pos + 1));
870                    builder.append("\r\n");
871                } else {
872                    builder.append(line);
873                    break;
874                }
875            }
876            return builder.toString();
877        } else {
878            return firstString;
879        }
880    }
881
882    protected String getBase64(String firstString) throws IOException, VCardException {
883        final StringBuilder builder = new StringBuilder();
884        builder.append(firstString);
885
886        while (true) {
887            final String line = peekLine();
888            if (line == null) {
889                throw new VCardException("File ended during parsing BASE64 binary");
890            }
891
892            // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
893            // have them. We try to detect those cases using semi-colon, given BASE64 doesn't
894            // contain it. Specifically BASE64 doesn't have semi-colon in it, so we should be able
895            // to detect the case safely.
896            if (line.contains(":")) {
897                if (getKnownPropertyNameSet().contains(
898                        line.substring(0, line.indexOf(":")).toUpperCase())) {
899                    Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
900                            "which must not contain semi-colon. Treat the line as next property.");
901                    Log.w(LOG_TAG, "Problematic line: " + line.trim());
902                    break;
903                }
904            }
905
906            // Consume the line.
907            getLine();
908
909            if (line.length() == 0) {
910                break;
911            }
912            builder.append(line);
913        }
914
915        return builder.toString();
916    }
917
918    /**
919     * <p>
920     * Mainly for "ADR", "ORG", and "N"
921     * </p>
922     */
923    /*
924     * addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr,
925     * Street, Locality, Region, Postal Code, Country Name orgparts =
926     * *(strnosemi ";") strnosemi ; First is Organization Name, remainder are
927     * Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family,
928     * Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III,
929     * Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a
930     * semicolon in this string, it must be escaped ; with a "\" character. We
931     * do not care the number of "strnosemi" here. We are not sure whether we
932     * should add "\" CRLF to each value. We exclude them for now.
933     */
934    protected void handleMultiplePropertyValue(String propertyName, String propertyValue)
935            throws IOException, VCardException {
936        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some
937        // softwares/devices
938        // emit such data.
939        if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
940            propertyValue = getQuotedPrintable(propertyValue);
941        }
942
943        mInterpreter.propertyValues(VCardUtils.constructListFromValue(propertyValue,
944                getVersion()));
945    }
946
947    /*
948     * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
949     * error toward the AGENT property.
950     * // TODO: Support AGENT property.
951     * item =
952     * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
953     * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
954     */
955    protected void handleAgent(final String propertyValue) throws VCardException {
956        if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) {
957            // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
958            return;
959        } else {
960            throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
961        }
962    }
963
964    /**
965     * For vCard 3.0.
966     */
967    protected String maybeUnescapeText(final String text) {
968        return text;
969    }
970
971    /**
972     * Returns unescaped String if the character should be unescaped. Return
973     * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
974     * while "\x" should not be.
975     */
976    protected String maybeUnescapeCharacter(final char ch) {
977        return unescapeCharacter(ch);
978    }
979
980    /* package */ static String unescapeCharacter(final char ch) {
981        // Original vCard 2.1 specification does not allow transformation
982        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
983        // implementation of
984        // this class allowed them, so keep it as is.
985        if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
986            return String.valueOf(ch);
987        } else {
988            return null;
989        }
990    }
991
992    private void showPerformanceInfo() {
993        Log.d(LOG_TAG, "Total parsing time:  " + mTimeTotal + " ms");
994        Log.d(LOG_TAG, "Total readLine time: " + mReader.getTotalmillisecond() + " ms");
995        Log.d(LOG_TAG, "Time for handling the beggining of the record: " + mTimeReadStartRecord
996                + " ms");
997        Log.d(LOG_TAG, "Time for handling the end of the record: " + mTimeReadEndRecord + " ms");
998        Log.d(LOG_TAG, "Time for parsing line, and handling group: " + mTimeParseLineAndHandleGroup
999                + " ms");
1000        Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms");
1001        Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms");
1002        Log.d(LOG_TAG, "Time for handling normal property values: " + mTimeHandleMiscPropertyValue
1003                + " ms");
1004        Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + mTimeHandleQuotedPrintable + " ms");
1005        Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms");
1006    }
1007
1008    /**
1009     * @return {@link VCardConfig#VERSION_21}
1010     */
1011    protected int getVersion() {
1012        return VCardConfig.VERSION_21;
1013    }
1014
1015    /**
1016     * @return {@link VCardConfig#VERSION_30}
1017     */
1018    protected String getVersionString() {
1019        return VCardConstants.VERSION_V21;
1020    }
1021
1022    protected Set<String> getKnownPropertyNameSet() {
1023        return VCardParser_V21.sKnownPropertyNameSet;
1024    }
1025
1026    protected Set<String> getKnownTypeSet() {
1027        return VCardParser_V21.sKnownTypeSet;
1028    }
1029
1030    protected Set<String> getKnownValueSet() {
1031        return VCardParser_V21.sKnownValueSet;
1032    }
1033
1034    protected Set<String> getAvailableEncodingSet() {
1035        return VCardParser_V21.sAvailableEncoding;
1036    }
1037
1038    protected String getDefaultEncoding() {
1039        return DEFAULT_ENCODING;
1040    }
1041
1042
1043    public void parse(InputStream is, VCardInterpreter interpreter)
1044            throws IOException, VCardException {
1045        if (is == null) {
1046            throw new NullPointerException("InputStream must not be null.");
1047        }
1048
1049        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1050        mReader = new CustomBufferedReader(tmpReader);
1051
1052        mInterpreter = (interpreter != null ? interpreter : new EmptyInterpreter());
1053
1054        final long start = System.currentTimeMillis();
1055        if (mInterpreter != null) {
1056            mInterpreter.start();
1057        }
1058        parseVCardFile();
1059        if (mInterpreter != null) {
1060            mInterpreter.end();
1061        }
1062        mTimeTotal += System.currentTimeMillis() - start;
1063
1064        if (VCardConfig.showPerformanceLog()) {
1065            showPerformanceInfo();
1066        }
1067    }
1068
1069    public final void cancel() {
1070        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
1071        mCanceled = true;
1072    }
1073}
1074