VCardParserImpl_V21.java revision c955c8b0da0c9fcbad0ddcae76641358c27e72cd
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package com.android.vcard;
17
import android.text.TextUtils;
import android.util.Log;

import com.android.vcard.exception.VCardAgentNotSupportedException;
import com.android.vcard.exception.VCardException;
import com.android.vcard.exception.VCardInvalidCommentLineException;
import com.android.vcard.exception.VCardInvalidLineException;
import com.android.vcard.exception.VCardNestedException;
import com.android.vcard.exception.VCardVersionException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
37
38/**
39 * <p>
40 * Basic implementation achieving vCard parsing. Based on vCard 2.1,
41 * </p>
42 * @hide
43 */
44/* package */ class VCardParserImpl_V21 {
45    private static final String LOG_TAG = "VCardParserImpl_V21";
46
47    private static final class EmptyInterpreter implements VCardInterpreter {
48        @Override
49        public void end() {
50        }
51        @Override
52        public void endEntry() {
53        }
54        @Override
55        public void endProperty() {
56        }
57        @Override
58        public void propertyGroup(String group) {
59        }
60        @Override
61        public void propertyName(String name) {
62        }
63        @Override
64        public void propertyParamType(String type) {
65        }
66        @Override
67        public void propertyParamValue(String value) {
68        }
69        @Override
70        public void propertyValues(List<String> values) {
71        }
72        @Override
73        public void start() {
74        }
75        @Override
76        public void startEntry() {
77        }
78        @Override
79        public void startProperty() {
80        }
81    }
82
83    protected static final class CustomBufferedReader extends BufferedReader {
84        private long mTime;
85
86        /**
87         * Needed since "next line" may be null due to end of line.
88         */
89        private boolean mNextLineIsValid;
90        private String mNextLine;
91
92        public CustomBufferedReader(Reader in) {
93            super(in);
94        }
95
96        @Override
97        public String readLine() throws IOException {
98            if (mNextLineIsValid) {
99                final String ret = mNextLine;
100                mNextLine = null;
101                mNextLineIsValid = false;
102                return ret;
103            }
104
105            long start = System.currentTimeMillis();
106            final String line = super.readLine();
107            long end = System.currentTimeMillis();
108            mTime += end - start;
109            return line;
110        }
111
112        /**
113         * Read one line, but make this object store it in its queue.
114         */
115        public String peekLine() throws IOException {
116            if (!mNextLineIsValid) {
117                long start = System.currentTimeMillis();
118                final String line = super.readLine();
119                long end = System.currentTimeMillis();
120                mTime += end - start;
121
122                mNextLine = line;
123                mNextLineIsValid = true;
124            }
125
126            return mNextLine;
127        }
128
129        public long getTotalmillisecond() {
130            return mTime;
131        }
132    }
133
    // Encoding assumed for a property until an ENCODING parameter overrides it;
    // parseItem() resets mCurrentEncoding to this value for every item.
    private static final String DEFAULT_ENCODING = "8BIT";

    // Checked at the top of each iteration of parseVCardFile(); when true, parsing stops.
    protected boolean mCanceled;
    // Receives the start/end/property callbacks emitted while parsing.
    protected VCardInterpreter mInterpreter;

    // Assigned from VCardConfig.DEFAULT_INTERMEDIATE_CHARSET in the constructor.
    protected final String mIntermediateCharset;

    /**
     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time a new item comes.
     * </p>
     * <p>
     * "Encoding" in vCard is different from "Charset". It is mainly used for
     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
     * "QUOTED-PRINTABLE" are known examples.
     * </p>
     */
    protected String mCurrentEncoding;

    /**
     * <p>
     * The reader object to be used internally.
     * </p>
     * <p>
     * Developers should not directly read a line from this object. Use
     * getLine() unless there is some reason not to.
     * </p>
     */
    protected CustomBufferedReader mReader;

    /**
     * <p>
     * Set for storing unknown TYPE attributes, which are not acceptable in the vCard
     * specification, but happen to be seen in real world vCard. It is also used by
     * isValidPropertyName() to remember unknown property names, so that each one is
     * logged only once.
     * </p>
     */
    protected final Set<String> mUnknownTypeSet = new HashSet<String>();

    /**
     * <p>
     * Set for storing unknown VALUE attributes, which are not acceptable in the
     * vCard specification, but happen to be seen in real world vCard.
     * </p>
     */
    protected final Set<String> mUnknownValueSet = new HashSet<String>();


    // In some cases, vCard is nested. Currently, we only consider the most
    // interior vCard data.
    // See v21_foma_1.vcf in test directory for more information.
    // TODO: Don't ignore by using count, but read all of information outside vCard.
    private int mNestCount;

    // The line set aside for readEndVCard(): stored when a line whose property name is
    // END is met while parsing items, or when readBeginVCard() meets a non-BEGIN line
    // in nest-tolerant mode.
    private String mPreviousLine;

    // For measuring performance. Each counter accumulates elapsed milliseconds.
    // NOTE(review): mTimeTotal is not updated in the code visible here; confirm its use.
    private long mTimeTotal;
    private long mTimeReadStartRecord;
    private long mTimeReadEndRecord;
    private long mTimeStartProperty;
    private long mTimeEndProperty;
    private long mTimeParseItems;
    private long mTimeParseLineAndHandleGroup;
    private long mTimeParsePropertyValues;
    private long mTimeParseAdrOrgN;
    private long mTimeHandleMiscPropertyValue;
    private long mTimeHandleQuotedPrintable;
    private long mTimeHandleBase64;
204
    /**
     * Creates a parser for {@link VCardConfig#VCARD_TYPE_DEFAULT}.
     */
    public VCardParserImpl_V21() {
        this(VCardConfig.VCARD_TYPE_DEFAULT);
    }
208
209    public VCardParserImpl_V21(int vcardType) {
210        if ((vcardType & VCardConfig.FLAG_TORELATE_NEST) != 0) {
211            mNestCount = 1;
212        }
213
214        mIntermediateCharset =  VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
215    }
216
217    /**
218     * <p>
219     * Parses the file at the given position.
220     * </p>
221     */
222    // <pre class="prettyprint">vcard_file = [wsls] vcard [wsls]</pre>
223    protected void parseVCardFile() throws IOException, VCardException {
224        boolean readingFirstFile = true;
225        while (true) {
226            if (mCanceled) {
227                Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
228                break;
229            }
230            if (!parseOneVCard(readingFirstFile)) {
231                break;
232            }
233            readingFirstFile = false;
234        }
235
236        if (mNestCount > 0) {
237            boolean useCache = true;
238            for (int i = 0; i < mNestCount; i++) {
239                readEndVCard(useCache, true);
240                useCache = false;
241            }
242        }
243    }
244
245    /**
246     * @return true when a given property name is a valid property name.
247     */
248    protected boolean isValidPropertyName(final String propertyName) {
249        if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
250                propertyName.startsWith("X-"))
251                && !mUnknownTypeSet.contains(propertyName)) {
252            mUnknownTypeSet.add(propertyName);
253            Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
254        }
255        return true;
256    }
257
    /**
     * Reads the next line from {@link #mReader}, consuming a previously peeked line
     * first if there is one.
     *
     * @return String. It may be null (end of stream), or its length may be 0
     * @throws IOException when the underlying reader fails
     */
    protected String getLine() throws IOException {
        return mReader.readLine();
    }
265
    /**
     * Returns the next line of {@link #mReader} without consuming it; the same line
     * is returned again by the following getLine() call.
     *
     * @return the next line, or null at the end of the stream
     * @throws IOException when the underlying reader fails
     */
    protected String peekLine() throws IOException {
        return mReader.peekLine();
    }
269
270    /**
271     * @return String with it's length > 0
272     * @throws IOException
273     * @throws VCardException when the stream reached end of line
274     */
275    protected String getNonEmptyLine() throws IOException, VCardException {
276        String line;
277        while (true) {
278            line = getLine();
279            if (line == null) {
280                throw new VCardException("Reached end of buffer.");
281            } else if (line.trim().length() > 0) {
282                return line;
283            }
284        }
285    }
286
287    /*
288     * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
289     *         items *CRLF
290     *         "END" [ws] ":" [ws] "VCARD"
291     */
292    private boolean parseOneVCard(boolean firstRead) throws IOException, VCardException {
293        boolean allowGarbage = false;
294        if (firstRead) {
295            if (mNestCount > 0) {
296                for (int i = 0; i < mNestCount; i++) {
297                    if (!readBeginVCard(allowGarbage)) {
298                        return false;
299                    }
300                    allowGarbage = true;
301                }
302            }
303        }
304
305        if (!readBeginVCard(allowGarbage)) {
306            return false;
307        }
308        final long beforeStartEntry = System.currentTimeMillis();
309        mInterpreter.startEntry();
310        mTimeReadStartRecord += System.currentTimeMillis() - beforeStartEntry;
311
312        final long beforeParseItems = System.currentTimeMillis();
313        parseItems();
314        mTimeParseItems += System.currentTimeMillis() - beforeParseItems;
315
316        readEndVCard(true, false);
317
318        final long beforeEndEntry = System.currentTimeMillis();
319        mInterpreter.endEntry();
320        mTimeReadEndRecord += System.currentTimeMillis() - beforeEndEntry;
321        return true;
322    }
323
324    /**
325     * @return True when successful. False when reaching the end of line
326     * @throws IOException
327     * @throws VCardException
328     */
329    protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
330        String line;
331        do {
332            while (true) {
333                line = getLine();
334                if (line == null) {
335                    return false;
336                } else if (line.trim().length() > 0) {
337                    break;
338                }
339            }
340            final String[] strArray = line.split(":", 2);
341            final int length = strArray.length;
342
343            // Although vCard 2.1/3.0 specification does not allow lower cases,
344            // we found vCard file emitted by some external vCard expoter have such
345            // invalid Strings.
346            // So we allow it.
347            // e.g.
348            // BEGIN:vCard
349            if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
350                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
351                return true;
352            } else if (!allowGarbage) {
353                if (mNestCount > 0) {
354                    mPreviousLine = line;
355                    return false;
356                } else {
357                    throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
358                            + "(Instead, \"" + line + "\" came)");
359                }
360            }
361        } while (allowGarbage);
362
363        throw new VCardException("Reached where must not be reached.");
364    }
365
366    /**
367     * <p>
368     * The arguments useCache and allowGarbase are usually true and false
369     * accordingly when this function is called outside this function itself.
370     * </p>
371     *
372     * @param useCache When true, line is obtained from mPreviousline.
373     *            Otherwise, getLine() is used.
374     * @param allowGarbage When true, ignore non "END:VCARD" line.
375     * @throws IOException
376     * @throws VCardException
377     */
378    protected void readEndVCard(boolean useCache, boolean allowGarbage) throws IOException,
379            VCardException {
380        String line;
381        do {
382            if (useCache) {
383                // Though vCard specification does not allow lower cases,
384                // some data may have them, so we allow it.
385                line = mPreviousLine;
386            } else {
387                while (true) {
388                    line = getLine();
389                    if (line == null) {
390                        throw new VCardException("Expected END:VCARD was not found.");
391                    } else if (line.trim().length() > 0) {
392                        break;
393                    }
394                }
395            }
396
397            String[] strArray = line.split(":", 2);
398            if (strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END")
399                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
400                return;
401            } else if (!allowGarbage) {
402                throw new VCardException("END:VCARD != \"" + mPreviousLine + "\"");
403            }
404            useCache = false;
405        } while (allowGarbage);
406    }
407
408    /*
409     * items = *CRLF item / item
410     */
411    protected void parseItems() throws IOException, VCardException {
412        boolean ended = false;
413
414        final long beforeBeginProperty = System.currentTimeMillis();
415        mInterpreter.startProperty();
416        mTimeStartProperty += System.currentTimeMillis() - beforeBeginProperty;
417        ended = parseItem();
418        if (!ended) {
419            final long beforeEndProperty = System.currentTimeMillis();
420            mInterpreter.endProperty();
421            mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
422        }
423
424        while (!ended) {
425            final long beforeStartProperty = System.currentTimeMillis();
426            mInterpreter.startProperty();
427            mTimeStartProperty += System.currentTimeMillis() - beforeStartProperty;
428            try {
429                ended = parseItem();
430            } catch (VCardInvalidCommentLineException e) {
431                Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
432                ended = false;
433            }
434
435            if (!ended) {
436                final long beforeEndProperty = System.currentTimeMillis();
437                mInterpreter.endProperty();
438                mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
439            }
440        }
441    }
442
443    /*
444     * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
445     * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
446     * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
447     * "AGENT" [params] ":" vcard CRLF
448     */
449    protected boolean parseItem() throws IOException, VCardException {
450        mCurrentEncoding = DEFAULT_ENCODING;
451
452        final String line = getNonEmptyLine();
453        long start = System.currentTimeMillis();
454
455        String[] propertyNameAndValue = separateLineAndHandleGroup(line);
456        if (propertyNameAndValue == null) {
457            return true;
458        }
459        if (propertyNameAndValue.length != 2) {
460            throw new VCardInvalidLineException("Invalid line \"" + line + "\"");
461        }
462        String propertyName = propertyNameAndValue[0].toUpperCase();
463        String propertyValue = propertyNameAndValue[1];
464
465        mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start;
466
467        if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) {
468            start = System.currentTimeMillis();
469            handleMultiplePropertyValue(propertyName, propertyValue);
470            mTimeParseAdrOrgN += System.currentTimeMillis() - start;
471            return false;
472        } else if (propertyName.equals("AGENT")) {
473            handleAgent(propertyValue);
474            return false;
475        } else if (isValidPropertyName(propertyName)) {
476            if (propertyName.equals("BEGIN")) {
477                if (propertyValue.equals("VCARD")) {
478                    throw new VCardNestedException("This vCard has nested vCard data in it.");
479                } else {
480                    throw new VCardException("Unknown BEGIN type: " + propertyValue);
481                }
482            } else if (propertyName.equals("VERSION") && !propertyValue.equals(getVersionString())) {
483                throw new VCardVersionException("Incompatible version: " + propertyValue + " != "
484                        + getVersionString());
485            }
486            start = System.currentTimeMillis();
487            handlePropertyValue(propertyName, propertyValue);
488            mTimeParsePropertyValues += System.currentTimeMillis() - start;
489            return false;
490        }
491
492        throw new VCardException("Unknown property name: \"" + propertyName + "\"");
493    }
494
    // States of the scanner in separateLineAndHandleGroup().
    // For performance reason, the states for group and property name are merged into one.
    static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
    // Scanning parameters, i.e. after the first ';' and before the ':' that starts the value.
    static private final int STATE_PARAMS = 1;
    // Inside a double-quoted parameter.
    // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
    static private final int STATE_PARAMS_IN_DQUOTE = 2;
500
    /**
     * Splits an item line into property name and value in a single left-to-right scan,
     * reporting groups, the property name and parameters along the way.
     * <p>
     * Groups are passed to mInterpreter.propertyGroup(), the property name to
     * mInterpreter.propertyName(), and each parameter to handleParams().
     * </p>
     *
     * @param line the item line to split
     * @return a two-element array holding the property name (as written in the line)
     *         and the raw value after the first ':' outside double quotes ("" when
     *         nothing follows it); null when the property name is END, in which case
     *         the line is stored in mPreviousLine
     * @throws VCardInvalidCommentLineException when the line starts with '#'
     * @throws VCardInvalidLineException when the line has no ':' ending the name or
     *             the parameters
     * @throws VCardException when a parameter is rejected by handleParams()
     */
    protected String[] separateLineAndHandleGroup(String line) throws VCardException {
        final String[] propertyNameAndValue = new String[2];
        final int length = line.length();
        if (length > 0 && line.charAt(0) == '#') {
            throw new VCardInvalidCommentLineException();
        }

        int state = STATE_GROUP_OR_PROPERTY_NAME;
        // Start index of the token (group, property name or parameter) being scanned.
        int nameIndex = 0;

        // This loop is written so that it does not become a bottleneck.
        // Refactor carefully when you need to do so.
        for (int i = 0; i < length; i++) {
            final char ch = line.charAt(i);
            switch (state) {
                case STATE_GROUP_OR_PROPERTY_NAME: {
                    if (ch == ':') {  // End of a property name.
                        final String propertyName = line.substring(nameIndex, i);
                        if (propertyName.equalsIgnoreCase("END")) {
                            // Keep the line so that readEndVCard() can examine it.
                            mPreviousLine = line;
                            return null;
                        }
                        mInterpreter.propertyName(propertyName);
                        propertyNameAndValue[0] = propertyName;
                        if (i < length - 1) {
                            propertyNameAndValue[1] = line.substring(i + 1);
                        } else {
                            propertyNameAndValue[1] = "";
                        }
                        return propertyNameAndValue;
                    } else if (ch == '.') {  // Each group is followed by the dot.
                        final String groupName = line.substring(nameIndex, i);
                        if (groupName.length() == 0) {
                            Log.w(LOG_TAG, "Empty group found. Ignoring.");
                        } else {
                            mInterpreter.propertyGroup(groupName);
                        }
                        nameIndex = i + 1;  // Next should be another group or a property name.
                    } else if (ch == ';') {  // End of property name and beginning of parameters.
                        final String propertyName = line.substring(nameIndex, i);
                        if (propertyName.equalsIgnoreCase("END")) {
                            // Keep the line so that readEndVCard() can examine it.
                            mPreviousLine = line;
                            return null;
                        }
                        mInterpreter.propertyName(propertyName);
                        propertyNameAndValue[0] = propertyName;
                        nameIndex = i + 1;
                        state = STATE_PARAMS;  // Start parameter parsing.
                    }
                    // TODO: comma support (in vCard 3.0 and 4.0).
                    break;
                }
                case STATE_PARAMS: {
                    if (ch == '"') {
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        // ';' and ':' inside the quotes must not end the parameter.
                        state = STATE_PARAMS_IN_DQUOTE;
                    } else if (ch == ';') {  // Starts another param.
                        handleParams(line.substring(nameIndex, i));
                        nameIndex = i + 1;
                    } else if (ch == ':') {  // End of param and beginning of values.
                        handleParams(line.substring(nameIndex, i));
                        if (i < length - 1) {
                            propertyNameAndValue[1] = line.substring(i + 1);
                        } else {
                            propertyNameAndValue[1] = "";
                        }
                        return propertyNameAndValue;
                    }
                    break;
                }
                case STATE_PARAMS_IN_DQUOTE: {
                    if (ch == '"') {  // Closing quote: back to ordinary parameter scanning.
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        state = STATE_PARAMS;
                    }
                    break;
                }
            }
        }

        // The whole line was scanned without meeting the ':' that starts the value.
        throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
    }
589
590    /*
591     * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
592     * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
593     * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
594     * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
595     * [ws] word / knowntype
596     */
597    protected void handleParams(String params) throws VCardException {
598        final String[] strArray = params.split("=", 2);
599        if (strArray.length == 2) {
600            final String paramName = strArray[0].trim().toUpperCase();
601            String paramValue = strArray[1].trim();
602            if (paramName.equals("TYPE")) {
603                handleType(paramValue);
604            } else if (paramName.equals("VALUE")) {
605                handleValue(paramValue);
606            } else if (paramName.equals("ENCODING")) {
607                handleEncoding(paramValue);
608            } else if (paramName.equals("CHARSET")) {
609                handleCharset(paramValue);
610            } else if (paramName.equals("LANGUAGE")) {
611                handleLanguage(paramValue);
612            } else if (paramName.startsWith("X-")) {
613                handleAnyParam(paramName, paramValue);
614            } else {
615                throw new VCardException("Unknown type \"" + paramName + "\"");
616            }
617        } else {
618            handleParamWithoutName(strArray[0]);
619        }
620    }
621
    /**
     * Handles a parameter written without "name=" (handleParams() found no '=' in it)
     * by treating it as a TYPE value.
     * <p>
     * vCard 3.0 parser implementation may throw VCardException.
     * </p>
     *
     * @param paramValue the parameter text as written in the line
     * @throws VCardException not thrown here; declared so that overriding
     *             implementations can reject such parameters
     */
    @SuppressWarnings("unused")
    protected void handleParamWithoutName(final String paramValue) throws VCardException {
        handleType(paramValue);
    }
629
630    /*
631     * ptypeval = knowntype / "X-" word
632     */
633    protected void handleType(final String ptypeval) {
634        if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
635                || ptypeval.startsWith("X-"))
636                && !mUnknownTypeSet.contains(ptypeval)) {
637            mUnknownTypeSet.add(ptypeval);
638            Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
639        }
640        mInterpreter.propertyParamType("TYPE");
641        mInterpreter.propertyParamValue(ptypeval);
642    }
643
644    /*
645     * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
646     */
647    protected void handleValue(final String pvalueval) {
648        if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
649                || pvalueval.startsWith("X-")
650                || mUnknownValueSet.contains(pvalueval))) {
651            mUnknownValueSet.add(pvalueval);
652            Log.w(LOG_TAG, String.format(
653                    "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
654        }
655        mInterpreter.propertyParamType("VALUE");
656        mInterpreter.propertyParamValue(pvalueval);
657    }
658
659    /*
660     * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
661     */
662    protected void handleEncoding(String pencodingval) throws VCardException {
663        if (getAvailableEncodingSet().contains(pencodingval) ||
664                pencodingval.startsWith("X-")) {
665            mInterpreter.propertyParamType("ENCODING");
666            mInterpreter.propertyParamValue(pencodingval);
667            mCurrentEncoding = pencodingval;
668        } else {
669            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
670        }
671    }
672
    /**
     * <p>
     * Reports a CHARSET parameter to the interpreter without validating it.
     * </p>
     * <p>
     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
     * We allow any charset.
     * </p>
     *
     * @param charsetval the charset value as written in the line
     */
    protected void handleCharset(String charsetval) {
        mInterpreter.propertyParamType("CHARSET");
        mInterpreter.propertyParamValue(charsetval);
    }
684
685    /**
686     * See also Section 7.1 of RFC 1521
687     */
688    protected void handleLanguage(String langval) throws VCardException {
689        String[] strArray = langval.split("-");
690        if (strArray.length != 2) {
691            throw new VCardException("Invalid Language: \"" + langval + "\"");
692        }
693        String tmp = strArray[0];
694        int length = tmp.length();
695        for (int i = 0; i < length; i++) {
696            if (!isAsciiLetter(tmp.charAt(i))) {
697                throw new VCardException("Invalid Language: \"" + langval + "\"");
698            }
699        }
700        tmp = strArray[1];
701        length = tmp.length();
702        for (int i = 0; i < length; i++) {
703            if (!isAsciiLetter(tmp.charAt(i))) {
704                throw new VCardException("Invalid Language: \"" + langval + "\"");
705            }
706        }
707        mInterpreter.propertyParamType(VCardConstants.PARAM_LANGUAGE);
708        mInterpreter.propertyParamValue(langval);
709    }
710
711    private boolean isAsciiLetter(char ch) {
712        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
713            return true;
714        }
715        return false;
716    }
717
    /**
     * Reports a parameter to the interpreter as-is.
     * Mainly for "X-" type. This accepts any kind of type without check.
     *
     * @param paramName the parameter name, passed on as the param type
     * @param paramValue the parameter value
     */
    protected void handleAnyParam(String paramName, String paramValue) {
        mInterpreter.propertyParamType(paramName);
        mInterpreter.propertyParamValue(paramValue);
    }
725
726    protected void handlePropertyValue(String propertyName, String propertyValue)
727            throws IOException, VCardException {
728        final String upperEncoding = mCurrentEncoding.toUpperCase();
729        if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
730            final long start = System.currentTimeMillis();
731            final String result = getQuotedPrintable(propertyValue);
732            final ArrayList<String> v = new ArrayList<String>();
733            v.add(result);
734            mInterpreter.propertyValues(v);
735            mTimeHandleQuotedPrintable += System.currentTimeMillis() - start;
736        } else if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
737                || upperEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
738            final long start = System.currentTimeMillis();
739            // It is very rare, but some BASE64 data may be so big that
740            // OutOfMemoryError occurs. To ignore such cases, use try-catch.
741            try {
742                final ArrayList<String> arrayList = new ArrayList<String>();
743                arrayList.add(getBase64(propertyValue));
744                mInterpreter.propertyValues(arrayList);
745            } catch (OutOfMemoryError error) {
746                Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
747                mInterpreter.propertyValues(null);
748            }
749            mTimeHandleBase64 += System.currentTimeMillis() - start;
750        } else {
751            if (!(upperEncoding.equals("7BIT") || upperEncoding.equals("8BIT") ||
752                    upperEncoding.startsWith("X-"))) {
753                Log.w(LOG_TAG,
754                        String.format("The encoding \"%s\" is unsupported by vCard %s",
755                                mCurrentEncoding, getVersionString()));
756            }
757
758            // Some device uses line folding defined in RFC 2425, which is not allowed
759            // in vCard 2.1 (while needed in vCard 3.0).
760            //
761            // e.g.
762            // BEGIN:VCARD
763            // VERSION:2.1
764            // N:;Omega;;;
765            // EMAIL;INTERNET:"Omega"
766            //   <omega@example.com>
767            // FN:Omega
768            // END:VCARD
769            //
770            // The vCard above assumes that email address should become:
771            // "Omega" <omega@example.com>
772            //
773            // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
774            //
775            // For more information about line folding,
776            // see "5.8.1. Line delimiting and folding" in RFC 2425.
777            //
778            // We take care of this case more formally in vCard 3.0, so we only need to
779            // do this in vCard 2.1.
780            if (getVersion() == VCardConfig.VERSION_21) {
781                StringBuilder builder = null;
782                while (true) {
783                    final String nextLine = peekLine();
784                    // We don't need to care too much about this exceptional case,
785                    // but we should not wrongly eat up "END:VCARD", since it critically
786                    // breaks this parser's state machine.
787                    // Thus we roughly look over the next line and confirm it is at least not
788                    // "END:VCARD". This extra fee is worth paying. This is exceptional
789                    // anyway.
790                    if (!TextUtils.isEmpty(nextLine) &&
791                            nextLine.charAt(0) == ' ' &&
792                            !"END:VCARD".contains(nextLine.toUpperCase())) {
793                        getLine();  // Drop the next line.
794
795                        if (builder == null) {
796                            builder = new StringBuilder();
797                            builder.append(propertyValue);
798                        }
799                        builder.append(nextLine.substring(1));
800                    } else {
801                        break;
802                    }
803                }
804                if (builder != null) {
805                    propertyValue = builder.toString();
806                }
807            }
808
809            final long start = System.currentTimeMillis();
810            ArrayList<String> v = new ArrayList<String>();
811            v.add(maybeUnescapeText(propertyValue));
812            mInterpreter.propertyValues(v);
813            mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start;
814        }
815    }
816
817    /**
818     * <p>
819     * Parses and returns Quoted-Printable.
820     * </p>
821     *
822     * @param firstString The string following a parameter name and attributes.
823     *            Example: "string" in
824     *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
825     * @return whole Quoted-Printable string, including a given argument and
826     *         following lines. Excludes the last empty line following to Quoted
827     *         Printable lines.
828     * @throws IOException
829     * @throws VCardException
830     */
831    private String getQuotedPrintable(String firstString) throws IOException, VCardException {
832        // Specifically, there may be some padding between = and CRLF.
833        // See the following:
834        //
835        // qp-line := *(qp-segment transport-padding CRLF)
836        // qp-part transport-padding
837        // qp-segment := qp-section *(SPACE / TAB) "="
838        // ; Maximum length of 76 characters
839        //
840        // e.g. (from RFC 2045)
841        // Now's the time =
842        // for all folk to come=
843        // to the aid of their country.
844        if (firstString.trim().endsWith("=")) {
845            // remove "transport-padding"
846            int pos = firstString.length() - 1;
847            while (firstString.charAt(pos) != '=') {
848            }
849            StringBuilder builder = new StringBuilder();
850            builder.append(firstString.substring(0, pos + 1));
851            builder.append("\r\n");
852            String line;
853            while (true) {
854                line = getLine();
855                if (line == null) {
856                    throw new VCardException("File ended during parsing a Quoted-Printable String");
857                }
858                if (line.trim().endsWith("=")) {
859                    // remove "transport-padding"
860                    pos = line.length() - 1;
861                    while (line.charAt(pos) != '=') {
862                    }
863                    builder.append(line.substring(0, pos + 1));
864                    builder.append("\r\n");
865                } else {
866                    builder.append(line);
867                    break;
868                }
869            }
870            return builder.toString();
871        } else {
872            return firstString;
873        }
874    }
875
876    protected String getBase64(String firstString) throws IOException, VCardException {
877        final StringBuilder builder = new StringBuilder();
878        builder.append(firstString);
879
880        while (true) {
881            final String line = peekLine();
882            if (line == null) {
883                throw new VCardException("File ended during parsing BASE64 binary");
884            }
885
886            // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
887            // have them. We try to detect those cases using semi-colon, given BASE64 doesn't
888            // contain it. Specifically BASE64 doesn't have semi-colon in it, so we should be able
889            // to detect the case safely.
890            if (line.contains(":")) {
891                if (getKnownPropertyNameSet().contains(
892                        line.substring(0, line.indexOf(":")).toUpperCase())) {
893                    Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
894                            "which must not contain semi-colon. Treat the line as next property.");
895                    Log.w(LOG_TAG, "Problematic line: " + line.trim());
896                    break;
897                }
898            }
899
900            // Consume the line.
901            getLine();
902
903            if (line.length() == 0) {
904                break;
905            }
906            builder.append(line);
907        }
908
909        return builder.toString();
910    }
911
912    /**
913     * <p>
914     * Mainly for "ADR", "ORG", and "N"
915     * </p>
916     */
917    /*
918     * addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr,
919     * Street, Locality, Region, Postal Code, Country Name orgparts =
920     * *(strnosemi ";") strnosemi ; First is Organization Name, remainder are
921     * Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family,
922     * Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III,
923     * Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a
924     * semicolon in this string, it must be escaped ; with a "\" character. We
925     * do not care the number of "strnosemi" here. We are not sure whether we
926     * should add "\" CRLF to each value. We exclude them for now.
927     */
928    protected void handleMultiplePropertyValue(String propertyName, String propertyValue)
929            throws IOException, VCardException {
930        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some
931        // softwares/devices
932        // emit such data.
933        if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
934            propertyValue = getQuotedPrintable(propertyValue);
935        }
936
937        mInterpreter.propertyValues(VCardUtils.constructListFromValue(propertyValue,
938                getVersion()));
939    }
940
941    /*
942     * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
943     * error toward the AGENT property.
944     * // TODO: Support AGENT property.
945     * item =
946     * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
947     * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
948     */
949    protected void handleAgent(final String propertyValue) throws VCardException {
950        if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) {
951            // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
952            return;
953        } else {
954            throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
955        }
956    }
957
    /**
     * Hook applied to a property value before it is handed to the interpreter.
     * This implementation returns the given text unchanged.
     * <p>
     * For vCard 3.0. NOTE(review): presumably the vCard 3.0 parser overrides this
     * method to unescape the text; confirm against the subclass.
     * </p>
     *
     * @param text the raw property value.
     * @return the same String that was passed in.
     */
    protected String maybeUnescapeText(final String text) {
        return text;
    }
964
    /**
     * Returns unescaped String if the character should be unescaped. Returns
     * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
     * while "\x" should not be.
     * <p>
     * This implementation simply delegates to {@link #unescapeCharacter(char)}.
     * </p>
     *
     * @param ch the character which follows a backslash.
     * @return a one-character String containing ch, or null when ch should not be
     *         unescaped.
     */
    protected String maybeUnescapeCharacter(final char ch) {
        return unescapeCharacter(ch);
    }
973
974    /* package */ static String unescapeCharacter(final char ch) {
975        // Original vCard 2.1 specification does not allow transformation
976        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
977        // implementation of
978        // this class allowed them, so keep it as is.
979        if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
980            return String.valueOf(ch);
981        } else {
982            return null;
983        }
984    }
985
986    private void showPerformanceInfo() {
987        Log.d(LOG_TAG, "Total parsing time:  " + mTimeTotal + " ms");
988        Log.d(LOG_TAG, "Total readLine time: " + mReader.getTotalmillisecond() + " ms");
989        Log.d(LOG_TAG, "Time for handling the beggining of the record: " + mTimeReadStartRecord
990                + " ms");
991        Log.d(LOG_TAG, "Time for handling the end of the record: " + mTimeReadEndRecord + " ms");
992        Log.d(LOG_TAG, "Time for parsing line, and handling group: " + mTimeParseLineAndHandleGroup
993                + " ms");
994        Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms");
995        Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms");
996        Log.d(LOG_TAG, "Time for handling normal property values: " + mTimeHandleMiscPropertyValue
997                + " ms");
998        Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + mTimeHandleQuotedPrintable + " ms");
999        Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms");
1000    }
1001
    /**
     * Returns the vCard version handled by this parser, as a
     * {@link VCardConfig} constant. Among other things the value decides whether
     * the RFC 2425 line-folding workaround for plain property values is applied.
     * NOTE(review): presumably overridden by parsers for other vCard versions.
     *
     * @return {@link VCardConfig#VERSION_21}
     */
    protected int getVersion() {
        return VCardConfig.VERSION_21;
    }
1008
    /**
     * Returns the vCard version handled by this parser as a String. The value is
     * used in the warning logged for an unsupported encoding.
     *
     * @return {@link VCardConstants#VERSION_V21}
     */
    protected String getVersionString() {
        return VCardConstants.VERSION_V21;
    }
1015
    /**
     * Returns the set of property names known to this parser. While a BASE64
     * value is being read, the upper-cased name of a following line is looked up
     * in this set to detect the beginning of the next property.
     *
     * @return {@code VCardParser_V21.sKnownPropertyNameSet}
     */
    protected Set<String> getKnownPropertyNameSet() {
        return VCardParser_V21.sKnownPropertyNameSet;
    }
1019
    /**
     * Returns the set of known type names.
     *
     * @return {@code VCardParser_V21.sKnownTypeSet}
     */
    protected Set<String> getKnownTypeSet() {
        return VCardParser_V21.sKnownTypeSet;
    }
1023
    /**
     * Returns the set of known value names.
     *
     * @return {@code VCardParser_V21.sKnownValueSet}
     */
    protected Set<String> getKnownValueSet() {
        return VCardParser_V21.sKnownValueSet;
    }
1027
    /**
     * Returns the set of available encoding names.
     *
     * @return {@code VCardParser_V21.sAvailableEncoding}
     */
    protected Set<String> getAvailableEncodingSet() {
        return VCardParser_V21.sAvailableEncoding;
    }
1031
    /**
     * Returns the default encoding name.
     * NOTE(review): presumably the encoding assumed when a property does not
     * specify one; confirm against the callers.
     *
     * @return the {@code DEFAULT_ENCODING} constant of this class.
     */
    protected String getDefaultEncoding() {
        return DEFAULT_ENCODING;
    }
1035
1036
1037    public void parse(InputStream is, VCardInterpreter interpreter)
1038            throws IOException, VCardException {
1039        if (is == null) {
1040            throw new NullPointerException("InputStream must not be null.");
1041        }
1042
1043        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1044        mReader = new CustomBufferedReader(tmpReader);
1045
1046        mInterpreter = (interpreter != null ? interpreter : new EmptyInterpreter());
1047
1048        final long start = System.currentTimeMillis();
1049        if (mInterpreter != null) {
1050            mInterpreter.start();
1051        }
1052        parseVCardFile();
1053        if (mInterpreter != null) {
1054            mInterpreter.end();
1055        }
1056        mTimeTotal += System.currentTimeMillis() - start;
1057
1058        if (VCardConfig.showPerformanceLog()) {
1059            showPerformanceInfo();
1060        }
1061    }
1062
    /**
     * Requests cancellation: logs the request and sets the {@code mCanceled}
     * flag of this parser to true.
     * NOTE(review): presumably the parsing loop checks the flag and stops;
     * confirm where {@code mCanceled} is read.
     */
    public final void cancel() {
        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
        mCanceled = true;
    }
1067}
1068