1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package com.android.vcard;
17
import android.text.TextUtils;
import android.util.Base64;
import android.util.Log;

import com.android.vcard.exception.VCardAgentNotSupportedException;
import com.android.vcard.exception.VCardException;
import com.android.vcard.exception.VCardInvalidCommentLineException;
import com.android.vcard.exception.VCardInvalidLineException;
import com.android.vcard.exception.VCardVersionException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
38
39/**
40 * <p>
41 * Basic implementation achieving vCard parsing. Based on vCard 2.1.
42 * </p>
43 * @hide
44 */
45/* package */ class VCardParserImpl_V21 {
46    private static final String LOG_TAG = VCardConstants.LOG_TAG;
47
48    protected static final class CustomBufferedReader extends BufferedReader {
49        private long mTime;
50
51        /**
52         * Needed since "next line" may be null due to end of line.
53         */
54        private boolean mNextLineIsValid;
55        private String mNextLine;
56
57        public CustomBufferedReader(Reader in) {
58            super(in);
59        }
60
61        @Override
62        public String readLine() throws IOException {
63            if (mNextLineIsValid) {
64                final String ret = mNextLine;
65                mNextLine = null;
66                mNextLineIsValid = false;
67                return ret;
68            }
69
70            final long start = System.currentTimeMillis();
71            final String line = super.readLine();
72            final long end = System.currentTimeMillis();
73            mTime += end - start;
74            return line;
75        }
76
77        /**
78         * Read one line, but make this object store it in its queue.
79         */
80        public String peekLine() throws IOException {
81            if (!mNextLineIsValid) {
82                final long start = System.currentTimeMillis();
83                final String line = super.readLine();
84                final long end = System.currentTimeMillis();
85                mTime += end - start;
86
87                mNextLine = line;
88                mNextLineIsValid = true;
89            }
90
91            return mNextLine;
92        }
93
94        public long getTotalmillisecond() {
95            return mTime;
96        }
97    }
98
99    private static final String DEFAULT_ENCODING = "8BIT";
100    private static final String DEFAULT_CHARSET = "UTF-8";
101
102    protected final String mIntermediateCharset;
103
104    private final List<VCardInterpreter> mInterpreterList = new ArrayList<VCardInterpreter>();
105    private boolean mCanceled;
106
107    /**
108     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time a new item comes.
111     * </p>
112     * <p>
113     * "Encoding" in vCard is different from "Charset". It is mainly used for
114     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
115     * "QUOTED-PRINTABLE" are known examples.
116     * </p>
117     */
118    protected String mCurrentEncoding;
119
120    protected String mCurrentCharset;
121
122    /**
123     * <p>
124     * The reader object to be used internally.
125     * </p>
126     * <p>
     * Developers should not directly read a line from this object. Use
     * getLine() unless there is some reason not to.
129     * </p>
130     */
131    protected CustomBufferedReader mReader;
132
133    /**
134     * <p>
     * Set for storing unknown TYPE attributes, which are not acceptable in the vCard
     * specification, but happen to be seen in real world vCards.
137     * </p>
138     * <p>
139     * We just accept those invalid types after emitting a warning for each of it.
140     * </p>
141     */
142    protected final Set<String> mUnknownTypeSet = new HashSet<String>();
143
144    /**
145     * <p>
     * Set for storing unknown VALUE attributes, which are not acceptable in the
     * vCard specification, but happen to be seen in real world vCards.
148     * </p>
149     * <p>
150     * We just accept those invalid types after emitting a warning for each of it.
151     * </p>
152     */
153    protected final Set<String> mUnknownValueSet = new HashSet<String>();
154
155
156    public VCardParserImpl_V21() {
157        this(VCardConfig.VCARD_TYPE_DEFAULT);
158    }
159
160    public VCardParserImpl_V21(int vcardType) {
161        mIntermediateCharset =  VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
162    }
163
164    /**
165     * @return true when a given property name is a valid property name.
166     */
167    protected boolean isValidPropertyName(final String propertyName) {
168        if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
169                propertyName.startsWith("X-"))
170                && !mUnknownTypeSet.contains(propertyName)) {
171            mUnknownTypeSet.add(propertyName);
172            Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
173        }
174        return true;
175    }
176
177    /**
178     * @return String. It may be null, or its length may be 0
179     * @throws IOException
180     */
181    protected String getLine() throws IOException {
182        return mReader.readLine();
183    }
184
185    protected String peekLine() throws IOException {
186        return mReader.peekLine();
187    }
188
189    /**
190     * @return String with it's length > 0
191     * @throws IOException
192     * @throws VCardException when the stream reached end of line
193     */
194    protected String getNonEmptyLine() throws IOException, VCardException {
195        String line;
196        while (true) {
197            line = getLine();
198            if (line == null) {
199                throw new VCardException("Reached end of buffer.");
200            } else if (line.trim().length() > 0) {
201                return line;
202            }
203        }
204    }
205
206    /**
207     * <code>
208     * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
209     *         items *CRLF
210     *         "END" [ws] ":" [ws] "VCARD"
211     * </code>
212     * @return False when reaching end of file.
213     */
214    private boolean parseOneVCard() throws IOException, VCardException {
215        // reset for this entire vCard.
216        mCurrentEncoding = DEFAULT_ENCODING;
217        mCurrentCharset = DEFAULT_CHARSET;
218
219        boolean allowGarbage = false;
220        if (!readBeginVCard(allowGarbage)) {
221            return false;
222        }
223        for (VCardInterpreter interpreter : mInterpreterList) {
224            interpreter.onEntryStarted();
225        }
226        parseItems();
227        for (VCardInterpreter interpreter : mInterpreterList) {
228            interpreter.onEntryEnded();
229        }
230        return true;
231    }
232
233    /**
234     * @return True when successful. False when reaching the end of line
235     * @throws IOException
236     * @throws VCardException
237     */
238    protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
239        // TODO: use consructPropertyLine().
240        String line;
241        do {
242            while (true) {
243                line = getLine();
244                if (line == null) {
245                    return false;
246                } else if (line.trim().length() > 0) {
247                    break;
248                }
249            }
250            final String[] strArray = line.split(":", 2);
251            final int length = strArray.length;
252
253            // Although vCard 2.1/3.0 specification does not allow lower cases,
254            // we found vCard file emitted by some external vCard expoter have such
255            // invalid Strings.
256            // e.g. BEGIN:vCard
257            if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
258                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
259                return true;
260            } else if (!allowGarbage) {
261                throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
262                        + "(Instead, \"" + line + "\" came)");
263            }
264        } while (allowGarbage);
265
266        throw new VCardException("Reached where must not be reached.");
267    }
268
269    /**
270     * Parses lines other than the first "BEGIN:VCARD". Takes care of "END:VCARD"n and
271     * "BEGIN:VCARD" in nested vCard.
272     */
273    /*
274     * items = *CRLF item / item
275     *
276     * Note: BEGIN/END aren't include in the original spec while this method handles them.
277     */
278    protected void parseItems() throws IOException, VCardException {
279        boolean ended = false;
280
281        try {
282            ended = parseItem();
283        } catch (VCardInvalidCommentLineException e) {
284            Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
285        }
286
287        while (!ended) {
288            try {
289                ended = parseItem();
290            } catch (VCardInvalidCommentLineException e) {
291                Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
292            }
293        }
294    }
295
296    /*
297     * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
298     * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
299     * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
300     * "AGENT" [params] ":" vcard CRLF
301     */
302    protected boolean parseItem() throws IOException, VCardException {
303        // Reset for an item.
304        mCurrentEncoding = DEFAULT_ENCODING;
305
306        final String line = getNonEmptyLine();
307        final VCardProperty propertyData = constructPropertyData(line);
308
309        final String propertyNameUpper = propertyData.getName().toUpperCase();
310        final String propertyRawValue = propertyData.getRawValue();
311
312        if (propertyNameUpper.equals(VCardConstants.PROPERTY_BEGIN)) {
313            if (propertyRawValue.equalsIgnoreCase("VCARD")) {
314                handleNest();
315            } else {
316                throw new VCardException("Unknown BEGIN type: " + propertyRawValue);
317            }
318        } else if (propertyNameUpper.equals(VCardConstants.PROPERTY_END)) {
319            if (propertyRawValue.equalsIgnoreCase("VCARD")) {
320                return true;  // Ended.
321            } else {
322                throw new VCardException("Unknown END type: " + propertyRawValue);
323            }
324        } else {
325            parseItemInter(propertyData, propertyNameUpper);
326        }
327        return false;
328    }
329
330    private void parseItemInter(VCardProperty property, String propertyNameUpper)
331            throws IOException, VCardException {
332        String propertyRawValue = property.getRawValue();
333        if (propertyNameUpper.equals(VCardConstants.PROPERTY_AGENT)) {
334            handleAgent(property);
335        } else if (isValidPropertyName(propertyNameUpper)) {
336            if (propertyNameUpper.equals(VCardConstants.PROPERTY_VERSION) &&
337                    !propertyRawValue.equals(getVersionString())) {
338                throw new VCardVersionException(
339                        "Incompatible version: " + propertyRawValue + " != " + getVersionString());
340            }
341            handlePropertyValue(property, propertyNameUpper);
342        } else {
343            throw new VCardException("Unknown property name: \"" + propertyNameUpper + "\"");
344        }
345    }
346
347    private void handleNest() throws IOException, VCardException {
348        for (VCardInterpreter interpreter : mInterpreterList) {
349            interpreter.onEntryStarted();
350        }
351        parseItems();
352        for (VCardInterpreter interpreter : mInterpreterList) {
353            interpreter.onEntryEnded();
354        }
355    }
356
357    // For performance reason, the states for group and property name are merged into one.
358    static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
359    static private final int STATE_PARAMS = 1;
360    // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
361    static private final int STATE_PARAMS_IN_DQUOTE = 2;
362
363    protected VCardProperty constructPropertyData(String line) throws VCardException {
364        final VCardProperty propertyData = new VCardProperty();
365
366        final int length = line.length();
367        if (length > 0 && line.charAt(0) == '#') {
368            throw new VCardInvalidCommentLineException();
369        }
370
371        int state = STATE_GROUP_OR_PROPERTY_NAME;
372        int nameIndex = 0;
373
374        // This loop is developed so that we don't have to take care of bottle neck here.
375        // Refactor carefully when you need to do so.
376        for (int i = 0; i < length; i++) {
377            final char ch = line.charAt(i);
378            switch (state) {
379                case STATE_GROUP_OR_PROPERTY_NAME: {
380                    if (ch == ':') {  // End of a property name.
381                        final String propertyName = line.substring(nameIndex, i);
382                        propertyData.setName(propertyName);
383                        propertyData.setRawValue( i < length - 1 ? line.substring(i + 1) : "");
384                        return propertyData;
385                    } else if (ch == '.') {  // Each group is followed by the dot.
386                        final String groupName = line.substring(nameIndex, i);
387                        if (groupName.length() == 0) {
388                            Log.w(LOG_TAG, "Empty group found. Ignoring.");
389                        } else {
390                            propertyData.addGroup(groupName);
391                        }
392                        nameIndex = i + 1;  // Next should be another group or a property name.
393                    } else if (ch == ';') {  // End of property name and beginneng of parameters.
394                        final String propertyName = line.substring(nameIndex, i);
395                        propertyData.setName(propertyName);
396                        nameIndex = i + 1;
397                        state = STATE_PARAMS;  // Start parameter parsing.
398                    }
399                    // TODO: comma support (in vCard 3.0 and 4.0).
400                    break;
401                }
402                case STATE_PARAMS: {
403                    if (ch == '"') {
404                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
405                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
406                                    "Silently allow it");
407                        }
408                        state = STATE_PARAMS_IN_DQUOTE;
409                    } else if (ch == ';') {  // Starts another param.
410                        handleParams(propertyData, line.substring(nameIndex, i));
411                        nameIndex = i + 1;
412                    } else if (ch == ':') {  // End of param and beginenning of values.
413                        handleParams(propertyData, line.substring(nameIndex, i));
414                        propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : "");
415                        return propertyData;
416                    }
417                    break;
418                }
419                case STATE_PARAMS_IN_DQUOTE: {
420                    if (ch == '"') {
421                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
422                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
423                                    "Silently allow it");
424                        }
425                        state = STATE_PARAMS;
426                    }
427                    break;
428                }
429            }
430        }
431
432        throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
433    }
434
435    /*
436     * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
437     * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
438     * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
439     * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
440     * [ws] word / knowntype
441     */
442    protected void handleParams(VCardProperty propertyData, String params)
443            throws VCardException {
444        final String[] strArray = params.split("=", 2);
445        if (strArray.length == 2) {
446            final String paramName = strArray[0].trim().toUpperCase();
447            String paramValue = strArray[1].trim();
448            if (paramName.equals("TYPE")) {
449                handleType(propertyData, paramValue);
450            } else if (paramName.equals("VALUE")) {
451                handleValue(propertyData, paramValue);
452            } else if (paramName.equals("ENCODING")) {
453                handleEncoding(propertyData, paramValue.toUpperCase());
454            } else if (paramName.equals("CHARSET")) {
455                handleCharset(propertyData, paramValue);
456            } else if (paramName.equals("LANGUAGE")) {
457                handleLanguage(propertyData, paramValue);
458            } else if (paramName.startsWith("X-")) {
459                handleAnyParam(propertyData, paramName, paramValue);
460            } else {
461                throw new VCardException("Unknown type \"" + paramName + "\"");
462            }
463        } else {
464            handleParamWithoutName(propertyData, strArray[0]);
465        }
466    }
467
468    /**
469     * vCard 3.0 parser implementation may throw VCardException.
470     */
471    protected void handleParamWithoutName(VCardProperty propertyData, final String paramValue) {
472        handleType(propertyData, paramValue);
473    }
474
475    /*
476     * ptypeval = knowntype / "X-" word
477     */
478    protected void handleType(VCardProperty propertyData, final String ptypeval) {
479        if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
480                || ptypeval.startsWith("X-"))
481                && !mUnknownTypeSet.contains(ptypeval)) {
482            mUnknownTypeSet.add(ptypeval);
483            Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
484        }
485        propertyData.addParameter(VCardConstants.PARAM_TYPE, ptypeval);
486    }
487
488    /*
489     * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
490     */
491    protected void handleValue(VCardProperty propertyData, final String pvalueval) {
492        if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
493                || pvalueval.startsWith("X-")
494                || mUnknownValueSet.contains(pvalueval))) {
495            mUnknownValueSet.add(pvalueval);
496            Log.w(LOG_TAG, String.format(
497                    "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
498        }
499        propertyData.addParameter(VCardConstants.PARAM_VALUE, pvalueval);
500    }
501
502    /*
503     * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
504     */
505    protected void handleEncoding(VCardProperty propertyData, String pencodingval)
506            throws VCardException {
507        if (getAvailableEncodingSet().contains(pencodingval) ||
508                pencodingval.startsWith("X-")) {
509            propertyData.addParameter(VCardConstants.PARAM_ENCODING, pencodingval);
510            // Update encoding right away, as this is needed to understanding other params.
511            mCurrentEncoding = pencodingval.toUpperCase();
512        } else {
513            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
514        }
515    }
516
517    /**
518     * <p>
519     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
520     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
521     * We allow any charset.
522     * </p>
523     */
524    protected void handleCharset(VCardProperty propertyData, String charsetval) {
525        mCurrentCharset = charsetval;
526        propertyData.addParameter(VCardConstants.PARAM_CHARSET, charsetval);
527    }
528
529    /**
530     * See also Section 7.1 of RFC 1521
531     */
532    protected void handleLanguage(VCardProperty propertyData, String langval)
533            throws VCardException {
534        String[] strArray = langval.split("-");
535        if (strArray.length != 2) {
536            throw new VCardException("Invalid Language: \"" + langval + "\"");
537        }
538        String tmp = strArray[0];
539        int length = tmp.length();
540        for (int i = 0; i < length; i++) {
541            if (!isAsciiLetter(tmp.charAt(i))) {
542                throw new VCardException("Invalid Language: \"" + langval + "\"");
543            }
544        }
545        tmp = strArray[1];
546        length = tmp.length();
547        for (int i = 0; i < length; i++) {
548            if (!isAsciiLetter(tmp.charAt(i))) {
549                throw new VCardException("Invalid Language: \"" + langval + "\"");
550            }
551        }
552        propertyData.addParameter(VCardConstants.PARAM_LANGUAGE, langval);
553    }
554
555    private boolean isAsciiLetter(char ch) {
556        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
557            return true;
558        }
559        return false;
560    }
561
562    /**
563     * Mainly for "X-" type. This accepts any kind of type without check.
564     */
565    protected void handleAnyParam(
566            VCardProperty propertyData, String paramName, String paramValue) {
567        propertyData.addParameter(paramName, paramValue);
568    }
569
570    protected void handlePropertyValue(VCardProperty property, String propertyName)
571            throws IOException, VCardException {
572        final String propertyNameUpper = property.getName().toUpperCase();
573        String propertyRawValue = property.getRawValue();
574        final String sourceCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
575        final Collection<String> charsetCollection =
576                property.getParameters(VCardConstants.PARAM_CHARSET);
577        String targetCharset =
578                ((charsetCollection != null) ? charsetCollection.iterator().next() : null);
579        if (TextUtils.isEmpty(targetCharset)) {
580            targetCharset = VCardConfig.DEFAULT_IMPORT_CHARSET;
581        }
582
583        // TODO: have "separableProperty" which reflects vCard spec..
584        if (propertyNameUpper.equals(VCardConstants.PROPERTY_ADR)
585                || propertyNameUpper.equals(VCardConstants.PROPERTY_ORG)
586                || propertyNameUpper.equals(VCardConstants.PROPERTY_N)) {
587            handleAdrOrgN(property, propertyRawValue, sourceCharset, targetCharset);
588            return;
589        }
590
591        if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP) ||
592                // If encoding attribute is missing, then attempt to detect QP encoding.
593                // This is to handle a bug where the android exporter was creating FN properties
594                // with missing encoding.  b/7292017
595                (propertyNameUpper.equals(VCardConstants.PROPERTY_FN) &&
596                        property.getParameters(VCardConstants.PARAM_ENCODING) == null &&
597                        VCardUtils.appearsLikeAndroidVCardQuotedPrintable(propertyRawValue))
598                ) {
599            final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
600            final String propertyEncodedValue =
601                    VCardUtils.parseQuotedPrintable(quotedPrintablePart,
602                            false, sourceCharset, targetCharset);
603            property.setRawValue(quotedPrintablePart);
604            property.setValues(propertyEncodedValue);
605            for (VCardInterpreter interpreter : mInterpreterList) {
606                interpreter.onPropertyCreated(property);
607            }
608        } else if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
609                || mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
610            // It is very rare, but some BASE64 data may be so big that
611            // OutOfMemoryError occurs. To ignore such cases, use try-catch.
612            try {
613                final String base64Property = getBase64(propertyRawValue);
614                try {
615                    property.setByteValue(Base64.decode(base64Property, Base64.DEFAULT));
616                } catch (IllegalArgumentException e) {
617                    throw new VCardException("Decode error on base64 photo: " + propertyRawValue);
618                }
619                for (VCardInterpreter interpreter : mInterpreterList) {
620                    interpreter.onPropertyCreated(property);
621                }
622            } catch (OutOfMemoryError error) {
623                Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
624                for (VCardInterpreter interpreter : mInterpreterList) {
625                    interpreter.onPropertyCreated(property);
626                }
627            }
628        } else {
629            if (!(mCurrentEncoding.equals("7BIT") || mCurrentEncoding.equals("8BIT") ||
630                    mCurrentEncoding.startsWith("X-"))) {
631                Log.w(LOG_TAG,
632                        String.format("The encoding \"%s\" is unsupported by vCard %s",
633                                mCurrentEncoding, getVersionString()));
634            }
635
636            // Some device uses line folding defined in RFC 2425, which is not allowed
637            // in vCard 2.1 (while needed in vCard 3.0).
638            //
639            // e.g.
640            // BEGIN:VCARD
641            // VERSION:2.1
642            // N:;Omega;;;
643            // EMAIL;INTERNET:"Omega"
644            //   <omega@example.com>
645            // FN:Omega
646            // END:VCARD
647            //
648            // The vCard above assumes that email address should become:
649            // "Omega" <omega@example.com>
650            //
651            // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
652            //
653            // For more information about line folding,
654            // see "5.8.1. Line delimiting and folding" in RFC 2425.
655            //
656            // We take care of this case more formally in vCard 3.0, so we only need to
657            // do this in vCard 2.1.
658            if (getVersion() == VCardConfig.VERSION_21) {
659                StringBuilder builder = null;
660                while (true) {
661                    final String nextLine = peekLine();
662                    // We don't need to care too much about this exceptional case,
663                    // but we should not wrongly eat up "END:VCARD", since it critically
664                    // breaks this parser's state machine.
665                    // Thus we roughly look over the next line and confirm it is at least not
666                    // "END:VCARD". This extra fee is worth paying. This is exceptional
667                    // anyway.
668                    if (!TextUtils.isEmpty(nextLine) &&
669                            nextLine.charAt(0) == ' ' &&
670                            !"END:VCARD".contains(nextLine.toUpperCase())) {
671                        getLine();  // Drop the next line.
672
673                        if (builder == null) {
674                            builder = new StringBuilder();
675                            builder.append(propertyRawValue);
676                        }
677                        builder.append(nextLine.substring(1));
678                    } else {
679                        break;
680                    }
681                }
682                if (builder != null) {
683                    propertyRawValue = builder.toString();
684                }
685            }
686
687            ArrayList<String> propertyValueList = new ArrayList<String>();
688            String value = maybeUnescapeText(VCardUtils.convertStringCharset(
689                    propertyRawValue, sourceCharset, targetCharset));
690            propertyValueList.add(value);
691            property.setValues(propertyValueList);
692            for (VCardInterpreter interpreter : mInterpreterList) {
693                interpreter.onPropertyCreated(property);
694            }
695        }
696    }
697
698    private void handleAdrOrgN(VCardProperty property, String propertyRawValue,
699            String sourceCharset, String targetCharset) throws VCardException, IOException {
700        List<String> encodedValueList = new ArrayList<String>();
701
702        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices emit
703        // such data.
704        if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
705            // First we retrieve Quoted-Printable String from vCard entry, which may include
706            // multiple lines.
707            final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
708
709            // "Raw value" from the view of users should contain all part of QP string.
710            // TODO: add test for this handling
711            property.setRawValue(quotedPrintablePart);
712
713            // We split Quoted-Printable String using semi-colon before decoding it, as
714            // the Quoted-Printable may have semi-colon, which confuses splitter.
715            final List<String> quotedPrintableValueList =
716                    VCardUtils.constructListFromValue(quotedPrintablePart, getVersion());
717            for (String quotedPrintableValue : quotedPrintableValueList) {
718                String encoded = VCardUtils.parseQuotedPrintable(quotedPrintableValue,
719                        false, sourceCharset, targetCharset);
720                encodedValueList.add(encoded);
721            }
722        } else {
723            final String propertyValue = VCardUtils.convertStringCharset(
724                    getPotentialMultiline(propertyRawValue), sourceCharset, targetCharset);
725            final List<String> valueList =
726                    VCardUtils.constructListFromValue(propertyValue, getVersion());
727            for (String value : valueList) {
728                encodedValueList.add(value);
729            }
730        }
731
732        property.setValues(encodedValueList);
733        for (VCardInterpreter interpreter : mInterpreterList) {
734            interpreter.onPropertyCreated(property);
735        }
736    }
737
738    /**
739     * <p>
740     * Parses and returns Quoted-Printable.
741     * </p>
742     *
743     * @param firstString The string following a parameter name and attributes.
744     *            Example: "string" in
745     *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
746     * @return whole Quoted-Printable string, including a given argument and
747     *         following lines. Excludes the last empty line following to Quoted
748     *         Printable lines.
749     * @throws IOException
750     * @throws VCardException
751     */
752    private String getQuotedPrintablePart(String firstString)
753            throws IOException, VCardException {
754        // Specifically, there may be some padding between = and CRLF.
755        // See the following:
756        //
757        // qp-line := *(qp-segment transport-padding CRLF)
758        // qp-part transport-padding
759        // qp-segment := qp-section *(SPACE / TAB) "="
760        // ; Maximum length of 76 characters
761        //
762        // e.g. (from RFC 2045)
763        // Now's the time =
764        // for all folk to come=
765        // to the aid of their country.
766        if (firstString.trim().endsWith("=")) {
767            // remove "transport-padding"
768            int pos = firstString.length() - 1;
769            while (firstString.charAt(pos) != '=') {
770            }
771            StringBuilder builder = new StringBuilder();
772            builder.append(firstString.substring(0, pos + 1));
773            builder.append("\r\n");
774            String line;
775            while (true) {
776                line = getLine();
777                if (line == null) {
778                    throw new VCardException("File ended during parsing a Quoted-Printable String");
779                }
780                if (line.trim().endsWith("=")) {
781                    // remove "transport-padding"
782                    pos = line.length() - 1;
783                    while (line.charAt(pos) != '=') {
784                    }
785                    builder.append(line.substring(0, pos + 1));
786                    builder.append("\r\n");
787                } else {
788                    builder.append(line);
789                    break;
790                }
791            }
792            return builder.toString();
793        } else {
794            return firstString;
795        }
796    }
797
798    /**
799     * Given the first line of a property, checks consecutive lines after it and builds a new
800     * multi-line value if it exists.
801     *
802     * @param firstString The first line of the property.
803     * @return A new property, potentially built from multiple lines.
804     * @throws IOException
805     */
806    private String getPotentialMultiline(String firstString) throws IOException {
807        final StringBuilder builder = new StringBuilder();
808        builder.append(firstString);
809
810        while (true) {
811            final String line = peekLine();
812            if (line == null || line.length() == 0) {
813                break;
814            }
815
816            final String propertyName = getPropertyNameUpperCase(line);
817            if (propertyName != null) {
818                break;
819            }
820
821            // vCard 2.1 does not allow multi-line of adr but microsoft vcards may have it.
822            // We will consider the next line to be a part of a multi-line value if it does not
823            // contain a property name (i.e. a colon or semi-colon).
824            // Consume the line.
825            getLine();
826            builder.append(" ").append(line);
827        }
828
829        return builder.toString();
830    }
831
832    protected String getBase64(String firstString) throws IOException, VCardException {
833        final StringBuilder builder = new StringBuilder();
834        builder.append(firstString);
835
836        while (true) {
837            final String line = peekLine();
838            if (line == null) {
839                throw new VCardException("File ended during parsing BASE64 binary");
840            }
841
842            // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
843            // have them. We try to detect those cases using colon and semi-colon, given BASE64
844            // does not contain it.
845            // E.g.
846            //      TEL;TYPE=WORK:+5555555
847            // or
848            //      END:VCARD
849            String propertyName = getPropertyNameUpperCase(line);
850            if (getKnownPropertyNameSet().contains(propertyName) ||
851                    VCardConstants.PROPERTY_X_ANDROID_CUSTOM.equals(propertyName)) {
852                Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
853                        "which must not contain semi-colon or colon. Treat the line as next "
854                        + "property.");
855                Log.w(LOG_TAG, "Problematic line: " + line.trim());
856                break;
857            }
858
859            // Consume the line.
860            getLine();
861
862            if (line.length() == 0) {
863                break;
864            }
865            // Trim off any extraneous whitespace to handle 2.1 implementations
866            // that use 3.0 style line continuations. This is safe because space
867            // isn't a Base64 encoding value.
868            builder.append(line.trim());
869        }
870
871        return builder.toString();
872    }
873
874    /**
875     * Extracts the property name portion of a given vCard line.
876     * <p>
877     * Properties must contain a colon.
878     * <p>
879     * E.g.
880     *      TEL;TYPE=WORK:+5555555  // returns "TEL"
881     *      END:VCARD // returns "END"
882     *      TEL; // returns null
883     *
884     * @param line The vCard line.
885     * @return The property name portion. {@literal null} if no property name found.
886     */
887    private String getPropertyNameUpperCase(String line) {
888        final int colonIndex = line.indexOf(":");
889        if (colonIndex > -1) {
890            final int semiColonIndex = line.indexOf(";");
891
892            // Find the minimum index that is greater than -1.
893            final int minIndex;
894            if (colonIndex == -1) {
895                minIndex = semiColonIndex;
896            } else if (semiColonIndex == -1) {
897                minIndex = colonIndex;
898            } else {
899                minIndex = Math.min(colonIndex, semiColonIndex);
900            }
901            return line.substring(0, minIndex).toUpperCase();
902        }
903        return null;
904    }
905
906    /*
907     * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
908     * error toward the AGENT property.
909     * // TODO: Support AGENT property.
910     * item =
911     * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
912     * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
913     */
914    protected void handleAgent(final VCardProperty property) throws VCardException {
915        if (!property.getRawValue().toUpperCase().contains("BEGIN:VCARD")) {
916            // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
917            for (VCardInterpreter interpreter : mInterpreterList) {
918                interpreter.onPropertyCreated(property);
919            }
920            return;
921        } else {
922            throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
923        }
924    }
925
926    /**
927     * For vCard 3.0.
928     */
929    protected String maybeUnescapeText(final String text) {
930        return text;
931    }
932
933    /**
934     * Returns unescaped String if the character should be unescaped. Return
935     * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
936     * while "\x" should not be.
937     */
938    protected String maybeUnescapeCharacter(final char ch) {
939        return unescapeCharacter(ch);
940    }
941
942    /* package */ static String unescapeCharacter(final char ch) {
943        // Original vCard 2.1 specification does not allow transformation
944        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
945        // implementation of
946        // this class allowed them, so keep it as is.
947        if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
948            return String.valueOf(ch);
949        } else {
950            return null;
951        }
952    }
953
954    /**
955     * @return {@link VCardConfig#VERSION_21}
956     */
957    protected int getVersion() {
958        return VCardConfig.VERSION_21;
959    }
960
961    /**
962     * @return {@link VCardConfig#VERSION_30}
963     */
964    protected String getVersionString() {
965        return VCardConstants.VERSION_V21;
966    }
967
968    protected Set<String> getKnownPropertyNameSet() {
969        return VCardParser_V21.sKnownPropertyNameSet;
970    }
971
972    protected Set<String> getKnownTypeSet() {
973        return VCardParser_V21.sKnownTypeSet;
974    }
975
976    protected Set<String> getKnownValueSet() {
977        return VCardParser_V21.sKnownValueSet;
978    }
979
980    protected Set<String> getAvailableEncodingSet() {
981        return VCardParser_V21.sAvailableEncoding;
982    }
983
984    protected String getDefaultEncoding() {
985        return DEFAULT_ENCODING;
986    }
987
988    protected String getDefaultCharset() {
989        return DEFAULT_CHARSET;
990    }
991
992    protected String getCurrentCharset() {
993        return mCurrentCharset;
994    }
995
996    public void addInterpreter(VCardInterpreter interpreter) {
997        mInterpreterList.add(interpreter);
998    }
999
1000    public void parse(InputStream is) throws IOException, VCardException {
1001        if (is == null) {
1002            throw new NullPointerException("InputStream must not be null.");
1003        }
1004
1005        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1006        mReader = new CustomBufferedReader(tmpReader);
1007
1008        final long start = System.currentTimeMillis();
1009        for (VCardInterpreter interpreter : mInterpreterList) {
1010            interpreter.onVCardStarted();
1011        }
1012
1013        // vcard_file = [wsls] vcard [wsls]
1014        while (true) {
1015            synchronized (this) {
1016                if (mCanceled) {
1017                    Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
1018                    break;
1019                }
1020            }
1021            if (!parseOneVCard()) {
1022                break;
1023            }
1024        }
1025
1026        for (VCardInterpreter interpreter : mInterpreterList) {
1027            interpreter.onVCardEnded();
1028        }
1029    }
1030
1031    public void parseOne(InputStream is) throws IOException, VCardException {
1032        if (is == null) {
1033            throw new NullPointerException("InputStream must not be null.");
1034        }
1035
1036        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1037        mReader = new CustomBufferedReader(tmpReader);
1038
1039        final long start = System.currentTimeMillis();
1040        for (VCardInterpreter interpreter : mInterpreterList) {
1041            interpreter.onVCardStarted();
1042        }
1043        parseOneVCard();
1044        for (VCardInterpreter interpreter : mInterpreterList) {
1045            interpreter.onVCardEnded();
1046        }
1047    }
1048
1049    public final synchronized void cancel() {
1050        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
1051        mCanceled = true;
1052    }
1053}
1054