VCardParserImpl_V21.java revision 6761d1eb69e66bbcf244caa0bcaadf15c7640c53
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package com.android.vcard;
17
18import android.text.TextUtils;
19import android.util.Base64;
20import android.util.Log;
21
22import com.android.vcard.exception.VCardAgentNotSupportedException;
23import com.android.vcard.exception.VCardException;
24import com.android.vcard.exception.VCardInvalidCommentLineException;
25import com.android.vcard.exception.VCardInvalidLineException;
26import com.android.vcard.exception.VCardVersionException;
27
28import java.io.BufferedReader;
29import java.io.IOException;
30import java.io.InputStream;
31import java.io.InputStreamReader;
32import java.io.Reader;
33import java.util.ArrayList;
34import java.util.Collection;
35import java.util.HashSet;
36import java.util.List;
37import java.util.Set;
38
39/**
40 * <p>
41 * Basic implementation achieving vCard parsing. Based on vCard 2.1.
42 * </p>
43 * @hide
44 */
45/* package */ class VCardParserImpl_V21 {
46    private static final String LOG_TAG = VCardConstants.LOG_TAG;
47
48    protected static final class CustomBufferedReader extends BufferedReader {
49        private long mTime;
50
51        /**
52         * Needed since "next line" may be null due to end of line.
53         */
54        private boolean mNextLineIsValid;
55        private String mNextLine;
56
57        public CustomBufferedReader(Reader in) {
58            super(in);
59        }
60
61        @Override
62        public String readLine() throws IOException {
63            if (mNextLineIsValid) {
64                final String ret = mNextLine;
65                mNextLine = null;
66                mNextLineIsValid = false;
67                return ret;
68            }
69
70            final long start = System.currentTimeMillis();
71            final String line = super.readLine();
72            final long end = System.currentTimeMillis();
73            mTime += end - start;
74            return line;
75        }
76
77        /**
78         * Read one line, but make this object store it in its queue.
79         */
80        public String peekLine() throws IOException {
81            if (!mNextLineIsValid) {
82                final long start = System.currentTimeMillis();
83                final String line = super.readLine();
84                final long end = System.currentTimeMillis();
85                mTime += end - start;
86
87                mNextLine = line;
88                mNextLineIsValid = true;
89            }
90
91            return mNextLine;
92        }
93
94        public long getTotalmillisecond() {
95            return mTime;
96        }
97    }
98
    // Encoding assigned to mCurrentEncoding whenever it is reset.
    private static final String DEFAULT_ENCODING = "8BIT";
    // Charset assigned to mCurrentCharset at the start of every vCard.
    private static final String DEFAULT_CHARSET = "UTF-8";

    // Initialized in the constructor from VCardConfig.DEFAULT_INTERMEDIATE_CHARSET.
    protected final String mIntermediateCharset;

    // Interpreters that are notified about entries and properties while parsing.
    private final List<VCardInterpreter> mInterpreterList = new ArrayList<VCardInterpreter>();
    // NOTE(review): presumably set when parsing is canceled; its use is outside this section.
    private boolean mCanceled;

    /**
     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time a new item comes.
     * </p>
     * <p>
     * "Encoding" in vCard is different from "Charset". It is mainly used for
     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
     * "QUOTED-PRINTABLE" are known examples.
     * </p>
     */
    protected String mCurrentEncoding;

    // Charset of the current vCard; reset per vCard and updated by a CHARSET parameter.
    protected String mCurrentCharset;

    /**
     * <p>
     * The reader object to be used internally.
     * </p>
     * <p>
     * Developers should not directly read a line from this object. Use
     * getLine() unless there is some reason not to.
     * </p>
     */
    protected CustomBufferedReader mReader;

    /**
     * <p>
     * Set for storing unknown TYPE attributes, which are not acceptable in the vCard
     * specification, but happen to be seen in real world vCard.
     * </p>
     * <p>
     * We just accept those invalid types after emitting a warning for each of them.
     * </p>
     */
    protected final Set<String> mUnknownTypeSet = new HashSet<String>();

    /**
     * <p>
     * Set for storing unknown VALUE attributes, which are not acceptable in the
     * vCard specification, but happen to be seen in real world vCard.
     * </p>
     * <p>
     * We just accept those invalid values after emitting a warning for each of them.
     * </p>
     */
    protected final Set<String> mUnknownValueSet = new HashSet<String>();
154
155
    /**
     * Creates a parser using {@code VCardConfig.VCARD_TYPE_DEFAULT} as the vCard type.
     */
    public VCardParserImpl_V21() {
        this(VCardConfig.VCARD_TYPE_DEFAULT);
    }

    /**
     * Creates a parser.
     *
     * @param vcardType accepted for the caller's convenience; this constructor does not read
     *            it and only initializes the intermediate charset.
     */
    public VCardParserImpl_V21(int vcardType) {
        mIntermediateCharset =  VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
    }
163
164    /**
165     * @return true when a given property name is a valid property name.
166     */
167    protected boolean isValidPropertyName(final String propertyName) {
168        if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
169                propertyName.startsWith("X-"))
170                && !mUnknownTypeSet.contains(propertyName)) {
171            mUnknownTypeSet.add(propertyName);
172            Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
173        }
174        return true;
175    }
176
177    /**
178     * @return String. It may be null, or its length may be 0
179     * @throws IOException
180     */
181    protected String getLine() throws IOException {
182        return mReader.readLine();
183    }
184
185    protected String peekLine() throws IOException {
186        return mReader.peekLine();
187    }
188
189    /**
190     * @return String with it's length > 0
191     * @throws IOException
192     * @throws VCardException when the stream reached end of line
193     */
194    protected String getNonEmptyLine() throws IOException, VCardException {
195        String line;
196        while (true) {
197            line = getLine();
198            if (line == null) {
199                throw new VCardException("Reached end of buffer.");
200            } else if (line.trim().length() > 0) {
201                return line;
202            }
203        }
204    }
205
206    /**
207     * <code>
208     * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
209     *         items *CRLF
210     *         "END" [ws] ":" [ws] "VCARD"
211     * </code>
212     * @return False when reaching end of file.
213     */
214    private boolean parseOneVCard() throws IOException, VCardException {
215        // reset for this entire vCard.
216        mCurrentEncoding = DEFAULT_ENCODING;
217        mCurrentCharset = DEFAULT_CHARSET;
218
219        // allow parsing of vcards that have mime data leading up to BEGIN:VCARD
220        boolean allowGarbage = true;
221        if (!readBeginVCard(allowGarbage)) {
222            return false;
223        }
224        for (VCardInterpreter interpreter : mInterpreterList) {
225            interpreter.onEntryStarted();
226        }
227        parseItems();
228        for (VCardInterpreter interpreter : mInterpreterList) {
229            interpreter.onEntryEnded();
230        }
231        return true;
232    }
233
234    /**
235     * @return True when successful. False when reaching the end of line
236     * @throws IOException
237     * @throws VCardException
238     */
239    protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
240        // TODO: use consructPropertyLine().
241        String line;
242        do {
243            while (true) {
244                line = getLine();
245                if (line == null) {
246                    return false;
247                } else if (line.trim().length() > 0) {
248                    break;
249                }
250            }
251            final String[] strArray = line.split(":", 2);
252            final int length = strArray.length;
253
254            // Although vCard 2.1/3.0 specification does not allow lower cases,
255            // we found vCard file emitted by some external vCard expoter have such
256            // invalid Strings.
257            // e.g. BEGIN:vCard
258            if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
259                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
260                return true;
261            } else if (!allowGarbage) {
262                throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
263                        + "(Instead, \"" + line + "\" came)");
264            }
265        } while (allowGarbage);
266
267        throw new VCardException("Reached where must not be reached.");
268    }
269
    /**
     * Parses lines other than the first "BEGIN:VCARD". Takes care of "END:VCARD" and
     * "BEGIN:VCARD" in nested vCard.
     */
274    /*
275     * items = *CRLF item / item
276     *
277     * Note: BEGIN/END aren't include in the original spec while this method handles them.
278     */
279    protected void parseItems() throws IOException, VCardException {
280        boolean ended = false;
281
282        try {
283            ended = parseItem();
284        } catch (VCardInvalidCommentLineException e) {
285            Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
286        }
287
288        while (!ended) {
289            try {
290                ended = parseItem();
291            } catch (VCardInvalidCommentLineException e) {
292                Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
293            }
294        }
295    }
296
297    /*
298     * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
299     * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
300     * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
301     * "AGENT" [params] ":" vcard CRLF
302     */
303    protected boolean parseItem() throws IOException, VCardException {
304        // Reset for an item.
305        mCurrentEncoding = DEFAULT_ENCODING;
306
307        final String line = getNonEmptyLine();
308        final VCardProperty propertyData = constructPropertyData(line);
309
310        final String propertyNameUpper = propertyData.getName().toUpperCase();
311        final String propertyRawValue = propertyData.getRawValue();
312
313        if (propertyNameUpper.equals(VCardConstants.PROPERTY_BEGIN)) {
314            if (propertyRawValue.equalsIgnoreCase("VCARD")) {
315                handleNest();
316            } else {
317                throw new VCardException("Unknown BEGIN type: " + propertyRawValue);
318            }
319        } else if (propertyNameUpper.equals(VCardConstants.PROPERTY_END)) {
320            if (propertyRawValue.equalsIgnoreCase("VCARD")) {
321                return true;  // Ended.
322            } else {
323                throw new VCardException("Unknown END type: " + propertyRawValue);
324            }
325        } else {
326            parseItemInter(propertyData, propertyNameUpper);
327        }
328        return false;
329    }
330
331    private void parseItemInter(VCardProperty property, String propertyNameUpper)
332            throws IOException, VCardException {
333        String propertyRawValue = property.getRawValue();
334        if (propertyNameUpper.equals(VCardConstants.PROPERTY_AGENT)) {
335            handleAgent(property);
336        } else if (isValidPropertyName(propertyNameUpper)) {
337            if (propertyNameUpper.equals(VCardConstants.PROPERTY_VERSION) &&
338                    !propertyRawValue.equals(getVersionString())) {
339                throw new VCardVersionException(
340                        "Incompatible version: " + propertyRawValue + " != " + getVersionString());
341            }
342            handlePropertyValue(property, propertyNameUpper);
343        } else {
344            throw new VCardException("Unknown property name: \"" + propertyNameUpper + "\"");
345        }
346    }
347
348    private void handleNest() throws IOException, VCardException {
349        for (VCardInterpreter interpreter : mInterpreterList) {
350            interpreter.onEntryStarted();
351        }
352        parseItems();
353        for (VCardInterpreter interpreter : mInterpreterList) {
354            interpreter.onEntryEnded();
355        }
356    }
357
358    // For performance reason, the states for group and property name are merged into one.
359    static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
360    static private final int STATE_PARAMS = 1;
361    // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
362    static private final int STATE_PARAMS_IN_DQUOTE = 2;
363
364    protected VCardProperty constructPropertyData(String line) throws VCardException {
365        final VCardProperty propertyData = new VCardProperty();
366
367        final int length = line.length();
368        if (length > 0 && line.charAt(0) == '#') {
369            throw new VCardInvalidCommentLineException();
370        }
371
372        int state = STATE_GROUP_OR_PROPERTY_NAME;
373        int nameIndex = 0;
374
375        // This loop is developed so that we don't have to take care of bottle neck here.
376        // Refactor carefully when you need to do so.
377        for (int i = 0; i < length; i++) {
378            final char ch = line.charAt(i);
379            switch (state) {
380                case STATE_GROUP_OR_PROPERTY_NAME: {
381                    if (ch == ':') {  // End of a property name.
382                        final String propertyName = line.substring(nameIndex, i);
383                        propertyData.setName(propertyName);
384                        propertyData.setRawValue( i < length - 1 ? line.substring(i + 1) : "");
385                        return propertyData;
386                    } else if (ch == '.') {  // Each group is followed by the dot.
387                        final String groupName = line.substring(nameIndex, i);
388                        if (groupName.length() == 0) {
389                            Log.w(LOG_TAG, "Empty group found. Ignoring.");
390                        } else {
391                            propertyData.addGroup(groupName);
392                        }
393                        nameIndex = i + 1;  // Next should be another group or a property name.
394                    } else if (ch == ';') {  // End of property name and beginneng of parameters.
395                        final String propertyName = line.substring(nameIndex, i);
396                        propertyData.setName(propertyName);
397                        nameIndex = i + 1;
398                        state = STATE_PARAMS;  // Start parameter parsing.
399                    }
400                    // TODO: comma support (in vCard 3.0 and 4.0).
401                    break;
402                }
403                case STATE_PARAMS: {
404                    if (ch == '"') {
405                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
406                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
407                                    "Silently allow it");
408                        }
409                        state = STATE_PARAMS_IN_DQUOTE;
410                    } else if (ch == ';') {  // Starts another param.
411                        handleParams(propertyData, line.substring(nameIndex, i));
412                        nameIndex = i + 1;
413                    } else if (ch == ':') {  // End of param and beginenning of values.
414                        handleParams(propertyData, line.substring(nameIndex, i));
415                        propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : "");
416                        return propertyData;
417                    }
418                    break;
419                }
420                case STATE_PARAMS_IN_DQUOTE: {
421                    if (ch == '"') {
422                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
423                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
424                                    "Silently allow it");
425                        }
426                        state = STATE_PARAMS;
427                    }
428                    break;
429                }
430            }
431        }
432
433        throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
434    }
435
436    /*
437     * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
438     * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
439     * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
440     * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
441     * [ws] word / knowntype
442     */
443    protected void handleParams(VCardProperty propertyData, String params)
444            throws VCardException {
445        final String[] strArray = params.split("=", 2);
446        if (strArray.length == 2) {
447            final String paramName = strArray[0].trim().toUpperCase();
448            String paramValue = strArray[1].trim();
449            if (paramName.equals("TYPE")) {
450                handleType(propertyData, paramValue);
451            } else if (paramName.equals("VALUE")) {
452                handleValue(propertyData, paramValue);
453            } else if (paramName.equals("ENCODING")) {
454                handleEncoding(propertyData, paramValue.toUpperCase());
455            } else if (paramName.equals("CHARSET")) {
456                handleCharset(propertyData, paramValue);
457            } else if (paramName.equals("LANGUAGE")) {
458                handleLanguage(propertyData, paramValue);
459            } else if (paramName.startsWith("X-")) {
460                handleAnyParam(propertyData, paramName, paramValue);
461            } else {
462                throw new VCardException("Unknown type \"" + paramName + "\"");
463            }
464        } else {
465            handleParamWithoutName(propertyData, strArray[0]);
466        }
467    }
468
469    /**
470     * vCard 3.0 parser implementation may throw VCardException.
471     */
472    protected void handleParamWithoutName(VCardProperty propertyData, final String paramValue) {
473        handleType(propertyData, paramValue);
474    }
475
476    /*
477     * ptypeval = knowntype / "X-" word
478     */
479    protected void handleType(VCardProperty propertyData, final String ptypeval) {
480        if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
481                || ptypeval.startsWith("X-"))
482                && !mUnknownTypeSet.contains(ptypeval)) {
483            mUnknownTypeSet.add(ptypeval);
484            Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
485        }
486        propertyData.addParameter(VCardConstants.PARAM_TYPE, ptypeval);
487    }
488
489    /*
490     * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
491     */
492    protected void handleValue(VCardProperty propertyData, final String pvalueval) {
493        if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
494                || pvalueval.startsWith("X-")
495                || mUnknownValueSet.contains(pvalueval))) {
496            mUnknownValueSet.add(pvalueval);
497            Log.w(LOG_TAG, String.format(
498                    "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
499        }
500        propertyData.addParameter(VCardConstants.PARAM_VALUE, pvalueval);
501    }
502
503    /*
504     * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
505     */
506    protected void handleEncoding(VCardProperty propertyData, String pencodingval)
507            throws VCardException {
508        if (getAvailableEncodingSet().contains(pencodingval) ||
509                pencodingval.startsWith("X-")) {
510            propertyData.addParameter(VCardConstants.PARAM_ENCODING, pencodingval);
511            // Update encoding right away, as this is needed to understanding other params.
512            mCurrentEncoding = pencodingval.toUpperCase();
513        } else {
514            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
515        }
516    }
517
518    /**
519     * <p>
520     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
521     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
522     * We allow any charset.
523     * </p>
524     */
525    protected void handleCharset(VCardProperty propertyData, String charsetval) {
526        mCurrentCharset = charsetval;
527        propertyData.addParameter(VCardConstants.PARAM_CHARSET, charsetval);
528    }
529
530    /**
531     * See also Section 7.1 of RFC 1521
532     */
533    protected void handleLanguage(VCardProperty propertyData, String langval)
534            throws VCardException {
535        String[] strArray = langval.split("-");
536        if (strArray.length != 2) {
537            throw new VCardException("Invalid Language: \"" + langval + "\"");
538        }
539        String tmp = strArray[0];
540        int length = tmp.length();
541        for (int i = 0; i < length; i++) {
542            if (!isAsciiLetter(tmp.charAt(i))) {
543                throw new VCardException("Invalid Language: \"" + langval + "\"");
544            }
545        }
546        tmp = strArray[1];
547        length = tmp.length();
548        for (int i = 0; i < length; i++) {
549            if (!isAsciiLetter(tmp.charAt(i))) {
550                throw new VCardException("Invalid Language: \"" + langval + "\"");
551            }
552        }
553        propertyData.addParameter(VCardConstants.PARAM_LANGUAGE, langval);
554    }
555
556    private boolean isAsciiLetter(char ch) {
557        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
558            return true;
559        }
560        return false;
561    }
562
563    /**
564     * Mainly for "X-" type. This accepts any kind of type without check.
565     */
566    protected void handleAnyParam(
567            VCardProperty propertyData, String paramName, String paramValue) {
568        propertyData.addParameter(paramName, paramValue);
569    }
570
571    protected void handlePropertyValue(VCardProperty property, String propertyName)
572            throws IOException, VCardException {
573        final String propertyNameUpper = property.getName().toUpperCase();
574        String propertyRawValue = property.getRawValue();
575        final String sourceCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
576        final Collection<String> charsetCollection =
577                property.getParameters(VCardConstants.PARAM_CHARSET);
578        String targetCharset =
579                ((charsetCollection != null) ? charsetCollection.iterator().next() : null);
580        if (TextUtils.isEmpty(targetCharset)) {
581            targetCharset = VCardConfig.DEFAULT_IMPORT_CHARSET;
582        }
583
584        // TODO: have "separableProperty" which reflects vCard spec..
585        if (propertyNameUpper.equals(VCardConstants.PROPERTY_ADR)
586                || propertyNameUpper.equals(VCardConstants.PROPERTY_ORG)
587                || propertyNameUpper.equals(VCardConstants.PROPERTY_N)) {
588            handleAdrOrgN(property, propertyRawValue, sourceCharset, targetCharset);
589            return;
590        }
591
592        if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP) ||
593                // If encoding attribute is missing, then attempt to detect QP encoding.
594                // This is to handle a bug where the android exporter was creating FN properties
595                // with missing encoding.  b/7292017
596                (propertyNameUpper.equals(VCardConstants.PROPERTY_FN) &&
597                        property.getParameters(VCardConstants.PARAM_ENCODING) == null &&
598                        VCardUtils.appearsLikeAndroidVCardQuotedPrintable(propertyRawValue))
599                ) {
600            final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
601            final String propertyEncodedValue =
602                    VCardUtils.parseQuotedPrintable(quotedPrintablePart,
603                            false, sourceCharset, targetCharset);
604            property.setRawValue(quotedPrintablePart);
605            property.setValues(propertyEncodedValue);
606            for (VCardInterpreter interpreter : mInterpreterList) {
607                interpreter.onPropertyCreated(property);
608            }
609        } else if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
610                || mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
611            // It is very rare, but some BASE64 data may be so big that
612            // OutOfMemoryError occurs. To ignore such cases, use try-catch.
613            try {
614                final String base64Property = getBase64(propertyRawValue);
615                try {
616                    property.setByteValue(Base64.decode(base64Property, Base64.DEFAULT));
617                } catch (IllegalArgumentException e) {
618                    throw new VCardException("Decode error on base64 photo: " + propertyRawValue);
619                }
620                for (VCardInterpreter interpreter : mInterpreterList) {
621                    interpreter.onPropertyCreated(property);
622                }
623            } catch (OutOfMemoryError error) {
624                Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
625                for (VCardInterpreter interpreter : mInterpreterList) {
626                    interpreter.onPropertyCreated(property);
627                }
628            }
629        } else {
630            if (!(mCurrentEncoding.equals("7BIT") || mCurrentEncoding.equals("8BIT") ||
631                    mCurrentEncoding.startsWith("X-"))) {
632                Log.w(LOG_TAG,
633                        String.format("The encoding \"%s\" is unsupported by vCard %s",
634                                mCurrentEncoding, getVersionString()));
635            }
636
637            // Some device uses line folding defined in RFC 2425, which is not allowed
638            // in vCard 2.1 (while needed in vCard 3.0).
639            //
640            // e.g.
641            // BEGIN:VCARD
642            // VERSION:2.1
643            // N:;Omega;;;
644            // EMAIL;INTERNET:"Omega"
645            //   <omega@example.com>
646            // FN:Omega
647            // END:VCARD
648            //
649            // The vCard above assumes that email address should become:
650            // "Omega" <omega@example.com>
651            //
652            // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
653            //
654            // For more information about line folding,
655            // see "5.8.1. Line delimiting and folding" in RFC 2425.
656            //
657            // We take care of this case more formally in vCard 3.0, so we only need to
658            // do this in vCard 2.1.
659            if (getVersion() == VCardConfig.VERSION_21) {
660                StringBuilder builder = null;
661                while (true) {
662                    final String nextLine = peekLine();
663                    // We don't need to care too much about this exceptional case,
664                    // but we should not wrongly eat up "END:VCARD", since it critically
665                    // breaks this parser's state machine.
666                    // Thus we roughly look over the next line and confirm it is at least not
667                    // "END:VCARD". This extra fee is worth paying. This is exceptional
668                    // anyway.
669                    if (!TextUtils.isEmpty(nextLine) &&
670                            nextLine.charAt(0) == ' ' &&
671                            !"END:VCARD".contains(nextLine.toUpperCase())) {
672                        getLine();  // Drop the next line.
673
674                        if (builder == null) {
675                            builder = new StringBuilder();
676                            builder.append(propertyRawValue);
677                        }
678                        builder.append(nextLine.substring(1));
679                    } else {
680                        break;
681                    }
682                }
683                if (builder != null) {
684                    propertyRawValue = builder.toString();
685                }
686            }
687
688            ArrayList<String> propertyValueList = new ArrayList<String>();
689            String value = maybeUnescapeText(VCardUtils.convertStringCharset(
690                    propertyRawValue, sourceCharset, targetCharset));
691            propertyValueList.add(value);
692            property.setValues(propertyValueList);
693            for (VCardInterpreter interpreter : mInterpreterList) {
694                interpreter.onPropertyCreated(property);
695            }
696        }
697    }
698
699    private void handleAdrOrgN(VCardProperty property, String propertyRawValue,
700            String sourceCharset, String targetCharset) throws VCardException, IOException {
701        List<String> encodedValueList = new ArrayList<String>();
702
703        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices emit
704        // such data.
705        if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
706            // First we retrieve Quoted-Printable String from vCard entry, which may include
707            // multiple lines.
708            final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
709
710            // "Raw value" from the view of users should contain all part of QP string.
711            // TODO: add test for this handling
712            property.setRawValue(quotedPrintablePart);
713
714            // We split Quoted-Printable String using semi-colon before decoding it, as
715            // the Quoted-Printable may have semi-colon, which confuses splitter.
716            final List<String> quotedPrintableValueList =
717                    VCardUtils.constructListFromValue(quotedPrintablePart, getVersion());
718            for (String quotedPrintableValue : quotedPrintableValueList) {
719                String encoded = VCardUtils.parseQuotedPrintable(quotedPrintableValue,
720                        false, sourceCharset, targetCharset);
721                encodedValueList.add(encoded);
722            }
723        } else {
724            final String propertyValue = VCardUtils.convertStringCharset(
725                    getPotentialMultiline(propertyRawValue), sourceCharset, targetCharset);
726            final List<String> valueList =
727                    VCardUtils.constructListFromValue(propertyValue, getVersion());
728            for (String value : valueList) {
729                encodedValueList.add(value);
730            }
731        }
732
733        property.setValues(encodedValueList);
734        for (VCardInterpreter interpreter : mInterpreterList) {
735            interpreter.onPropertyCreated(property);
736        }
737    }
738
739    /**
740     * <p>
741     * Parses and returns Quoted-Printable.
742     * </p>
743     *
744     * @param firstString The string following a parameter name and attributes.
745     *            Example: "string" in
746     *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
747     * @return whole Quoted-Printable string, including a given argument and
748     *         following lines. Excludes the last empty line following to Quoted
749     *         Printable lines.
750     * @throws IOException
751     * @throws VCardException
752     */
753    private String getQuotedPrintablePart(String firstString)
754            throws IOException, VCardException {
755        // Specifically, there may be some padding between = and CRLF.
756        // See the following:
757        //
758        // qp-line := *(qp-segment transport-padding CRLF)
759        // qp-part transport-padding
760        // qp-segment := qp-section *(SPACE / TAB) "="
761        // ; Maximum length of 76 characters
762        //
763        // e.g. (from RFC 2045)
764        // Now's the time =
765        // for all folk to come=
766        // to the aid of their country.
767        if (firstString.trim().endsWith("=")) {
768            // remove "transport-padding"
769            int pos = firstString.length() - 1;
770            while (firstString.charAt(pos) != '=') {
771            }
772            StringBuilder builder = new StringBuilder();
773            builder.append(firstString.substring(0, pos + 1));
774            builder.append("\r\n");
775            String line;
776            while (true) {
777                line = getLine();
778                if (line == null) {
779                    throw new VCardException("File ended during parsing a Quoted-Printable String");
780                }
781                if (line.trim().endsWith("=")) {
782                    // remove "transport-padding"
783                    pos = line.length() - 1;
784                    while (line.charAt(pos) != '=') {
785                    }
786                    builder.append(line.substring(0, pos + 1));
787                    builder.append("\r\n");
788                } else {
789                    builder.append(line);
790                    break;
791                }
792            }
793            return builder.toString();
794        } else {
795            return firstString;
796        }
797    }
798
799    /**
800     * Given the first line of a property, checks consecutive lines after it and builds a new
801     * multi-line value if it exists.
802     *
803     * @param firstString The first line of the property.
804     * @return A new property, potentially built from multiple lines.
805     * @throws IOException
806     */
807    private String getPotentialMultiline(String firstString) throws IOException {
808        final StringBuilder builder = new StringBuilder();
809        builder.append(firstString);
810
811        while (true) {
812            final String line = peekLine();
813            if (line == null || line.length() == 0) {
814                break;
815            }
816
817            final String propertyName = getPropertyNameUpperCase(line);
818            if (propertyName != null) {
819                break;
820            }
821
822            // vCard 2.1 does not allow multi-line of adr but microsoft vcards may have it.
823            // We will consider the next line to be a part of a multi-line value if it does not
824            // contain a property name (i.e. a colon or semi-colon).
825            // Consume the line.
826            getLine();
827            builder.append(" ").append(line);
828        }
829
830        return builder.toString();
831    }
832
833    protected String getBase64(String firstString) throws IOException, VCardException {
834        final StringBuilder builder = new StringBuilder();
835        builder.append(firstString);
836
837        while (true) {
838            final String line = peekLine();
839            if (line == null) {
840                throw new VCardException("File ended during parsing BASE64 binary");
841            }
842
843            // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
844            // have them. We try to detect those cases using colon and semi-colon, given BASE64
845            // does not contain it.
846            // E.g.
847            //      TEL;TYPE=WORK:+5555555
848            // or
849            //      END:VCARD
850            String propertyName = getPropertyNameUpperCase(line);
851            if (getKnownPropertyNameSet().contains(propertyName) ||
852                    VCardConstants.PROPERTY_X_ANDROID_CUSTOM.equals(propertyName)) {
853                Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
854                        "which must not contain semi-colon or colon. Treat the line as next "
855                        + "property.");
856                Log.w(LOG_TAG, "Problematic line: " + line.trim());
857                break;
858            }
859
860            // Consume the line.
861            getLine();
862
863            if (line.length() == 0) {
864                break;
865            }
866            // Trim off any extraneous whitespace to handle 2.1 implementations
867            // that use 3.0 style line continuations. This is safe because space
868            // isn't a Base64 encoding value.
869            builder.append(line.trim());
870        }
871
872        return builder.toString();
873    }
874
875    /**
876     * Extracts the property name portion of a given vCard line.
877     * <p>
878     * Properties must contain a colon.
879     * <p>
880     * E.g.
881     *      TEL;TYPE=WORK:+5555555  // returns "TEL"
882     *      END:VCARD // returns "END"
883     *      TEL; // returns null
884     *
885     * @param line The vCard line.
886     * @return The property name portion. {@literal null} if no property name found.
887     */
888    private String getPropertyNameUpperCase(String line) {
889        final int colonIndex = line.indexOf(":");
890        if (colonIndex > -1) {
891            final int semiColonIndex = line.indexOf(";");
892
893            // Find the minimum index that is greater than -1.
894            final int minIndex;
895            if (colonIndex == -1) {
896                minIndex = semiColonIndex;
897            } else if (semiColonIndex == -1) {
898                minIndex = colonIndex;
899            } else {
900                minIndex = Math.min(colonIndex, semiColonIndex);
901            }
902            return line.substring(0, minIndex).toUpperCase();
903        }
904        return null;
905    }
906
907    /*
908     * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
909     * error toward the AGENT property.
910     * // TODO: Support AGENT property.
911     * item =
912     * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
913     * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
914     */
915    protected void handleAgent(final VCardProperty property) throws VCardException {
916        if (!property.getRawValue().toUpperCase().contains("BEGIN:VCARD")) {
917            // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
918            for (VCardInterpreter interpreter : mInterpreterList) {
919                interpreter.onPropertyCreated(property);
920            }
921            return;
922        } else {
923            throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
924        }
925    }
926
    /**
     * Hook for unescaping a text value. This implementation returns the given text
     * unchanged.
     * <p>
     * For vCard 3.0 (presumably overridden by the 3.0 parser, which is not visible
     * here -- confirm).
     *
     * @param text the text to be unescaped
     * @return the same {@code text} instance
     */
    protected String maybeUnescapeText(final String text) {
        return text;
    }
933
    /**
     * Returns unescaped String if the character should be unescaped. Return
     * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
     * while "\x" should not be.
     * <p>
     * This implementation simply delegates to {@link #unescapeCharacter(char)}.
     *
     * @param ch the character that follows a backslash
     * @return the unescaped character as a String, or null if it must not be unescaped
     */
    protected String maybeUnescapeCharacter(final char ch) {
        return unescapeCharacter(ch);
    }
942
943    /* package */ static String unescapeCharacter(final char ch) {
944        // Original vCard 2.1 specification does not allow transformation
945        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
946        // implementation of
947        // this class allowed them, so keep it as is.
948        if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
949            return String.valueOf(ch);
950        } else {
951            return null;
952        }
953    }
954
    /**
     * Returns the vCard version this parser implementation handles, as an int constant.
     *
     * @return {@link VCardConfig#VERSION_21}
     */
    protected int getVersion() {
        return VCardConfig.VERSION_21;
    }
961
    /**
     * Returns the vCard version this parser implementation handles, as a String constant.
     *
     * @return {@link VCardConstants#VERSION_V21}
     */
    protected String getVersionString() {
        return VCardConstants.VERSION_V21;
    }
968
    /**
     * Returns the set of property names this parser knows about. Among other things it
     * is consulted while reading BASE64 data to detect the start of the next property.
     *
     * @return {@code VCardParser_V21.sKnownPropertyNameSet}
     */
    protected Set<String> getKnownPropertyNameSet() {
        return VCardParser_V21.sKnownPropertyNameSet;
    }
972
    /**
     * Returns the set of known types for this vCard version.
     *
     * @return {@code VCardParser_V21.sKnownTypeSet}
     */
    protected Set<String> getKnownTypeSet() {
        return VCardParser_V21.sKnownTypeSet;
    }
976
    /**
     * Returns the set of known values for this vCard version.
     *
     * @return {@code VCardParser_V21.sKnownValueSet}
     */
    protected Set<String> getKnownValueSet() {
        return VCardParser_V21.sKnownValueSet;
    }
980
    /**
     * Returns the set of encodings available for this vCard version.
     *
     * @return {@code VCardParser_V21.sAvailableEncoding}
     */
    protected Set<String> getAvailableEncodingSet() {
        return VCardParser_V21.sAvailableEncoding;
    }
984
    /**
     * Returns the default encoding of this parser.
     *
     * @return the {@code DEFAULT_ENCODING} constant
     */
    protected String getDefaultEncoding() {
        return DEFAULT_ENCODING;
    }
988
    /**
     * Returns the default charset of this parser.
     *
     * @return the {@code DEFAULT_CHARSET} constant
     */
    protected String getDefaultCharset() {
        return DEFAULT_CHARSET;
    }
992
    /**
     * Returns the charset currently held by the parser.
     *
     * @return the current value of {@code mCurrentCharset}
     */
    protected String getCurrentCharset() {
        return mCurrentCharset;
    }
996
    /**
     * Registers an interpreter. Every registered interpreter receives the callbacks
     * emitted during parsing (e.g. {@code onVCardStarted()}, {@code onPropertyCreated()}
     * and {@code onVCardEnded()}).
     *
     * @param interpreter the interpreter to add to the list of registered interpreters
     */
    public void addInterpreter(VCardInterpreter interpreter) {
        mInterpreterList.add(interpreter);
    }
1000
1001    public void parse(InputStream is) throws IOException, VCardException {
1002        if (is == null) {
1003            throw new NullPointerException("InputStream must not be null.");
1004        }
1005
1006        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1007        mReader = new CustomBufferedReader(tmpReader);
1008
1009        final long start = System.currentTimeMillis();
1010        for (VCardInterpreter interpreter : mInterpreterList) {
1011            interpreter.onVCardStarted();
1012        }
1013
1014        // vcard_file = [wsls] vcard [wsls]
1015        while (true) {
1016            synchronized (this) {
1017                if (mCanceled) {
1018                    Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
1019                    break;
1020                }
1021            }
1022            if (!parseOneVCard()) {
1023                break;
1024            }
1025        }
1026
1027        for (VCardInterpreter interpreter : mInterpreterList) {
1028            interpreter.onVCardEnded();
1029        }
1030    }
1031
1032    public void parseOne(InputStream is) throws IOException, VCardException {
1033        if (is == null) {
1034            throw new NullPointerException("InputStream must not be null.");
1035        }
1036
1037        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1038        mReader = new CustomBufferedReader(tmpReader);
1039
1040        final long start = System.currentTimeMillis();
1041        for (VCardInterpreter interpreter : mInterpreterList) {
1042            interpreter.onVCardStarted();
1043        }
1044        parseOneVCard();
1045        for (VCardInterpreter interpreter : mInterpreterList) {
1046            interpreter.onVCardEnded();
1047        }
1048    }
1049
    /**
     * Requests cancellation of a running parse operation by setting the cancel flag.
     * {@link #parse(InputStream)} checks that flag before parsing each vCard entry and
     * stops once it is set.
     */
    public final synchronized void cancel() {
        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
        mCanceled = true;
    }
1054}
1055