VCardParserImpl_V21.java revision 147f1ae5371954ae845cb2330b221df6ca1d8831
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16package com.android.vcard; 17 18import android.text.TextUtils; 19import android.util.Log; 20 21import com.android.vcard.exception.VCardAgentNotSupportedException; 22import com.android.vcard.exception.VCardException; 23import com.android.vcard.exception.VCardInvalidCommentLineException; 24import com.android.vcard.exception.VCardInvalidLineException; 25import com.android.vcard.exception.VCardNestedException; 26import com.android.vcard.exception.VCardVersionException; 27 28import java.io.BufferedReader; 29import java.io.IOException; 30import java.io.InputStream; 31import java.io.InputStreamReader; 32import java.io.Reader; 33import java.util.ArrayList; 34import java.util.HashSet; 35import java.util.Set; 36 37/** 38 * <p> 39 * Basic implementation achieving vCard parsing. Based on vCard 2.1, 40 * </p> 41 * @hide 42 */ 43/* package */ class VCardParserImpl_V21 { 44 private static final String LOG_TAG = "VCardParserImpl_V21"; 45 46 protected static final class CustomBufferedReader extends BufferedReader { 47 private long mTime; 48 49 /** 50 * Needed since "next line" may be null due to end of line. 51 */ 52 private boolean mNextLineIsValid; 53 private String mNextLine; 54 55 public CustomBufferedReader(Reader in) { 56 super(in); 57 } 58 59 @Override 60 public String readLine() throws IOException { 61 if (mNextLineIsValid) { 62 final String ret = mNextLine; 63 mNextLine = null; 64 mNextLineIsValid = false; 65 return ret; 66 } 67 68 long start = System.currentTimeMillis(); 69 final String line = super.readLine(); 70 long end = System.currentTimeMillis(); 71 mTime += end - start; 72 return line; 73 } 74 75 /** 76 * Read one line, but make this object store it in its queue. 77 */ 78 public String peekLine() throws IOException { 79 if (!mNextLineIsValid) { 80 long start = System.currentTimeMillis(); 81 final String line = super.readLine(); 82 long end = System.currentTimeMillis(); 83 mTime += end - start; 84 85 mNextLine = line; 86 mNextLineIsValid = true; 87 } 88 89 return mNextLine; 90 } 91 92 public long getTotalmillisecond() { 93 return mTime; 94 } 95 } 96 97 private static final String DEFAULT_ENCODING = "8BIT"; 98 99 protected boolean mCanceled; 100 protected VCardInterpreter mInterpreter; 101 102 protected final String mIntermediateCharset; 103 104 /** 105 * <p> 106 * The encoding type for deconding byte streams. This member variable is 107 * reset to a default encoding every time when a new item comes. 108 * </p> 109 * <p> 110 * "Encoding" in vCard is different from "Charset". It is mainly used for 111 * addresses, notes, images. "7BIT", "8BIT", "BASE64", and 112 * "QUOTED-PRINTABLE" are known examples. 113 * </p> 114 */ 115 protected String mCurrentEncoding; 116 117 /** 118 * <p> 119 * The reader object to be used internally. 120 * </p> 121 * <p> 122 * Developers should not directly read a line from this object. Use 123 * getLine() unless there some reason. 124 * </p> 125 */ 126 protected CustomBufferedReader mReader; 127 128 /** 129 * <p> 130 * Set for storing unkonwn TYPE attributes, which is not acceptable in vCard 131 * specification, but happens to be seen in real world vCard. 132 * </p> 133 */ 134 protected final Set<String> mUnknownTypeSet = new HashSet<String>(); 135 136 /** 137 * <p> 138 * Set for storing unkonwn VALUE attributes, which is not acceptable in 139 * vCard specification, but happens to be seen in real world vCard. 140 * </p> 141 */ 142 protected final Set<String> mUnknownValueSet = new HashSet<String>(); 143 144 145 // In some cases, vCard is nested. Currently, we only consider the most 146 // interior vCard data. 147 // See v21_foma_1.vcf in test directory for more information. 148 // TODO: Don't ignore by using count, but read all of information outside vCard. 149 private int mNestCount; 150 151 // Used only for parsing END:VCARD. 152 private String mPreviousLine; 153 154 // For measuring performance. 155 private long mTimeTotal; 156 private long mTimeReadStartRecord; 157 private long mTimeReadEndRecord; 158 private long mTimeStartProperty; 159 private long mTimeEndProperty; 160 private long mTimeParseItems; 161 private long mTimeParseLineAndHandleGroup; 162 private long mTimeParsePropertyValues; 163 private long mTimeParseAdrOrgN; 164 private long mTimeHandleMiscPropertyValue; 165 private long mTimeHandleQuotedPrintable; 166 private long mTimeHandleBase64; 167 168 public VCardParserImpl_V21() { 169 this(VCardConfig.VCARD_TYPE_DEFAULT); 170 } 171 172 public VCardParserImpl_V21(int vcardType) { 173 if ((vcardType & VCardConfig.FLAG_TORELATE_NEST) != 0) { 174 mNestCount = 1; 175 } 176 177 mIntermediateCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET; 178 } 179 180 /** 181 * <p> 182 * Parses the file at the given position. 183 * </p> 184 */ 185 // <pre class="prettyprint">vcard_file = [wsls] vcard [wsls]</pre> 186 protected void parseVCardFile() throws IOException, VCardException { 187 boolean readingFirstFile = true; 188 while (true) { 189 if (mCanceled) { 190 break; 191 } 192 if (!parseOneVCard(readingFirstFile)) { 193 break; 194 } 195 readingFirstFile = false; 196 } 197 198 if (mNestCount > 0) { 199 boolean useCache = true; 200 for (int i = 0; i < mNestCount; i++) { 201 readEndVCard(useCache, true); 202 useCache = false; 203 } 204 } 205 } 206 207 /** 208 * @return true when a given property name is a valid property name. 209 */ 210 protected boolean isValidPropertyName(final String propertyName) { 211 if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) || 212 propertyName.startsWith("X-")) 213 && !mUnknownTypeSet.contains(propertyName)) { 214 mUnknownTypeSet.add(propertyName); 215 Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName); 216 } 217 return true; 218 } 219 220 /** 221 * @return String. It may be null, or its length may be 0 222 * @throws IOException 223 */ 224 protected String getLine() throws IOException { 225 return mReader.readLine(); 226 } 227 228 protected String peekLine() throws IOException { 229 return mReader.peekLine(); 230 } 231 232 /** 233 * @return String with it's length > 0 234 * @throws IOException 235 * @throws VCardException when the stream reached end of line 236 */ 237 protected String getNonEmptyLine() throws IOException, VCardException { 238 String line; 239 while (true) { 240 line = getLine(); 241 if (line == null) { 242 throw new VCardException("Reached end of buffer."); 243 } else if (line.trim().length() > 0) { 244 return line; 245 } 246 } 247 } 248 249 /* 250 * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF 251 * items *CRLF 252 * "END" [ws] ":" [ws] "VCARD" 253 */ 254 private boolean parseOneVCard(boolean firstRead) throws IOException, VCardException { 255 boolean allowGarbage = false; 256 if (firstRead) { 257 if (mNestCount > 0) { 258 for (int i = 0; i < mNestCount; i++) { 259 if (!readBeginVCard(allowGarbage)) { 260 return false; 261 } 262 allowGarbage = true; 263 } 264 } 265 } 266 267 if (!readBeginVCard(allowGarbage)) { 268 return false; 269 } 270 long start; 271 if (mInterpreter != null) { 272 start = System.currentTimeMillis(); 273 mInterpreter.startEntry(); 274 mTimeReadStartRecord += System.currentTimeMillis() - start; 275 } 276 start = System.currentTimeMillis(); 277 parseItems(); 278 mTimeParseItems += System.currentTimeMillis() - start; 279 readEndVCard(true, false); 280 if (mInterpreter != null) { 281 start = System.currentTimeMillis(); 282 mInterpreter.endEntry(); 283 mTimeReadEndRecord += System.currentTimeMillis() - start; 284 } 285 return true; 286 } 287 288 /** 289 * @return True when successful. False when reaching the end of line 290 * @throws IOException 291 * @throws VCardException 292 */ 293 protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException { 294 String line; 295 do { 296 while (true) { 297 line = getLine(); 298 if (line == null) { 299 return false; 300 } else if (line.trim().length() > 0) { 301 break; 302 } 303 } 304 final String[] strArray = line.split(":", 2); 305 final int length = strArray.length; 306 307 // Although vCard 2.1/3.0 specification does not allow lower cases, 308 // we found vCard file emitted by some external vCard expoter have such 309 // invalid Strings. 310 // So we allow it. 311 // e.g. 312 // BEGIN:vCard 313 if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN") 314 && strArray[1].trim().equalsIgnoreCase("VCARD")) { 315 return true; 316 } else if (!allowGarbage) { 317 if (mNestCount > 0) { 318 mPreviousLine = line; 319 return false; 320 } else { 321 throw new VCardException("Expected String \"BEGIN:VCARD\" did not come " 322 + "(Instead, \"" + line + "\" came)"); 323 } 324 } 325 } while (allowGarbage); 326 327 throw new VCardException("Reached where must not be reached."); 328 } 329 330 /** 331 * <p> 332 * The arguments useCache and allowGarbase are usually true and false 333 * accordingly when this function is called outside this function itself. 334 * </p> 335 * 336 * @param useCache When true, line is obtained from mPreviousline. 337 * Otherwise, getLine() is used. 338 * @param allowGarbage When true, ignore non "END:VCARD" line. 339 * @throws IOException 340 * @throws VCardException 341 */ 342 protected void readEndVCard(boolean useCache, boolean allowGarbage) throws IOException, 343 VCardException { 344 String line; 345 do { 346 if (useCache) { 347 // Though vCard specification does not allow lower cases, 348 // some data may have them, so we allow it. 349 line = mPreviousLine; 350 } else { 351 while (true) { 352 line = getLine(); 353 if (line == null) { 354 throw new VCardException("Expected END:VCARD was not found."); 355 } else if (line.trim().length() > 0) { 356 break; 357 } 358 } 359 } 360 361 String[] strArray = line.split(":", 2); 362 if (strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END") 363 && strArray[1].trim().equalsIgnoreCase("VCARD")) { 364 return; 365 } else if (!allowGarbage) { 366 throw new VCardException("END:VCARD != \"" + mPreviousLine + "\""); 367 } 368 useCache = false; 369 } while (allowGarbage); 370 } 371 372 /* 373 * items = *CRLF item / item 374 */ 375 protected void parseItems() throws IOException, VCardException { 376 boolean ended = false; 377 378 if (mInterpreter != null) { 379 long start = System.currentTimeMillis(); 380 mInterpreter.startProperty(); 381 mTimeStartProperty += System.currentTimeMillis() - start; 382 } 383 ended = parseItem(); 384 if (mInterpreter != null && !ended) { 385 long start = System.currentTimeMillis(); 386 mInterpreter.endProperty(); 387 mTimeEndProperty += System.currentTimeMillis() - start; 388 } 389 390 while (!ended) { 391 if (mInterpreter != null) { 392 long start = System.currentTimeMillis(); 393 mInterpreter.startProperty(); 394 mTimeStartProperty += System.currentTimeMillis() - start; 395 } 396 try { 397 ended = parseItem(); 398 } catch (VCardInvalidCommentLineException e) { 399 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored."); 400 ended = false; 401 } 402 if (mInterpreter != null && !ended) { 403 long start = System.currentTimeMillis(); 404 mInterpreter.endProperty(); 405 mTimeEndProperty += System.currentTimeMillis() - start; 406 } 407 } 408 } 409 410 /* 411 * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR" 412 * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts 413 * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."] 414 * "AGENT" [params] ":" vcard CRLF 415 */ 416 protected boolean parseItem() throws IOException, VCardException { 417 mCurrentEncoding = DEFAULT_ENCODING; 418 419 final String line = getNonEmptyLine(); 420 long start = System.currentTimeMillis(); 421 422 String[] propertyNameAndValue = separateLineAndHandleGroup(line); 423 if (propertyNameAndValue == null) { 424 return true; 425 } 426 if (propertyNameAndValue.length != 2) { 427 throw new VCardInvalidLineException("Invalid line \"" + line + "\""); 428 } 429 String propertyName = propertyNameAndValue[0].toUpperCase(); 430 String propertyValue = propertyNameAndValue[1]; 431 432 mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start; 433 434 if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) { 435 start = System.currentTimeMillis(); 436 handleMultiplePropertyValue(propertyName, propertyValue); 437 mTimeParseAdrOrgN += System.currentTimeMillis() - start; 438 return false; 439 } else if (propertyName.equals("AGENT")) { 440 handleAgent(propertyValue); 441 return false; 442 } else if (isValidPropertyName(propertyName)) { 443 if (propertyName.equals("BEGIN")) { 444 if (propertyValue.equals("VCARD")) { 445 throw new VCardNestedException("This vCard has nested vCard data in it."); 446 } else { 447 throw new VCardException("Unknown BEGIN type: " + propertyValue); 448 } 449 } else if (propertyName.equals("VERSION") && !propertyValue.equals(getVersionString())) { 450 throw new VCardVersionException("Incompatible version: " + propertyValue + " != " 451 + getVersionString()); 452 } 453 start = System.currentTimeMillis(); 454 handlePropertyValue(propertyName, propertyValue); 455 mTimeParsePropertyValues += System.currentTimeMillis() - start; 456 return false; 457 } 458 459 throw new VCardException("Unknown property name: \"" + propertyName + "\""); 460 } 461 462 // For performance reason, the states for group and property name are merged into one. 463 static private final int STATE_GROUP_OR_PROPERTY_NAME = 0; 464 static private final int STATE_PARAMS = 1; 465 // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not. 466 static private final int STATE_PARAMS_IN_DQUOTE = 2; 467 468 protected String[] separateLineAndHandleGroup(String line) throws VCardException { 469 final String[] propertyNameAndValue = new String[2]; 470 final int length = line.length(); 471 if (length > 0 && line.charAt(0) == '#') { 472 throw new VCardInvalidCommentLineException(); 473 } 474 475 int state = STATE_GROUP_OR_PROPERTY_NAME; 476 int nameIndex = 0; 477 478 // This loop is developed so that we don't have to take care of bottle neck here. 479 // Refactor carefully when you need to do so. 480 for (int i = 0; i < length; i++) { 481 final char ch = line.charAt(i); 482 switch (state) { 483 case STATE_GROUP_OR_PROPERTY_NAME: { 484 if (ch == ':') { // End of a property name. 485 final String propertyName = line.substring(nameIndex, i); 486 if (propertyName.equalsIgnoreCase("END")) { 487 mPreviousLine = line; 488 return null; 489 } 490 if (mInterpreter != null) { 491 mInterpreter.propertyName(propertyName); 492 } 493 propertyNameAndValue[0] = propertyName; 494 if (i < length - 1) { 495 propertyNameAndValue[1] = line.substring(i + 1); 496 } else { 497 propertyNameAndValue[1] = ""; 498 } 499 return propertyNameAndValue; 500 } else if (ch == '.') { // Each group is followed by the dot. 501 final String groupName = line.substring(nameIndex, i); 502 if (groupName.length() == 0) { 503 Log.w(LOG_TAG, "Empty group found. Ignoring."); 504 } else if (mInterpreter != null) { 505 mInterpreter.propertyGroup(groupName); 506 } 507 nameIndex = i + 1; // Next should be another group or a property name. 508 } else if (ch == ';') { // End of property name and beginneng of parameters. 509 final String propertyName = line.substring(nameIndex, i); 510 if (propertyName.equalsIgnoreCase("END")) { 511 mPreviousLine = line; 512 return null; 513 } 514 if (mInterpreter != null) { 515 mInterpreter.propertyName(propertyName); 516 } 517 propertyNameAndValue[0] = propertyName; 518 nameIndex = i + 1; 519 state = STATE_PARAMS; // Start parameter parsing. 520 } 521 break; 522 } 523 case STATE_PARAMS: { 524 if (ch == '"') { 525 if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) { 526 Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " + 527 "Silently allow it"); 528 } 529 state = STATE_PARAMS_IN_DQUOTE; 530 } else if (ch == ';') { // Starts another param. 531 handleParams(line.substring(nameIndex, i)); 532 nameIndex = i + 1; 533 } else if (ch == ':') { // End of param and beginenning of values. 534 handleParams(line.substring(nameIndex, i)); 535 if (i < length - 1) { 536 propertyNameAndValue[1] = line.substring(i + 1); 537 } else { 538 propertyNameAndValue[1] = ""; 539 } 540 return propertyNameAndValue; 541 } 542 break; 543 } 544 case STATE_PARAMS_IN_DQUOTE: { 545 if (ch == '"') { 546 if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) { 547 Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " + 548 "Silently allow it"); 549 } 550 state = STATE_PARAMS; 551 } 552 break; 553 } 554 } 555 } 556 557 throw new VCardInvalidLineException("Invalid line: \"" + line + "\""); 558 } 559 560 /* 561 * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param / 562 * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws] 563 * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "=" 564 * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "=" 565 * [ws] word / knowntype 566 */ 567 protected void handleParams(String params) throws VCardException { 568 final String[] strArray = params.split("=", 2); 569 if (strArray.length == 2) { 570 final String paramName = strArray[0].trim().toUpperCase(); 571 String paramValue = strArray[1].trim(); 572 if (paramName.equals("TYPE")) { 573 handleType(paramValue); 574 } else if (paramName.equals("VALUE")) { 575 handleValue(paramValue); 576 } else if (paramName.equals("ENCODING")) { 577 handleEncoding(paramValue); 578 } else if (paramName.equals("CHARSET")) { 579 handleCharset(paramValue); 580 } else if (paramName.equals("LANGUAGE")) { 581 handleLanguage(paramValue); 582 } else if (paramName.startsWith("X-")) { 583 handleAnyParam(paramName, paramValue); 584 } else { 585 throw new VCardException("Unknown type \"" + paramName + "\""); 586 } 587 } else { 588 handleParamWithoutName(strArray[0]); 589 } 590 } 591 592 /** 593 * vCard 3.0 parser implementation may throw VCardException. 594 */ 595 @SuppressWarnings("unused") 596 protected void handleParamWithoutName(final String paramValue) throws VCardException { 597 handleType(paramValue); 598 } 599 600 /* 601 * ptypeval = knowntype / "X-" word 602 */ 603 protected void handleType(final String ptypeval) { 604 if (!(getKnownTypeSet().contains(ptypeval.toUpperCase()) 605 || ptypeval.startsWith("X-")) 606 && !mUnknownTypeSet.contains(ptypeval)) { 607 mUnknownTypeSet.add(ptypeval); 608 Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval)); 609 } 610 if (mInterpreter != null) { 611 mInterpreter.propertyParamType("TYPE"); 612 mInterpreter.propertyParamValue(ptypeval); 613 } 614 } 615 616 /* 617 * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word 618 */ 619 protected void handleValue(final String pvalueval) { 620 if (!(getKnownValueSet().contains(pvalueval.toUpperCase()) 621 || pvalueval.startsWith("X-") 622 || mUnknownValueSet.contains(pvalueval))) { 623 mUnknownValueSet.add(pvalueval); 624 Log.w(LOG_TAG, String.format( 625 "The value unsupported by TYPE of %s: ", getVersion(), pvalueval)); 626 } 627 if (mInterpreter != null) { 628 mInterpreter.propertyParamType("VALUE"); 629 mInterpreter.propertyParamValue(pvalueval); 630 } 631 } 632 633 /* 634 * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word 635 */ 636 protected void handleEncoding(String pencodingval) throws VCardException { 637 if (getAvailableEncodingSet().contains(pencodingval) || 638 pencodingval.startsWith("X-")) { 639 if (mInterpreter != null) { 640 mInterpreter.propertyParamType("ENCODING"); 641 mInterpreter.propertyParamValue(pencodingval); 642 } 643 mCurrentEncoding = pencodingval; 644 } else { 645 throw new VCardException("Unknown encoding \"" + pencodingval + "\""); 646 } 647 } 648 649 /** 650 * <p> 651 * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521), 652 * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc. 653 * We allow any charset. 654 * </p> 655 */ 656 protected void handleCharset(String charsetval) { 657 if (mInterpreter != null) { 658 mInterpreter.propertyParamType("CHARSET"); 659 mInterpreter.propertyParamValue(charsetval); 660 } 661 } 662 663 /** 664 * See also Section 7.1 of RFC 1521 665 */ 666 protected void handleLanguage(String langval) throws VCardException { 667 String[] strArray = langval.split("-"); 668 if (strArray.length != 2) { 669 throw new VCardException("Invalid Language: \"" + langval + "\""); 670 } 671 String tmp = strArray[0]; 672 int length = tmp.length(); 673 for (int i = 0; i < length; i++) { 674 if (!isAsciiLetter(tmp.charAt(i))) { 675 throw new VCardException("Invalid Language: \"" + langval + "\""); 676 } 677 } 678 tmp = strArray[1]; 679 length = tmp.length(); 680 for (int i = 0; i < length; i++) { 681 if (!isAsciiLetter(tmp.charAt(i))) { 682 throw new VCardException("Invalid Language: \"" + langval + "\""); 683 } 684 } 685 if (mInterpreter != null) { 686 mInterpreter.propertyParamType("LANGUAGE"); 687 mInterpreter.propertyParamValue(langval); 688 } 689 } 690 691 private boolean isAsciiLetter(char ch) { 692 if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 693 return true; 694 } 695 return false; 696 } 697 698 /** 699 * Mainly for "X-" type. This accepts any kind of type without check. 700 */ 701 protected void handleAnyParam(String paramName, String paramValue) { 702 if (mInterpreter != null) { 703 mInterpreter.propertyParamType(paramName); 704 mInterpreter.propertyParamValue(paramValue); 705 } 706 } 707 708 protected void handlePropertyValue(String propertyName, String propertyValue) 709 throws IOException, VCardException { 710 final String upperEncoding = mCurrentEncoding.toUpperCase(); 711 if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) { 712 final long start = System.currentTimeMillis(); 713 final String result = getQuotedPrintable(propertyValue); 714 if (mInterpreter != null) { 715 ArrayList<String> v = new ArrayList<String>(); 716 v.add(result); 717 mInterpreter.propertyValues(v); 718 } 719 mTimeHandleQuotedPrintable += System.currentTimeMillis() - start; 720 } else if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64) 721 || upperEncoding.equals(VCardConstants.PARAM_ENCODING_B)) { 722 final long start = System.currentTimeMillis(); 723 // It is very rare, but some BASE64 data may be so big that 724 // OutOfMemoryError occurs. To ignore such cases, use try-catch. 725 try { 726 final String result = getBase64(propertyValue); 727 if (mInterpreter != null) { 728 ArrayList<String> arrayList = new ArrayList<String>(); 729 arrayList.add(result); 730 mInterpreter.propertyValues(arrayList); 731 } 732 } catch (OutOfMemoryError error) { 733 Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!"); 734 if (mInterpreter != null) { 735 mInterpreter.propertyValues(null); 736 } 737 } 738 mTimeHandleBase64 += System.currentTimeMillis() - start; 739 } else { 740 if (!(upperEncoding.equals("7BIT") || upperEncoding.equals("8BIT") || 741 upperEncoding.startsWith("X-"))) { 742 Log.w(LOG_TAG, 743 String.format("The encoding \"%s\" is unsupported by vCard %s", 744 mCurrentEncoding, getVersionString())); 745 } 746 747 // Some device uses line folding defined in RFC 2425, which is not allowed 748 // in vCard 2.1 (while needed in vCard 3.0). 749 // 750 // e.g. 751 // BEGIN:VCARD 752 // VERSION:2.1 753 // N:;Omega;;; 754 // EMAIL;INTERNET:"Omega" 755 // <omega@example.com> 756 // FN:Omega 757 // END:VCARD 758 // 759 // The vCard above assumes that email address should become: 760 // "Omega" <omega@example.com> 761 // 762 // But vCard 2.1 requires Quote-Printable when a line contains line break(s). 763 // 764 // For more information about line folding, 765 // see "5.8.1. Line delimiting and folding" in RFC 2425. 766 // 767 // We take care of this case more formally in vCard 3.0, so we only need to 768 // do this in vCard 2.1. 769 if (getVersion() == VCardConfig.FLAG_V21) { 770 StringBuilder builder = null; 771 while (true) { 772 final String nextLine = peekLine(); 773 // We don't need to care too much about this exceptional case, 774 // but we should not wrongly eat up "END:VCARD", since it critically 775 // breaks this parser's state machine. 776 // Thus we roughly look over the next line and confirm it is at least not 777 // "END:VCARD". This extra fee is worth paying. This is exceptional 778 // anyway. 779 if (!TextUtils.isEmpty(nextLine) && 780 nextLine.charAt(0) == ' ' && 781 !"END:VCARD".contains(nextLine.toUpperCase())) { 782 getLine(); // Drop the next line. 783 784 if (builder == null) { 785 builder = new StringBuilder(); 786 builder.append(propertyValue); 787 } 788 builder.append(nextLine.substring(1)); 789 } else { 790 break; 791 } 792 } 793 if (builder != null) { 794 propertyValue = builder.toString(); 795 } 796 } 797 798 final long start = System.currentTimeMillis(); 799 if (mInterpreter != null) { 800 ArrayList<String> v = new ArrayList<String>(); 801 v.add(maybeUnescapeText(propertyValue)); 802 mInterpreter.propertyValues(v); 803 } 804 mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start; 805 } 806 } 807 808 /** 809 * <p> 810 * Parses and returns Quoted-Printable. 811 * </p> 812 * 813 * @param firstString The string following a parameter name and attributes. 814 * Example: "string" in 815 * "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r". 816 * @return whole Quoted-Printable string, including a given argument and 817 * following lines. Excludes the last empty line following to Quoted 818 * Printable lines. 819 * @throws IOException 820 * @throws VCardException 821 */ 822 private String getQuotedPrintable(String firstString) throws IOException, VCardException { 823 // Specifically, there may be some padding between = and CRLF. 824 // See the following: 825 // 826 // qp-line := *(qp-segment transport-padding CRLF) 827 // qp-part transport-padding 828 // qp-segment := qp-section *(SPACE / TAB) "=" 829 // ; Maximum length of 76 characters 830 // 831 // e.g. (from RFC 2045) 832 // Now's the time = 833 // for all folk to come= 834 // to the aid of their country. 835 if (firstString.trim().endsWith("=")) { 836 // remove "transport-padding" 837 int pos = firstString.length() - 1; 838 while (firstString.charAt(pos) != '=') { 839 } 840 StringBuilder builder = new StringBuilder(); 841 builder.append(firstString.substring(0, pos + 1)); 842 builder.append("\r\n"); 843 String line; 844 while (true) { 845 line = getLine(); 846 if (line == null) { 847 throw new VCardException("File ended during parsing a Quoted-Printable String"); 848 } 849 if (line.trim().endsWith("=")) { 850 // remove "transport-padding" 851 pos = line.length() - 1; 852 while (line.charAt(pos) != '=') { 853 } 854 builder.append(line.substring(0, pos + 1)); 855 builder.append("\r\n"); 856 } else { 857 builder.append(line); 858 break; 859 } 860 } 861 return builder.toString(); 862 } else { 863 return firstString; 864 } 865 } 866 867 protected String getBase64(String firstString) throws IOException, VCardException { 868 StringBuilder builder = new StringBuilder(); 869 builder.append(firstString); 870 871 while (true) { 872 String line = getLine(); 873 if (line == null) { 874 throw new VCardException("File ended during parsing BASE64 binary"); 875 } 876 if (line.length() == 0) { 877 break; 878 } 879 builder.append(line); 880 } 881 882 return builder.toString(); 883 } 884 885 /** 886 * <p> 887 * Mainly for "ADR", "ORG", and "N" 888 * </p> 889 */ 890 /* 891 * addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr, 892 * Street, Locality, Region, Postal Code, Country Name orgparts = 893 * *(strnosemi ";") strnosemi ; First is Organization Name, remainder are 894 * Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family, 895 * Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III, 896 * Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a 897 * semicolon in this string, it must be escaped ; with a "\" character. We 898 * do not care the number of "strnosemi" here. We are not sure whether we 899 * should add "\" CRLF to each value. We exclude them for now. 900 */ 901 protected void handleMultiplePropertyValue(String propertyName, String propertyValue) 902 throws IOException, VCardException { 903 // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some 904 // softwares/devices 905 // emit such data. 906 if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { 907 propertyValue = getQuotedPrintable(propertyValue); 908 } 909 910 if (mInterpreter != null) { 911 mInterpreter.propertyValues(VCardUtils.constructListFromValue(propertyValue, 912 (getVersion() == VCardConfig.FLAG_V30))); 913 } 914 } 915 916 /* 917 * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an 918 * error toward the AGENT property. 919 * // TODO: Support AGENT property. 920 * item = 921 * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws] 922 * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD" 923 */ 924 protected void handleAgent(final String propertyValue) throws VCardException { 925 if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) { 926 // Apparently invalid line seen in Windows Mobile 6.5. Ignore them. 927 return; 928 } else { 929 throw new VCardAgentNotSupportedException("AGENT Property is not supported now."); 930 } 931 } 932 933 /** 934 * For vCard 3.0. 935 */ 936 protected String maybeUnescapeText(final String text) { 937 return text; 938 } 939 940 /** 941 * Returns unescaped String if the character should be unescaped. Return 942 * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";" 943 * while "\x" should not be. 944 */ 945 protected String maybeUnescapeCharacter(final char ch) { 946 return unescapeCharacter(ch); 947 } 948 949 /* package */ static String unescapeCharacter(final char ch) { 950 // Original vCard 2.1 specification does not allow transformation 951 // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous 952 // implementation of 953 // this class allowed them, so keep it as is. 954 if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') { 955 return String.valueOf(ch); 956 } else { 957 return null; 958 } 959 } 960 961 private void showPerformanceInfo() { 962 Log.d(LOG_TAG, "Total parsing time: " + mTimeTotal + " ms"); 963 Log.d(LOG_TAG, "Total readLine time: " + mReader.getTotalmillisecond() + " ms"); 964 Log.d(LOG_TAG, "Time for handling the beggining of the record: " + mTimeReadStartRecord 965 + " ms"); 966 Log.d(LOG_TAG, "Time for handling the end of the record: " + mTimeReadEndRecord + " ms"); 967 Log.d(LOG_TAG, "Time for parsing line, and handling group: " + mTimeParseLineAndHandleGroup 968 + " ms"); 969 Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms"); 970 Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms"); 971 Log.d(LOG_TAG, "Time for handling normal property values: " + mTimeHandleMiscPropertyValue 972 + " ms"); 973 Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + mTimeHandleQuotedPrintable + " ms"); 974 Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms"); 975 } 976 977 /** 978 * @return {@link VCardConfig#FLAG_V21} 979 */ 980 protected int getVersion() { 981 return VCardConfig.FLAG_V21; 982 } 983 984 /** 985 * @return {@link VCardConfig#FLAG_V30} 986 */ 987 protected String getVersionString() { 988 return VCardConstants.VERSION_V21; 989 } 990 991 protected Set<String> getKnownPropertyNameSet() { 992 return VCardParser_V21.sKnownPropertyNameSet; 993 } 994 995 protected Set<String> getKnownTypeSet() { 996 return VCardParser_V21.sKnownTypeSet; 997 } 998 999 protected Set<String> getKnownValueSet() { 1000 return VCardParser_V21.sKnownValueSet; 1001 } 1002 1003 protected Set<String> getAvailableEncodingSet() { 1004 return VCardParser_V21.sAvailableEncoding; 1005 } 1006 1007 protected String getDefaultEncoding() { 1008 return DEFAULT_ENCODING; 1009 } 1010 1011 1012 public void parse(InputStream is, VCardInterpreter interpreter) 1013 throws IOException, VCardException { 1014 if (is == null) { 1015 throw new NullPointerException("InputStream must not be null."); 1016 } 1017 1018 final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset); 1019 mReader = new CustomBufferedReader(tmpReader); 1020 1021 mInterpreter = interpreter; 1022 1023 final long start = System.currentTimeMillis(); 1024 if (mInterpreter != null) { 1025 mInterpreter.start(); 1026 } 1027 parseVCardFile(); 1028 if (mInterpreter != null) { 1029 mInterpreter.end(); 1030 } 1031 mTimeTotal += System.currentTimeMillis() - start; 1032 1033 if (VCardConfig.showPerformanceLog()) { 1034 showPerformanceInfo(); 1035 } 1036 } 1037 1038 public final void cancel() { 1039 mCanceled = true; 1040 } 1041} 1042