VCardParserImpl_V21.java revision c955c8b0da0c9fcbad0ddcae76641358c27e72cd
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.vcard;

import android.text.TextUtils;
import android.util.Log;

import com.android.vcard.exception.VCardAgentNotSupportedException;
import com.android.vcard.exception.VCardException;
import com.android.vcard.exception.VCardInvalidCommentLineException;
import com.android.vcard.exception.VCardInvalidLineException;
import com.android.vcard.exception.VCardNestedException;
import com.android.vcard.exception.VCardVersionException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * <p>
 * Basic implementation achieving vCard parsing. Based on vCard 2.1.
 * </p>
 * <p>
 * Parsing events are reported to a {@link VCardInterpreter}. Version-specific
 * behavior is exposed through protected hooks (e.g. {@link #getVersion()},
 * {@link #getKnownTypeSet()}, {@link #maybeUnescapeText(String)}).
 * </p>
 * @hide
 */
/* package */ class VCardParserImpl_V21 {
    private static final String LOG_TAG = "VCardParserImpl_V21";

    /**
     * Interpreter that ignores every event. Used when the caller of
     * {@link #parse(InputStream, VCardInterpreter)} supplies no interpreter.
     */
    private static final class EmptyInterpreter implements VCardInterpreter {
        @Override
        public void end() {
        }
        @Override
        public void endEntry() {
        }
        @Override
        public void endProperty() {
        }
        @Override
        public void propertyGroup(String group) {
        }
        @Override
        public void propertyName(String name) {
        }
        @Override
        public void propertyParamType(String type) {
        }
        @Override
        public void propertyParamValue(String value) {
        }
        @Override
        public void propertyValues(List<String> values) {
        }
        @Override
        public void start() {
        }
        @Override
        public void startEntry() {
        }
        @Override
        public void startProperty() {
        }
    }

    /**
     * {@link BufferedReader} with a one-line look-ahead ({@link #peekLine()})
     * that also accumulates the wall-clock time spent in the underlying
     * {@code readLine()} calls.
     */
    protected static final class CustomBufferedReader extends BufferedReader {
        private long mTime;

        /**
         * Needed since "next line" may be null due to end of input, so null
         * alone cannot tell "nothing peeked" from "peeked end of input".
         */
        private boolean mNextLineIsValid;
        private String mNextLine;

        public CustomBufferedReader(Reader in) {
            super(in);
        }

        /**
         * Returns the previously peeked line if there is one (and clears it);
         * otherwise reads a fresh line from the underlying reader.
         */
        @Override
        public String readLine() throws IOException {
            if (mNextLineIsValid) {
                final String ret = mNextLine;
                mNextLine = null;
                mNextLineIsValid = false;
                return ret;
            }
            return readLineTimed();
        }

        /**
         * Reads one line but keeps it buffered, so that the next
         * {@link #readLine()} (or peekLine()) returns the same line.
         */
        public String peekLine() throws IOException {
            if (!mNextLineIsValid) {
                mNextLine = readLineTimed();
                mNextLineIsValid = true;
            }
            return mNextLine;
        }

        /**
         * @return accumulated milliseconds spent reading lines from the
         * underlying reader.
         */
        public long getTotalmillisecond() {
            return mTime;
        }

        /** Reads a line from the underlying reader and adds the elapsed time to mTime. */
        private String readLineTimed() throws IOException {
            final long start = System.currentTimeMillis();
            final String line = super.readLine();
            mTime += System.currentTimeMillis() - start;
            return line;
        }
    }

    private static final String DEFAULT_ENCODING = "8BIT";

    protected boolean mCanceled;
    protected VCardInterpreter mInterpreter;

    protected final String mIntermediateCharset;

    /**
     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time when a new item comes.
     * </p>
     * <p>
     * "Encoding" in vCard is different from "Charset". It is mainly used for
     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
     * "QUOTED-PRINTABLE" are known examples.
     * </p>
     */
    protected String mCurrentEncoding;

    /**
     * <p>
     * The reader object to be used internally.
     * </p>
     * <p>
     * Developers should not directly read a line from this object. Use
     * getLine() unless there is some reason not to.
     * </p>
     */
    protected CustomBufferedReader mReader;

    /**
     * <p>
     * Set for storing unknown TYPE attributes, which is not acceptable in vCard
     * specification, but happens to be seen in real world vCard.
     * </p>
     */
    protected final Set<String> mUnknownTypeSet = new HashSet<String>();

    /**
     * <p>
     * Set for storing unknown VALUE attributes, which is not acceptable in
     * vCard specification, but happens to be seen in real world vCard.
     * </p>
     */
    protected final Set<String> mUnknownValueSet = new HashSet<String>();


    // In some cases, vCard is nested. Currently, we only consider the most
    // interior vCard data.
    // See v21_foma_1.vcf in test directory for more information.
    // TODO: Don't ignore by using count, but read all of information outside vCard.
    private int mNestCount;

    // Used only for parsing END:VCARD.
    private String mPreviousLine;

    // For measuring performance.
    private long mTimeTotal;
    private long mTimeReadStartRecord;
    private long mTimeReadEndRecord;
    private long mTimeStartProperty;
    private long mTimeEndProperty;
    private long mTimeParseItems;
    private long mTimeParseLineAndHandleGroup;
    private long mTimeParsePropertyValues;
    private long mTimeParseAdrOrgN;
    private long mTimeHandleMiscPropertyValue;
    private long mTimeHandleQuotedPrintable;
    private long mTimeHandleBase64;

    public VCardParserImpl_V21() {
        this(VCardConfig.VCARD_TYPE_DEFAULT);
    }

    /**
     * @param vcardType bit flags from {@link VCardConfig}. When
     * {@code FLAG_TORELATE_NEST} is set, one level of outer BEGIN/END:VCARD
     * wrapping is expected and skipped.
     */
    public VCardParserImpl_V21(int vcardType) {
        if ((vcardType & VCardConfig.FLAG_TORELATE_NEST) != 0) {
            mNestCount = 1;
        }

        mIntermediateCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
    }

    /**
     * <p>
     * Parses vCard entries one after another until the input is exhausted or
     * {@link #cancel()} has been requested, then consumes the END:VCARD lines
     * of the outer (nesting) vCards, if any.
     * </p>
     */
    // <pre class="prettyprint">vcard_file = [wsls] vcard [wsls]</pre>
    protected void parseVCardFile() throws IOException, VCardException {
        boolean readingFirstFile = true;
        while (true) {
            if (mCanceled) {
                Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
                break;
            }
            if (!parseOneVCard(readingFirstFile)) {
                break;
            }
            readingFirstFile = false;
        }

        if (mNestCount > 0) {
            // The first outer END:VCARD may already have been read (and cached in
            // mPreviousLine) while looking for the next BEGIN:VCARD.
            boolean useCache = true;
            for (int i = 0; i < mNestCount; i++) {
                readEndVCard(useCache, true);
                useCache = false;
            }
        }
    }

    /**
     * Logs a warning (once per name) when the given property name is neither
     * known to this vCard version nor an "X-" extension.
     *
     * @return always true in this implementation; unknown names are tolerated.
     */
    protected boolean isValidPropertyName(final String propertyName) {
        if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
                propertyName.startsWith("X-"))
                && !mUnknownTypeSet.contains(propertyName)) {
            mUnknownTypeSet.add(propertyName);
            Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
        }
        return true;
    }

    /**
     * @return String. It may be null, or its length may be 0
     * @throws IOException
     */
    protected String getLine() throws IOException {
        return mReader.readLine();
    }

    /**
     * @return the next line without consuming it. May be null at end of input.
     * @throws IOException
     */
    protected String peekLine() throws IOException {
        return mReader.peekLine();
    }

    /**
     * @return String with its length &gt; 0 (lines containing only whitespace are skipped)
     * @throws IOException
     * @throws VCardException when the stream reached end of input
     */
    protected String getNonEmptyLine() throws IOException, VCardException {
        String line;
        while (true) {
            line = getLine();
            if (line == null) {
                throw new VCardException("Reached end of buffer.");
            } else if (line.trim().length() > 0) {
                return line;
            }
        }
    }

    /*
     * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
     *         items *CRLF
     *         "END" [ws] ":" [ws] "VCARD"
     */
    private boolean parseOneVCard(boolean firstRead) throws IOException, VCardException {
        boolean allowGarbage = false;
        if (firstRead) {
            if (mNestCount > 0) {
                // Skip the BEGIN:VCARD lines of the outer vCards.
                for (int i = 0; i < mNestCount; i++) {
                    if (!readBeginVCard(allowGarbage)) {
                        return false;
                    }
                    allowGarbage = true;
                }
            }
        }

        if (!readBeginVCard(allowGarbage)) {
            return false;
        }
        final long beforeStartEntry = System.currentTimeMillis();
        mInterpreter.startEntry();
        mTimeReadStartRecord += System.currentTimeMillis() - beforeStartEntry;

        final long beforeParseItems = System.currentTimeMillis();
        parseItems();
        mTimeParseItems += System.currentTimeMillis() - beforeParseItems;

        // parseItems() stops at the END line and caches it in mPreviousLine.
        readEndVCard(true, false);

        final long beforeEndEntry = System.currentTimeMillis();
        mInterpreter.endEntry();
        mTimeReadEndRecord += System.currentTimeMillis() - beforeEndEntry;
        return true;
    }

    /**
     * Reads lines until "BEGIN:VCARD" is found.
     *
     * @param allowGarbage When true, non-empty lines other than BEGIN:VCARD
     * are skipped. When false, such a line makes this method return false
     * (nested mode; the line is cached in mPreviousLine) or throw.
     * @return True when successful. False when reaching the end of input, or
     * when in nested mode a non-BEGIN line came while garbage is not allowed.
     * @throws IOException
     * @throws VCardException
     */
    protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
        String line;
        do {
            while (true) {
                line = getLine();
                if (line == null) {
                    return false;
                } else if (line.trim().length() > 0) {
                    break;
                }
            }
            final String[] strArray = line.split(":", 2);
            final int length = strArray.length;

            // Although vCard 2.1/3.0 specification does not allow lower cases,
            // we found vCard file emitted by some external vCard exporter have such
            // invalid Strings.
            // So we allow it.
            // e.g.
            // BEGIN:vCard
            if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
                return true;
            } else if (!allowGarbage) {
                if (mNestCount > 0) {
                    mPreviousLine = line;
                    return false;
                } else {
                    throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
                            + "(Instead, \"" + line + "\" came)");
                }
            }
        } while (allowGarbage);

        throw new VCardException("Reached where must not be reached.");
    }

    /**
     * <p>
     * The arguments useCache and allowGarbage are usually true and false
     * accordingly when this function is called outside this function itself.
     * </p>
     *
     * @param useCache When true, line is obtained from mPreviousLine.
     * Otherwise, getLine() is used.
     * @param allowGarbage When true, ignore non "END:VCARD" line.
     * @throws IOException
     * @throws VCardException when END:VCARD is not found
     */
    protected void readEndVCard(boolean useCache, boolean allowGarbage) throws IOException,
            VCardException {
        String line;
        do {
            if (useCache) {
                // Though vCard specification does not allow lower cases,
                // some data may have them, so we allow it.
                line = mPreviousLine;
            } else {
                while (true) {
                    line = getLine();
                    if (line == null) {
                        throw new VCardException("Expected END:VCARD was not found.");
                    } else if (line.trim().length() > 0) {
                        break;
                    }
                }
            }

            // The cached line is null when nothing has been cached yet (e.g. empty
            // input in nested mode). Treat that as "not END:VCARD" instead of
            // crashing with NullPointerException.
            if (line != null) {
                final String[] strArray = line.split(":", 2);
                if (strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END")
                        && strArray[1].trim().equalsIgnoreCase("VCARD")) {
                    return;
                }
            }
            if (!allowGarbage) {
                // Report the line actually examined, not a possibly stale cache.
                throw new VCardException("END:VCARD != \"" + line + "\"");
            }
            useCache = false;
        } while (allowGarbage);
    }

    /*
     * items = *CRLF item / item
     */
    protected void parseItems() throws IOException, VCardException {
        boolean ended = false;

        // Note: a comment-like line is tolerated only from the second item on;
        // for the first item VCardInvalidCommentLineException propagates.
        final long beforeBeginProperty = System.currentTimeMillis();
        mInterpreter.startProperty();
        mTimeStartProperty += System.currentTimeMillis() - beforeBeginProperty;
        ended = parseItem();
        if (!ended) {
            final long beforeEndProperty = System.currentTimeMillis();
            mInterpreter.endProperty();
            mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
        }

        while (!ended) {
            final long beforeStartProperty = System.currentTimeMillis();
            mInterpreter.startProperty();
            mTimeStartProperty += System.currentTimeMillis() - beforeStartProperty;
            try {
                ended = parseItem();
            } catch (VCardInvalidCommentLineException e) {
                Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
                ended = false;
            }

            if (!ended) {
                final long beforeEndProperty = System.currentTimeMillis();
                mInterpreter.endProperty();
                mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
            }
        }
    }

    /*
     * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
     * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
     * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
     * "AGENT" [params] ":" vcard CRLF
     */
    /**
     * Parses one property line (plus any continuation lines it owns).
     *
     * @return true when the END line was reached (no property was parsed),
     * false when a property was parsed and more items may follow.
     */
    protected boolean parseItem() throws IOException, VCardException {
        mCurrentEncoding = DEFAULT_ENCODING;

        final String line = getNonEmptyLine();
        long start = System.currentTimeMillis();

        String[] propertyNameAndValue = separateLineAndHandleGroup(line);
        if (propertyNameAndValue == null) {
            // "END" was found.
            return true;
        }
        if (propertyNameAndValue.length != 2) {
            throw new VCardInvalidLineException("Invalid line \"" + line + "\"");
        }
        String propertyName = propertyNameAndValue[0].toUpperCase();
        String propertyValue = propertyNameAndValue[1];

        mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start;

        if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) {
            start = System.currentTimeMillis();
            handleMultiplePropertyValue(propertyName, propertyValue);
            mTimeParseAdrOrgN += System.currentTimeMillis() - start;
            return false;
        } else if (propertyName.equals("AGENT")) {
            handleAgent(propertyValue);
            return false;
        } else if (isValidPropertyName(propertyName)) {
            if (propertyName.equals("BEGIN")) {
                if (propertyValue.equals("VCARD")) {
                    throw new VCardNestedException("This vCard has nested vCard data in it.");
                } else {
                    throw new VCardException("Unknown BEGIN type: " + propertyValue);
                }
            } else if (propertyName.equals("VERSION") && !propertyValue.equals(getVersionString())) {
                throw new VCardVersionException("Incompatible version: " + propertyValue + " != "
                        + getVersionString());
            }
            start = System.currentTimeMillis();
            handlePropertyValue(propertyName, propertyValue);
            mTimeParsePropertyValues += System.currentTimeMillis() - start;
            return false;
        }

        throw new VCardException("Unknown property name: \"" + propertyName + "\"");
    }

    // For performance reason, the states for group and property name are merged into one.
    static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
    static private final int STATE_PARAMS = 1;
    // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
    static private final int STATE_PARAMS_IN_DQUOTE = 2;

    /**
     * Splits a property line into name and value, reporting groups, the
     * property name and parameters to the interpreter on the way.
     *
     * @return a two-element array {name, value}, or null when the property
     * name is "END" (the line is then cached in mPreviousLine).
     * @throws VCardInvalidCommentLineException when the line starts with '#'
     * @throws VCardInvalidLineException when no ':' separating the value is found
     */
    protected String[] separateLineAndHandleGroup(String line) throws VCardException {
        final String[] propertyNameAndValue = new String[2];
        final int length = line.length();
        if (length > 0 && line.charAt(0) == '#') {
            throw new VCardInvalidCommentLineException();
        }

        int state = STATE_GROUP_OR_PROPERTY_NAME;
        int nameIndex = 0;

        // This loop is written so that it does not become a bottleneck.
        // Refactor carefully when you need to do so.
        for (int i = 0; i < length; i++) {
            final char ch = line.charAt(i);
            switch (state) {
                case STATE_GROUP_OR_PROPERTY_NAME: {
                    if (ch == ':') {  // End of a property name.
                        final String propertyName = line.substring(nameIndex, i);
                        if (propertyName.equalsIgnoreCase("END")) {
                            mPreviousLine = line;
                            return null;
                        }
                        mInterpreter.propertyName(propertyName);
                        propertyNameAndValue[0] = propertyName;
                        if (i < length - 1) {
                            propertyNameAndValue[1] = line.substring(i + 1);
                        } else {
                            propertyNameAndValue[1] = "";
                        }
                        return propertyNameAndValue;
                    } else if (ch == '.') {  // Each group is followed by the dot.
                        final String groupName = line.substring(nameIndex, i);
                        if (groupName.length() == 0) {
                            Log.w(LOG_TAG, "Empty group found. Ignoring.");
                        } else {
                            mInterpreter.propertyGroup(groupName);
                        }
                        nameIndex = i + 1;  // Next should be another group or a property name.
                    } else if (ch == ';') {  // End of property name and beginning of parameters.
                        final String propertyName = line.substring(nameIndex, i);
                        if (propertyName.equalsIgnoreCase("END")) {
                            mPreviousLine = line;
                            return null;
                        }
                        mInterpreter.propertyName(propertyName);
                        propertyNameAndValue[0] = propertyName;
                        nameIndex = i + 1;
                        state = STATE_PARAMS;  // Start parameter parsing.
                    }
                    // TODO: comma support (in vCard 3.0 and 4.0).
                    break;
                }
                case STATE_PARAMS: {
                    if (ch == '"') {
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        state = STATE_PARAMS_IN_DQUOTE;
                    } else if (ch == ';') {  // Starts another param.
                        handleParams(line.substring(nameIndex, i));
                        nameIndex = i + 1;
                    } else if (ch == ':') {  // End of param and beginning of values.
                        handleParams(line.substring(nameIndex, i));
                        if (i < length - 1) {
                            propertyNameAndValue[1] = line.substring(i + 1);
                        } else {
                            propertyNameAndValue[1] = "";
                        }
                        return propertyNameAndValue;
                    }
                    break;
                }
                case STATE_PARAMS_IN_DQUOTE: {
                    // ';' and ':' inside double quotes do not terminate the parameter.
                    if (ch == '"') {
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        state = STATE_PARAMS;
                    }
                    break;
                }
            }
        }

        throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
    }

    /*
     * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
     * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
     * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
     * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
     * [ws] word / knowntype
     */
    /**
     * Dispatches a single "NAME=value" (or bare "value") parameter to the
     * matching handler.
     *
     * @throws VCardException when the parameter name is not recognized
     */
    protected void handleParams(String params) throws VCardException {
        final String[] strArray = params.split("=", 2);
        if (strArray.length == 2) {
            final String paramName = strArray[0].trim().toUpperCase();
            String paramValue = strArray[1].trim();
            if (paramName.equals("TYPE")) {
                handleType(paramValue);
            } else if (paramName.equals("VALUE")) {
                handleValue(paramValue);
            } else if (paramName.equals("ENCODING")) {
                handleEncoding(paramValue);
            } else if (paramName.equals("CHARSET")) {
                handleCharset(paramValue);
            } else if (paramName.equals("LANGUAGE")) {
                handleLanguage(paramValue);
            } else if (paramName.startsWith("X-")) {
                handleAnyParam(paramName, paramValue);
            } else {
                throw new VCardException("Unknown type \"" + paramName + "\"");
            }
        } else {
            handleParamWithoutName(strArray[0]);
        }
    }

    /**
     * Handles a parameter given without a name; in vCard 2.1 it is treated as
     * a TYPE value.
     * vCard 3.0 parser implementation may throw VCardException.
     */
    @SuppressWarnings("unused")
    protected void handleParamWithoutName(final String paramValue) throws VCardException {
        handleType(paramValue);
    }

    /*
     * ptypeval = knowntype / "X-" word
     */
    /**
     * Reports a TYPE parameter to the interpreter, logging a warning the
     * first time an unknown (non "X-") type is seen.
     */
    protected void handleType(final String ptypeval) {
        if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
                || ptypeval.startsWith("X-"))
                && !mUnknownTypeSet.contains(ptypeval)) {
            mUnknownTypeSet.add(ptypeval);
            Log.w(LOG_TAG, String.format("TYPE unsupported by %s: %s", getVersion(), ptypeval));
        }
        mInterpreter.propertyParamType("TYPE");
        mInterpreter.propertyParamValue(ptypeval);
    }

    /*
     * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
     */
    /**
     * Reports a VALUE parameter to the interpreter, logging a warning the
     * first time an unknown (non "X-") value is seen.
     */
    protected void handleValue(final String pvalueval) {
        if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
                || pvalueval.startsWith("X-")
                || mUnknownValueSet.contains(pvalueval))) {
            mUnknownValueSet.add(pvalueval);
            Log.w(LOG_TAG, String.format(
                    "The value unsupported by TYPE of %s: %s", getVersion(), pvalueval));
        }
        mInterpreter.propertyParamType("VALUE");
        mInterpreter.propertyParamValue(pvalueval);
    }

    /*
     * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
     */
    /**
     * Reports an ENCODING parameter and remembers it as the encoding of the
     * property currently being parsed.
     *
     * @throws VCardException when the encoding is neither available nor an "X-" word
     */
    protected void handleEncoding(String pencodingval) throws VCardException {
        if (getAvailableEncodingSet().contains(pencodingval) ||
                pencodingval.startsWith("X-")) {
            mInterpreter.propertyParamType("ENCODING");
            mInterpreter.propertyParamValue(pencodingval);
            mCurrentEncoding = pencodingval;
        } else {
            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
        }
    }

    /**
     * <p>
     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
     * We allow any charset.
     * </p>
     */
    protected void handleCharset(String charsetval) {
        mInterpreter.propertyParamType("CHARSET");
        mInterpreter.propertyParamValue(charsetval);
    }

    /**
     * Accepts only language tags of the form "letters-letters" (ASCII).
     * See also Section 7.1 of RFC 1521
     *
     * @throws VCardException when the tag does not have that form
     */
    protected void handleLanguage(String langval) throws VCardException {
        String[] strArray = langval.split("-");
        if (strArray.length != 2) {
            throw new VCardException("Invalid Language: \"" + langval + "\"");
        }
        String tmp = strArray[0];
        int length = tmp.length();
        for (int i = 0; i < length; i++) {
            if (!isAsciiLetter(tmp.charAt(i))) {
                throw new VCardException("Invalid Language: \"" + langval + "\"");
            }
        }
        tmp = strArray[1];
        length = tmp.length();
        for (int i = 0; i < length; i++) {
            if (!isAsciiLetter(tmp.charAt(i))) {
                throw new VCardException("Invalid Language: \"" + langval + "\"");
            }
        }
        mInterpreter.propertyParamType(VCardConstants.PARAM_LANGUAGE);
        mInterpreter.propertyParamValue(langval);
    }

    private boolean isAsciiLetter(char ch) {
        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
            return true;
        }
        return false;
    }

    /**
     * Mainly for "X-" type. This accepts any kind of type without check.
     */
    protected void handleAnyParam(String paramName, String paramValue) {
        mInterpreter.propertyParamType(paramName);
        mInterpreter.propertyParamValue(paramValue);
    }

    /**
     * Decodes the value of an ordinary (non ADR/ORG/N/AGENT) property
     * according to the current encoding, reading continuation lines where
     * needed, and reports it to the interpreter as a single-element list.
     * When BASE64 data exhausts memory, null is reported instead.
     */
    protected void handlePropertyValue(String propertyName, String propertyValue)
            throws IOException, VCardException {
        final String upperEncoding = mCurrentEncoding.toUpperCase();
        if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
            final long start = System.currentTimeMillis();
            final String result = getQuotedPrintable(propertyValue);
            final List<String> v = new ArrayList<String>();
            v.add(result);
            mInterpreter.propertyValues(v);
            mTimeHandleQuotedPrintable += System.currentTimeMillis() - start;
        } else if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
                || upperEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
            final long start = System.currentTimeMillis();
            // It is very rare, but some BASE64 data may be so big that
            // OutOfMemoryError occurs. To ignore such cases, use try-catch.
            try {
                final List<String> arrayList = new ArrayList<String>();
                arrayList.add(getBase64(propertyValue));
                mInterpreter.propertyValues(arrayList);
            } catch (OutOfMemoryError error) {
                Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
                mInterpreter.propertyValues(null);
            }
            mTimeHandleBase64 += System.currentTimeMillis() - start;
        } else {
            if (!(upperEncoding.equals("7BIT") || upperEncoding.equals("8BIT") ||
                    upperEncoding.startsWith("X-"))) {
                Log.w(LOG_TAG,
                        String.format("The encoding \"%s\" is unsupported by vCard %s",
                                mCurrentEncoding, getVersionString()));
            }

            // Some device uses line folding defined in RFC 2425, which is not allowed
            // in vCard 2.1 (while needed in vCard 3.0).
            //
            // e.g.
            // BEGIN:VCARD
            // VERSION:2.1
            // N:;Omega;;;
            // EMAIL;INTERNET:"Omega"
            //   <omega@example.com>
            // FN:Omega
            // END:VCARD
            //
            // The vCard above assumes that email address should become:
            // "Omega" <omega@example.com>
            //
            // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
            //
            // For more information about line folding,
            // see "5.8.1. Line delimiting and folding" in RFC 2425.
            //
            // We take care of this case more formally in vCard 3.0, so we only need to
            // do this in vCard 2.1.
            if (getVersion() == VCardConfig.VERSION_21) {
                StringBuilder builder = null;
                while (true) {
                    final String nextLine = peekLine();
                    // We don't need to care too much about this exceptional case,
                    // but we should not wrongly eat up "END:VCARD", since it critically
                    // breaks this parser's state machine.
                    // Thus we roughly look over the next line and confirm it is at least not
                    // "END:VCARD". This extra fee is worth paying. This is exceptional
                    // anyway.
                    //
                    // The check must be "nextLine contains END:VCARD"; the reverse
                    // can never match because nextLine starts with a space here.
                    if (!TextUtils.isEmpty(nextLine) &&
                            nextLine.charAt(0) == ' ' &&
                            !nextLine.toUpperCase().contains("END:VCARD")) {
                        getLine();  // Drop the next line.

                        if (builder == null) {
                            builder = new StringBuilder();
                            builder.append(propertyValue);
                        }
                        builder.append(nextLine.substring(1));
                    } else {
                        break;
                    }
                }
                if (builder != null) {
                    propertyValue = builder.toString();
                }
            }

            final long start = System.currentTimeMillis();
            final List<String> v = new ArrayList<String>();
            v.add(maybeUnescapeText(propertyValue));
            mInterpreter.propertyValues(v);
            mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start;
        }
    }

    /**
     * <p>
     * Parses and returns Quoted-Printable.
     * </p>
     *
     * @param firstString The string following a parameter name and attributes.
     *            Example: "string" in
     *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
     * @return whole Quoted-Printable string, including a given argument and
     *         following lines. Excludes the last empty line following to Quoted
     *         Printable lines.
     * @throws IOException
     * @throws VCardException when the input ends in the middle of the value
     */
    private String getQuotedPrintable(String firstString) throws IOException, VCardException {
        // Specifically, there may be some padding between = and CRLF.
        // See the following:
        //
        // qp-line := *(qp-segment transport-padding CRLF)
        //            qp-part transport-padding
        // qp-segment := qp-section *(SPACE / TAB) "="
        //             ; Maximum length of 76 characters
        //
        // e.g. (from RFC 2045)
        // Now's the time =
        // for all folk to come=
        //  to the aid of their country.
        if (firstString.trim().endsWith("=")) {
            StringBuilder builder = new StringBuilder();
            builder.append(stripTransportPadding(firstString));
            builder.append("\r\n");
            String line;
            while (true) {
                line = getLine();
                if (line == null) {
                    throw new VCardException("File ended during parsing a Quoted-Printable String");
                }
                if (line.trim().endsWith("=")) {
                    builder.append(stripTransportPadding(line));
                    builder.append("\r\n");
                } else {
                    builder.append(line);
                    break;
                }
            }
            return builder.toString();
        } else {
            return firstString;
        }
    }

    /**
     * Removes "transport-padding", i.e. everything after the soft line break "=".
     * The caller guarantees that the trimmed line ends with '=', so the last
     * '=' in the line is the soft line break.
     */
    private static String stripTransportPadding(final String line) {
        return line.substring(0, line.lastIndexOf('=') + 1);
    }

    /**
     * Collects a BASE64 value spanning multiple lines.
     *
     * @param firstString the part of the value found on the property line itself
     * @return the concatenation of firstString and the following lines up to
     * (not including) an empty line or a line that looks like the next property
     * @throws IOException
     * @throws VCardException when the input ends in the middle of the value
     */
    protected String getBase64(String firstString) throws IOException, VCardException {
        final StringBuilder builder = new StringBuilder();
        builder.append(firstString);

        while (true) {
            final String line = peekLine();
            if (line == null) {
                throw new VCardException("File ended during parsing BASE64 binary");
            }

            // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
            // have them. We try to detect those cases using colon, given BASE64 doesn't
            // contain it. Specifically BASE64 doesn't have colon in it, so we should be able
            // to detect the case safely.
            if (line.contains(":")) {
                if (getKnownPropertyNameSet().contains(
                        line.substring(0, line.indexOf(":")).toUpperCase())) {
                    Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
                            "which must not contain semi-colon. Treat the line as next property.");
                    Log.w(LOG_TAG, "Problematic line: " + line.trim());
                    // Leave the line unconsumed so that it is parsed as a property.
                    break;
                }
            }

            // Consume the line.
            getLine();

            if (line.length() == 0) {
                break;
            }
            builder.append(line);
        }

        return builder.toString();
    }

    /**
     * <p>
     * Mainly for "ADR", "ORG", and "N"
     * </p>
     */
    /*
     * addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr,
     * Street, Locality, Region, Postal Code, Country Name orgparts =
     * *(strnosemi ";") strnosemi ; First is Organization Name, remainder are
     * Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family,
     * Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III,
     * Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a
     * semicolon in this string, it must be escaped ; with a "\" character. We
     * do not care the number of "strnosemi" here. We are not sure whether we
     * should add "\" CRLF to each value. We exclude them for now.
     */
    protected void handleMultiplePropertyValue(String propertyName, String propertyValue)
            throws IOException, VCardException {
        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some
        // softwares/devices
        // emit such data.
        if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
            propertyValue = getQuotedPrintable(propertyValue);
        }

        mInterpreter.propertyValues(VCardUtils.constructListFromValue(propertyValue,
                getVersion()));
    }

    /*
     * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
     * error toward the AGENT property.
     * // TODO: Support AGENT property.
     * item =
     * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
     * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
     */
    /**
     * @throws VCardAgentNotSupportedException when the value contains an
     * embedded vCard ("BEGIN:VCARD"); other values are silently ignored.
     */
    protected void handleAgent(final String propertyValue) throws VCardException {
        if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) {
            // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
            return;
        } else {
            throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
        }
    }

    /**
     * For vCard 3.0. This implementation returns the text unchanged.
     */
    protected String maybeUnescapeText(final String text) {
        return text;
    }

    /**
     * Returns unescaped String if the character should be unescaped. Return
     * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
     * while "\x" should not be.
     */
    protected String maybeUnescapeCharacter(final char ch) {
        return unescapeCharacter(ch);
    }

    /* package */ static String unescapeCharacter(final char ch) {
        // Original vCard 2.1 specification does not allow transformation
        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
        // implementation of
        // this class allowed them, so keep it as is.
        if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
            return String.valueOf(ch);
        } else {
            return null;
        }
    }

    /** Dumps the accumulated timing counters to the debug log. */
    private void showPerformanceInfo() {
        Log.d(LOG_TAG, "Total parsing time:  " + mTimeTotal + " ms");
        Log.d(LOG_TAG, "Total readLine time: " + mReader.getTotalmillisecond() + " ms");
        Log.d(LOG_TAG, "Time for handling the beggining of the record: " + mTimeReadStartRecord
                + " ms");
        Log.d(LOG_TAG, "Time for handling the end of the record: " + mTimeReadEndRecord + " ms");
        Log.d(LOG_TAG, "Time for parsing line, and handling group: " + mTimeParseLineAndHandleGroup
                + " ms");
        Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms");
        Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms");
        Log.d(LOG_TAG, "Time for handling normal property values: " + mTimeHandleMiscPropertyValue
                + " ms");
        Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + mTimeHandleQuotedPrintable + " ms");
        Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms");
    }

    /**
     * @return {@link VCardConfig#VERSION_21}
     */
    protected int getVersion() {
        return VCardConfig.VERSION_21;
    }

    /**
     * @return {@link VCardConstants#VERSION_V21}
     */
    protected String getVersionString() {
        return VCardConstants.VERSION_V21;
    }

    protected Set<String> getKnownPropertyNameSet() {
        return VCardParser_V21.sKnownPropertyNameSet;
    }

    protected Set<String> getKnownTypeSet() {
        return VCardParser_V21.sKnownTypeSet;
    }

    protected Set<String> getKnownValueSet() {
        return VCardParser_V21.sKnownValueSet;
    }

    protected Set<String> getAvailableEncodingSet() {
        return VCardParser_V21.sAvailableEncoding;
    }

    protected String getDefaultEncoding() {
        return DEFAULT_ENCODING;
    }


    /**
     * Parses the given stream and reports the result to the interpreter.
     *
     * @param is stream to read vCard data from; decoded with the intermediate charset
     * @param interpreter receiver of parse events; when null, events are discarded
     * @throws NullPointerException when {@code is} is null
     * @throws IOException
     * @throws VCardException
     */
    public void parse(InputStream is, VCardInterpreter interpreter)
            throws IOException, VCardException {
        if (is == null) {
            throw new NullPointerException("InputStream must not be null.");
        }

        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
        mReader = new CustomBufferedReader(tmpReader);

        // mInterpreter is never null from here on.
        mInterpreter = (interpreter != null ? interpreter : new EmptyInterpreter());

        final long start = System.currentTimeMillis();
        mInterpreter.start();
        parseVCardFile();
        mInterpreter.end();
        mTimeTotal += System.currentTimeMillis() - start;

        if (VCardConfig.showPerformanceLog()) {
            showPerformanceInfo();
        }
    }

    /**
     * Requests that parsing stop; the flag is checked before each vCard entry
     * is parsed.
     */
    public final void cancel() {
        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
        mCanceled = true;
    }
}