VCardParserImpl_V21.java revision 677ef21613a9d35053ec098444832ce4125a847e
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16package com.android.vcard; 17 18import android.text.TextUtils; 19import android.util.Log; 20 21import com.android.vcard.exception.VCardAgentNotSupportedException; 22import com.android.vcard.exception.VCardException; 23import com.android.vcard.exception.VCardInvalidCommentLineException; 24import com.android.vcard.exception.VCardInvalidLineException; 25import com.android.vcard.exception.VCardNestedException; 26import com.android.vcard.exception.VCardVersionException; 27 28import java.io.BufferedReader; 29import java.io.IOException; 30import java.io.InputStream; 31import java.io.InputStreamReader; 32import java.io.Reader; 33import java.util.ArrayList; 34import java.util.HashSet; 35import java.util.List; 36import java.util.Set; 37 38/** 39 * <p> 40 * Basic implementation achieving vCard parsing. Based on vCard 2.1. 41 * </p> 42 * @hide 43 */ 44/* package */ class VCardParserImpl_V21 { 45 private static final String LOG_TAG = "VCardParserImpl_V21"; 46 47 private static final class EmptyInterpreter implements VCardInterpreter { 48 @Override 49 public void end() { 50 } 51 @Override 52 public void endEntry() { 53 } 54 @Override 55 public void endProperty() { 56 } 57 @Override 58 public void propertyGroup(String group) { 59 } 60 @Override 61 public void propertyName(String name) { 62 } 63 @Override 64 public void propertyParamType(String type) { 65 } 66 @Override 67 public void propertyParamValue(String value) { 68 } 69 @Override 70 public void propertyValues(List<String> values) { 71 } 72 @Override 73 public void start() { 74 } 75 @Override 76 public void startEntry() { 77 } 78 @Override 79 public void startProperty() { 80 } 81 } 82 83 protected static final class CustomBufferedReader extends BufferedReader { 84 private long mTime; 85 86 /** 87 * Needed since "next line" may be null due to end of line. 88 */ 89 private boolean mNextLineIsValid; 90 private String mNextLine; 91 92 public CustomBufferedReader(Reader in) { 93 super(in); 94 } 95 96 @Override 97 public String readLine() throws IOException { 98 if (mNextLineIsValid) { 99 final String ret = mNextLine; 100 mNextLine = null; 101 mNextLineIsValid = false; 102 return ret; 103 } 104 105 final long start = System.currentTimeMillis(); 106 final String line = super.readLine(); 107 final long end = System.currentTimeMillis(); 108 mTime += end - start; 109 return line; 110 } 111 112 /** 113 * Read one line, but make this object store it in its queue. 114 */ 115 public String peekLine() throws IOException { 116 if (!mNextLineIsValid) { 117 final long start = System.currentTimeMillis(); 118 final String line = super.readLine(); 119 final long end = System.currentTimeMillis(); 120 mTime += end - start; 121 122 mNextLine = line; 123 mNextLineIsValid = true; 124 } 125 126 return mNextLine; 127 } 128 129 public long getTotalmillisecond() { 130 return mTime; 131 } 132 } 133 134 private static final String DEFAULT_ENCODING = "8BIT"; 135 136 protected boolean mCanceled; 137 protected VCardInterpreter mInterpreter; 138 139 protected final String mIntermediateCharset; 140 141 /** 142 * <p> 143 * The encoding type for deconding byte streams. This member variable is 144 * reset to a default encoding every time when a new item comes. 145 * </p> 146 * <p> 147 * "Encoding" in vCard is different from "Charset". It is mainly used for 148 * addresses, notes, images. "7BIT", "8BIT", "BASE64", and 149 * "QUOTED-PRINTABLE" are known examples. 150 * </p> 151 */ 152 protected String mCurrentEncoding; 153 154 /** 155 * <p> 156 * The reader object to be used internally. 157 * </p> 158 * <p> 159 * Developers should not directly read a line from this object. Use 160 * getLine() unless there some reason. 161 * </p> 162 */ 163 protected CustomBufferedReader mReader; 164 165 /** 166 * <p> 167 * Set for storing unkonwn TYPE attributes, which is not acceptable in vCard 168 * specification, but happens to be seen in real world vCard. 169 * </p> 170 * <p> 171 * We just accept those invalid types after emitting a warning for each of it. 172 * </p> 173 */ 174 protected final Set<String> mUnknownTypeSet = new HashSet<String>(); 175 176 /** 177 * <p> 178 * Set for storing unkonwn VALUE attributes, which is not acceptable in 179 * vCard specification, but happens to be seen in real world vCard. 180 * </p> 181 * <p> 182 * We just accept those invalid types after emitting a warning for each of it. 183 * </p> 184 */ 185 protected final Set<String> mUnknownValueSet = new HashSet<String>(); 186 187 188 // In some cases, vCard is nested. Currently, we only consider the most 189 // interior vCard data. 190 // See v21_foma_1.vcf in test directory for more information. 191 // TODO: Don't ignore by using count, but read all of information outside vCard. 192 private int mNestCount; 193 194 // Used only for parsing END:VCARD. 195 private String mPreviousLine; 196 197 // For measuring performance. 198 private long mTimeTotal; 199 private long mTimeReadStartRecord; 200 private long mTimeReadEndRecord; 201 private long mTimeStartProperty; 202 private long mTimeEndProperty; 203 private long mTimeParseItems; 204 private long mTimeParseLineAndHandleGroup; 205 private long mTimeParsePropertyValues; 206 private long mTimeParseAdrOrgN; 207 private long mTimeHandleMiscPropertyValue; 208 private long mTimeHandleQuotedPrintable; 209 private long mTimeHandleBase64; 210 211 public VCardParserImpl_V21() { 212 this(VCardConfig.VCARD_TYPE_DEFAULT); 213 } 214 215 public VCardParserImpl_V21(int vcardType) { 216 if ((vcardType & VCardConfig.FLAG_TORELATE_NEST) != 0) { 217 mNestCount = 1; 218 } 219 220 mIntermediateCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET; 221 } 222 223 /** 224 * <p> 225 * Parses the file at the given position. 226 * </p> 227 */ 228 // <pre class="prettyprint">vcard_file = [wsls] vcard [wsls]</pre> 229 protected void parseVCardFile() throws IOException, VCardException { 230 boolean readingFirstFile = true; 231 while (true) { 232 if (mCanceled) { 233 Log.i(LOG_TAG, "Cancel request has come. exitting parse operation."); 234 break; 235 } 236 if (!parseOneVCard(readingFirstFile)) { 237 break; 238 } 239 readingFirstFile = false; 240 } 241 242 if (mNestCount > 0) { 243 boolean useCache = true; 244 for (int i = 0; i < mNestCount; i++) { 245 readEndVCard(useCache, true); 246 useCache = false; 247 } 248 } 249 } 250 251 /** 252 * @return true when a given property name is a valid property name. 253 */ 254 protected boolean isValidPropertyName(final String propertyName) { 255 if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) || 256 propertyName.startsWith("X-")) 257 && !mUnknownTypeSet.contains(propertyName)) { 258 mUnknownTypeSet.add(propertyName); 259 Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName); 260 } 261 return true; 262 } 263 264 /** 265 * @return String. It may be null, or its length may be 0 266 * @throws IOException 267 */ 268 protected String getLine() throws IOException { 269 return mReader.readLine(); 270 } 271 272 protected String peekLine() throws IOException { 273 return mReader.peekLine(); 274 } 275 276 /** 277 * @return String with it's length > 0 278 * @throws IOException 279 * @throws VCardException when the stream reached end of line 280 */ 281 protected String getNonEmptyLine() throws IOException, VCardException { 282 String line; 283 while (true) { 284 line = getLine(); 285 if (line == null) { 286 throw new VCardException("Reached end of buffer."); 287 } else if (line.trim().length() > 0) { 288 return line; 289 } 290 } 291 } 292 293 /* 294 * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF 295 * items *CRLF 296 * "END" [ws] ":" [ws] "VCARD" 297 */ 298 private boolean parseOneVCard(boolean firstRead) throws IOException, VCardException { 299 boolean allowGarbage = false; 300 if (firstRead) { 301 if (mNestCount > 0) { 302 for (int i = 0; i < mNestCount; i++) { 303 if (!readBeginVCard(allowGarbage)) { 304 return false; 305 } 306 allowGarbage = true; 307 } 308 } 309 } 310 311 if (!readBeginVCard(allowGarbage)) { 312 return false; 313 } 314 final long beforeStartEntry = System.currentTimeMillis(); 315 mInterpreter.startEntry(); 316 mTimeReadStartRecord += System.currentTimeMillis() - beforeStartEntry; 317 318 final long beforeParseItems = System.currentTimeMillis(); 319 parseItems(); 320 mTimeParseItems += System.currentTimeMillis() - beforeParseItems; 321 322 readEndVCard(true, false); 323 324 final long beforeEndEntry = System.currentTimeMillis(); 325 mInterpreter.endEntry(); 326 mTimeReadEndRecord += System.currentTimeMillis() - beforeEndEntry; 327 return true; 328 } 329 330 /** 331 * @return True when successful. False when reaching the end of line 332 * @throws IOException 333 * @throws VCardException 334 */ 335 protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException { 336 String line; 337 do { 338 while (true) { 339 line = getLine(); 340 if (line == null) { 341 return false; 342 } else if (line.trim().length() > 0) { 343 break; 344 } 345 } 346 final String[] strArray = line.split(":", 2); 347 final int length = strArray.length; 348 349 // Although vCard 2.1/3.0 specification does not allow lower cases, 350 // we found vCard file emitted by some external vCard expoter have such 351 // invalid Strings. 352 // So we allow it. 353 // e.g. 354 // BEGIN:vCard 355 if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN") 356 && strArray[1].trim().equalsIgnoreCase("VCARD")) { 357 return true; 358 } else if (!allowGarbage) { 359 if (mNestCount > 0) { 360 mPreviousLine = line; 361 return false; 362 } else { 363 throw new VCardException("Expected String \"BEGIN:VCARD\" did not come " 364 + "(Instead, \"" + line + "\" came)"); 365 } 366 } 367 } while (allowGarbage); 368 369 throw new VCardException("Reached where must not be reached."); 370 } 371 372 /** 373 * <p> 374 * The arguments useCache and allowGarbase are usually true and false 375 * accordingly when this function is called outside this function itself. 376 * </p> 377 * 378 * @param useCache When true, line is obtained from mPreviousline. 379 * Otherwise, getLine() is used. 380 * @param allowGarbage When true, ignore non "END:VCARD" line. 381 * @throws IOException 382 * @throws VCardException 383 */ 384 protected void readEndVCard(boolean useCache, boolean allowGarbage) throws IOException, 385 VCardException { 386 String line; 387 do { 388 if (useCache) { 389 // Though vCard specification does not allow lower cases, 390 // some data may have them, so we allow it. 391 line = mPreviousLine; 392 } else { 393 while (true) { 394 line = getLine(); 395 if (line == null) { 396 throw new VCardException("Expected END:VCARD was not found."); 397 } else if (line.trim().length() > 0) { 398 break; 399 } 400 } 401 } 402 403 String[] strArray = line.split(":", 2); 404 if (strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END") 405 && strArray[1].trim().equalsIgnoreCase("VCARD")) { 406 return; 407 } else if (!allowGarbage) { 408 throw new VCardException("END:VCARD != \"" + mPreviousLine + "\""); 409 } 410 useCache = false; 411 } while (allowGarbage); 412 } 413 414 /* 415 * items = *CRLF item / item 416 */ 417 protected void parseItems() throws IOException, VCardException { 418 boolean ended = false; 419 420 final long beforeBeginProperty = System.currentTimeMillis(); 421 mInterpreter.startProperty(); 422 mTimeStartProperty += System.currentTimeMillis() - beforeBeginProperty; 423 ended = parseItem(); 424 if (!ended) { 425 final long beforeEndProperty = System.currentTimeMillis(); 426 mInterpreter.endProperty(); 427 mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty; 428 } 429 430 while (!ended) { 431 final long beforeStartProperty = System.currentTimeMillis(); 432 mInterpreter.startProperty(); 433 mTimeStartProperty += System.currentTimeMillis() - beforeStartProperty; 434 try { 435 ended = parseItem(); 436 } catch (VCardInvalidCommentLineException e) { 437 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored."); 438 ended = false; 439 } 440 441 if (!ended) { 442 final long beforeEndProperty = System.currentTimeMillis(); 443 mInterpreter.endProperty(); 444 mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty; 445 } 446 } 447 } 448 449 /* 450 * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR" 451 * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts 452 * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."] 453 * "AGENT" [params] ":" vcard CRLF 454 */ 455 protected boolean parseItem() throws IOException, VCardException { 456 mCurrentEncoding = DEFAULT_ENCODING; 457 458 final String line = getNonEmptyLine(); 459 long start = System.currentTimeMillis(); 460 461 String[] propertyNameAndValue = separateLineAndHandleGroup(line); 462 if (propertyNameAndValue == null) { 463 return true; 464 } 465 if (propertyNameAndValue.length != 2) { 466 throw new VCardInvalidLineException("Invalid line \"" + line + "\""); 467 } 468 String propertyName = propertyNameAndValue[0].toUpperCase(); 469 String propertyValue = propertyNameAndValue[1]; 470 471 mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start; 472 473 if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) { 474 start = System.currentTimeMillis(); 475 handleMultiplePropertyValue(propertyName, propertyValue); 476 mTimeParseAdrOrgN += System.currentTimeMillis() - start; 477 return false; 478 } else if (propertyName.equals("AGENT")) { 479 handleAgent(propertyValue); 480 return false; 481 } else if (isValidPropertyName(propertyName)) { 482 if (propertyName.equals("BEGIN")) { 483 if (propertyValue.equals("VCARD")) { 484 throw new VCardNestedException("This vCard has nested vCard data in it."); 485 } else { 486 throw new VCardException("Unknown BEGIN type: " + propertyValue); 487 } 488 } else if (propertyName.equals("VERSION") && !propertyValue.equals(getVersionString())) { 489 throw new VCardVersionException("Incompatible version: " + propertyValue + " != " 490 + getVersionString()); 491 } 492 start = System.currentTimeMillis(); 493 handlePropertyValue(propertyName, propertyValue); 494 mTimeParsePropertyValues += System.currentTimeMillis() - start; 495 return false; 496 } 497 498 throw new VCardException("Unknown property name: \"" + propertyName + "\""); 499 } 500 501 // For performance reason, the states for group and property name are merged into one. 502 static private final int STATE_GROUP_OR_PROPERTY_NAME = 0; 503 static private final int STATE_PARAMS = 1; 504 // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not. 505 static private final int STATE_PARAMS_IN_DQUOTE = 2; 506 507 protected String[] separateLineAndHandleGroup(String line) throws VCardException { 508 final String[] propertyNameAndValue = new String[2]; 509 final int length = line.length(); 510 if (length > 0 && line.charAt(0) == '#') { 511 throw new VCardInvalidCommentLineException(); 512 } 513 514 int state = STATE_GROUP_OR_PROPERTY_NAME; 515 int nameIndex = 0; 516 517 // This loop is developed so that we don't have to take care of bottle neck here. 518 // Refactor carefully when you need to do so. 519 for (int i = 0; i < length; i++) { 520 final char ch = line.charAt(i); 521 switch (state) { 522 case STATE_GROUP_OR_PROPERTY_NAME: { 523 if (ch == ':') { // End of a property name. 524 final String propertyName = line.substring(nameIndex, i); 525 if (propertyName.equalsIgnoreCase("END")) { 526 mPreviousLine = line; 527 return null; 528 } 529 mInterpreter.propertyName(propertyName); 530 propertyNameAndValue[0] = propertyName; 531 if (i < length - 1) { 532 propertyNameAndValue[1] = line.substring(i + 1); 533 } else { 534 propertyNameAndValue[1] = ""; 535 } 536 return propertyNameAndValue; 537 } else if (ch == '.') { // Each group is followed by the dot. 538 final String groupName = line.substring(nameIndex, i); 539 if (groupName.length() == 0) { 540 Log.w(LOG_TAG, "Empty group found. Ignoring."); 541 } else { 542 mInterpreter.propertyGroup(groupName); 543 } 544 nameIndex = i + 1; // Next should be another group or a property name. 545 } else if (ch == ';') { // End of property name and beginneng of parameters. 546 final String propertyName = line.substring(nameIndex, i); 547 if (propertyName.equalsIgnoreCase("END")) { 548 mPreviousLine = line; 549 return null; 550 } 551 mInterpreter.propertyName(propertyName); 552 propertyNameAndValue[0] = propertyName; 553 nameIndex = i + 1; 554 state = STATE_PARAMS; // Start parameter parsing. 555 } 556 // TODO: comma support (in vCard 3.0 and 4.0). 557 break; 558 } 559 case STATE_PARAMS: { 560 if (ch == '"') { 561 if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) { 562 Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " + 563 "Silently allow it"); 564 } 565 state = STATE_PARAMS_IN_DQUOTE; 566 } else if (ch == ';') { // Starts another param. 567 handleParams(line.substring(nameIndex, i)); 568 nameIndex = i + 1; 569 } else if (ch == ':') { // End of param and beginenning of values. 570 handleParams(line.substring(nameIndex, i)); 571 if (i < length - 1) { 572 propertyNameAndValue[1] = line.substring(i + 1); 573 } else { 574 propertyNameAndValue[1] = ""; 575 } 576 return propertyNameAndValue; 577 } 578 break; 579 } 580 case STATE_PARAMS_IN_DQUOTE: { 581 if (ch == '"') { 582 if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) { 583 Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " + 584 "Silently allow it"); 585 } 586 state = STATE_PARAMS; 587 } 588 break; 589 } 590 } 591 } 592 593 throw new VCardInvalidLineException("Invalid line: \"" + line + "\""); 594 } 595 596 /* 597 * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param / 598 * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws] 599 * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "=" 600 * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "=" 601 * [ws] word / knowntype 602 */ 603 protected void handleParams(String params) throws VCardException { 604 final String[] strArray = params.split("=", 2); 605 if (strArray.length == 2) { 606 final String paramName = strArray[0].trim().toUpperCase(); 607 String paramValue = strArray[1].trim(); 608 if (paramName.equals("TYPE")) { 609 handleType(paramValue); 610 } else if (paramName.equals("VALUE")) { 611 handleValue(paramValue); 612 } else if (paramName.equals("ENCODING")) { 613 handleEncoding(paramValue); 614 } else if (paramName.equals("CHARSET")) { 615 handleCharset(paramValue); 616 } else if (paramName.equals("LANGUAGE")) { 617 handleLanguage(paramValue); 618 } else if (paramName.startsWith("X-")) { 619 handleAnyParam(paramName, paramValue); 620 } else { 621 throw new VCardException("Unknown type \"" + paramName + "\""); 622 } 623 } else { 624 handleParamWithoutName(strArray[0]); 625 } 626 } 627 628 /** 629 * vCard 3.0 parser implementation may throw VCardException. 630 */ 631 @SuppressWarnings("unused") 632 protected void handleParamWithoutName(final String paramValue) throws VCardException { 633 handleType(paramValue); 634 } 635 636 /* 637 * ptypeval = knowntype / "X-" word 638 */ 639 protected void handleType(final String ptypeval) { 640 if (!(getKnownTypeSet().contains(ptypeval.toUpperCase()) 641 || ptypeval.startsWith("X-")) 642 && !mUnknownTypeSet.contains(ptypeval)) { 643 mUnknownTypeSet.add(ptypeval); 644 Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval)); 645 } 646 mInterpreter.propertyParamType("TYPE"); 647 mInterpreter.propertyParamValue(ptypeval); 648 } 649 650 /* 651 * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word 652 */ 653 protected void handleValue(final String pvalueval) { 654 if (!(getKnownValueSet().contains(pvalueval.toUpperCase()) 655 || pvalueval.startsWith("X-") 656 || mUnknownValueSet.contains(pvalueval))) { 657 mUnknownValueSet.add(pvalueval); 658 Log.w(LOG_TAG, String.format( 659 "The value unsupported by TYPE of %s: ", getVersion(), pvalueval)); 660 } 661 mInterpreter.propertyParamType("VALUE"); 662 mInterpreter.propertyParamValue(pvalueval); 663 } 664 665 /* 666 * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word 667 */ 668 protected void handleEncoding(String pencodingval) throws VCardException { 669 if (getAvailableEncodingSet().contains(pencodingval) || 670 pencodingval.startsWith("X-")) { 671 mInterpreter.propertyParamType("ENCODING"); 672 mInterpreter.propertyParamValue(pencodingval); 673 mCurrentEncoding = pencodingval; 674 } else { 675 throw new VCardException("Unknown encoding \"" + pencodingval + "\""); 676 } 677 } 678 679 /** 680 * <p> 681 * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521), 682 * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc. 683 * We allow any charset. 684 * </p> 685 */ 686 protected void handleCharset(String charsetval) { 687 mInterpreter.propertyParamType("CHARSET"); 688 mInterpreter.propertyParamValue(charsetval); 689 } 690 691 /** 692 * See also Section 7.1 of RFC 1521 693 */ 694 protected void handleLanguage(String langval) throws VCardException { 695 String[] strArray = langval.split("-"); 696 if (strArray.length != 2) { 697 throw new VCardException("Invalid Language: \"" + langval + "\""); 698 } 699 String tmp = strArray[0]; 700 int length = tmp.length(); 701 for (int i = 0; i < length; i++) { 702 if (!isAsciiLetter(tmp.charAt(i))) { 703 throw new VCardException("Invalid Language: \"" + langval + "\""); 704 } 705 } 706 tmp = strArray[1]; 707 length = tmp.length(); 708 for (int i = 0; i < length; i++) { 709 if (!isAsciiLetter(tmp.charAt(i))) { 710 throw new VCardException("Invalid Language: \"" + langval + "\""); 711 } 712 } 713 mInterpreter.propertyParamType(VCardConstants.PARAM_LANGUAGE); 714 mInterpreter.propertyParamValue(langval); 715 } 716 717 private boolean isAsciiLetter(char ch) { 718 if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 719 return true; 720 } 721 return false; 722 } 723 724 /** 725 * Mainly for "X-" type. This accepts any kind of type without check. 726 */ 727 protected void handleAnyParam(String paramName, String paramValue) { 728 mInterpreter.propertyParamType(paramName); 729 mInterpreter.propertyParamValue(paramValue); 730 } 731 732 protected void handlePropertyValue(String propertyName, String propertyValue) 733 throws IOException, VCardException { 734 final String upperEncoding = mCurrentEncoding.toUpperCase(); 735 if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) { 736 final long start = System.currentTimeMillis(); 737 final String result = getQuotedPrintable(propertyValue); 738 final ArrayList<String> v = new ArrayList<String>(); 739 v.add(result); 740 mInterpreter.propertyValues(v); 741 mTimeHandleQuotedPrintable += System.currentTimeMillis() - start; 742 } else if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64) 743 || upperEncoding.equals(VCardConstants.PARAM_ENCODING_B)) { 744 final long start = System.currentTimeMillis(); 745 // It is very rare, but some BASE64 data may be so big that 746 // OutOfMemoryError occurs. To ignore such cases, use try-catch. 747 try { 748 final ArrayList<String> arrayList = new ArrayList<String>(); 749 arrayList.add(getBase64(propertyValue)); 750 mInterpreter.propertyValues(arrayList); 751 } catch (OutOfMemoryError error) { 752 Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!"); 753 mInterpreter.propertyValues(null); 754 } 755 mTimeHandleBase64 += System.currentTimeMillis() - start; 756 } else { 757 if (!(upperEncoding.equals("7BIT") || upperEncoding.equals("8BIT") || 758 upperEncoding.startsWith("X-"))) { 759 Log.w(LOG_TAG, 760 String.format("The encoding \"%s\" is unsupported by vCard %s", 761 mCurrentEncoding, getVersionString())); 762 } 763 764 // Some device uses line folding defined in RFC 2425, which is not allowed 765 // in vCard 2.1 (while needed in vCard 3.0). 766 // 767 // e.g. 768 // BEGIN:VCARD 769 // VERSION:2.1 770 // N:;Omega;;; 771 // EMAIL;INTERNET:"Omega" 772 // <omega@example.com> 773 // FN:Omega 774 // END:VCARD 775 // 776 // The vCard above assumes that email address should become: 777 // "Omega" <omega@example.com> 778 // 779 // But vCard 2.1 requires Quote-Printable when a line contains line break(s). 780 // 781 // For more information about line folding, 782 // see "5.8.1. Line delimiting and folding" in RFC 2425. 783 // 784 // We take care of this case more formally in vCard 3.0, so we only need to 785 // do this in vCard 2.1. 786 if (getVersion() == VCardConfig.VERSION_21) { 787 StringBuilder builder = null; 788 while (true) { 789 final String nextLine = peekLine(); 790 // We don't need to care too much about this exceptional case, 791 // but we should not wrongly eat up "END:VCARD", since it critically 792 // breaks this parser's state machine. 793 // Thus we roughly look over the next line and confirm it is at least not 794 // "END:VCARD". This extra fee is worth paying. This is exceptional 795 // anyway. 796 if (!TextUtils.isEmpty(nextLine) && 797 nextLine.charAt(0) == ' ' && 798 !"END:VCARD".contains(nextLine.toUpperCase())) { 799 getLine(); // Drop the next line. 800 801 if (builder == null) { 802 builder = new StringBuilder(); 803 builder.append(propertyValue); 804 } 805 builder.append(nextLine.substring(1)); 806 } else { 807 break; 808 } 809 } 810 if (builder != null) { 811 propertyValue = builder.toString(); 812 } 813 } 814 815 final long start = System.currentTimeMillis(); 816 ArrayList<String> v = new ArrayList<String>(); 817 v.add(maybeUnescapeText(propertyValue)); 818 mInterpreter.propertyValues(v); 819 mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start; 820 } 821 } 822 823 /** 824 * <p> 825 * Parses and returns Quoted-Printable. 826 * </p> 827 * 828 * @param firstString The string following a parameter name and attributes. 829 * Example: "string" in 830 * "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r". 831 * @return whole Quoted-Printable string, including a given argument and 832 * following lines. Excludes the last empty line following to Quoted 833 * Printable lines. 834 * @throws IOException 835 * @throws VCardException 836 */ 837 private String getQuotedPrintable(String firstString) throws IOException, VCardException { 838 // Specifically, there may be some padding between = and CRLF. 839 // See the following: 840 // 841 // qp-line := *(qp-segment transport-padding CRLF) 842 // qp-part transport-padding 843 // qp-segment := qp-section *(SPACE / TAB) "=" 844 // ; Maximum length of 76 characters 845 // 846 // e.g. (from RFC 2045) 847 // Now's the time = 848 // for all folk to come= 849 // to the aid of their country. 850 if (firstString.trim().endsWith("=")) { 851 // remove "transport-padding" 852 int pos = firstString.length() - 1; 853 while (firstString.charAt(pos) != '=') { 854 } 855 StringBuilder builder = new StringBuilder(); 856 builder.append(firstString.substring(0, pos + 1)); 857 builder.append("\r\n"); 858 String line; 859 while (true) { 860 line = getLine(); 861 if (line == null) { 862 throw new VCardException("File ended during parsing a Quoted-Printable String"); 863 } 864 if (line.trim().endsWith("=")) { 865 // remove "transport-padding" 866 pos = line.length() - 1; 867 while (line.charAt(pos) != '=') { 868 } 869 builder.append(line.substring(0, pos + 1)); 870 builder.append("\r\n"); 871 } else { 872 builder.append(line); 873 break; 874 } 875 } 876 return builder.toString(); 877 } else { 878 return firstString; 879 } 880 } 881 882 protected String getBase64(String firstString) throws IOException, VCardException { 883 final StringBuilder builder = new StringBuilder(); 884 builder.append(firstString); 885 886 while (true) { 887 final String line = peekLine(); 888 if (line == null) { 889 throw new VCardException("File ended during parsing BASE64 binary"); 890 } 891 892 // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't 893 // have them. We try to detect those cases using semi-colon, given BASE64 doesn't 894 // contain it. Specifically BASE64 doesn't have semi-colon in it, so we should be able 895 // to detect the case safely. 896 if (line.contains(":")) { 897 if (getKnownPropertyNameSet().contains( 898 line.substring(0, line.indexOf(":")).toUpperCase())) { 899 Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " + 900 "which must not contain semi-colon. Treat the line as next property."); 901 Log.w(LOG_TAG, "Problematic line: " + line.trim()); 902 break; 903 } 904 } 905 906 // Consume the line. 907 getLine(); 908 909 if (line.length() == 0) { 910 break; 911 } 912 builder.append(line); 913 } 914 915 return builder.toString(); 916 } 917 918 /** 919 * <p> 920 * Mainly for "ADR", "ORG", and "N" 921 * </p> 922 */ 923 /* 924 * addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr, 925 * Street, Locality, Region, Postal Code, Country Name orgparts = 926 * *(strnosemi ";") strnosemi ; First is Organization Name, remainder are 927 * Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family, 928 * Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III, 929 * Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a 930 * semicolon in this string, it must be escaped ; with a "\" character. We 931 * do not care the number of "strnosemi" here. We are not sure whether we 932 * should add "\" CRLF to each value. We exclude them for now. 933 */ 934 protected void handleMultiplePropertyValue(String propertyName, String propertyValue) 935 throws IOException, VCardException { 936 // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some 937 // softwares/devices 938 // emit such data. 939 if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) { 940 propertyValue = getQuotedPrintable(propertyValue); 941 } 942 943 mInterpreter.propertyValues(VCardUtils.constructListFromValue(propertyValue, 944 getVersion())); 945 } 946 947 /* 948 * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an 949 * error toward the AGENT property. 950 * // TODO: Support AGENT property. 951 * item = 952 * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws] 953 * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD" 954 */ 955 protected void handleAgent(final String propertyValue) throws VCardException { 956 if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) { 957 // Apparently invalid line seen in Windows Mobile 6.5. Ignore them. 958 return; 959 } else { 960 throw new VCardAgentNotSupportedException("AGENT Property is not supported now."); 961 } 962 } 963 964 /** 965 * For vCard 3.0. 966 */ 967 protected String maybeUnescapeText(final String text) { 968 return text; 969 } 970 971 /** 972 * Returns unescaped String if the character should be unescaped. Return 973 * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";" 974 * while "\x" should not be. 975 */ 976 protected String maybeUnescapeCharacter(final char ch) { 977 return unescapeCharacter(ch); 978 } 979 980 /* package */ static String unescapeCharacter(final char ch) { 981 // Original vCard 2.1 specification does not allow transformation 982 // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous 983 // implementation of 984 // this class allowed them, so keep it as is. 985 if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') { 986 return String.valueOf(ch); 987 } else { 988 return null; 989 } 990 } 991 992 private void showPerformanceInfo() { 993 Log.d(LOG_TAG, "Total parsing time: " + mTimeTotal + " ms"); 994 Log.d(LOG_TAG, "Total readLine time: " + mReader.getTotalmillisecond() + " ms"); 995 Log.d(LOG_TAG, "Time for handling the beggining of the record: " + mTimeReadStartRecord 996 + " ms"); 997 Log.d(LOG_TAG, "Time for handling the end of the record: " + mTimeReadEndRecord + " ms"); 998 Log.d(LOG_TAG, "Time for parsing line, and handling group: " + mTimeParseLineAndHandleGroup 999 + " ms"); 1000 Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms"); 1001 Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms"); 1002 Log.d(LOG_TAG, "Time for handling normal property values: " + mTimeHandleMiscPropertyValue 1003 + " ms"); 1004 Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + mTimeHandleQuotedPrintable + " ms"); 1005 Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms"); 1006 } 1007 1008 /** 1009 * @return {@link VCardConfig#VERSION_21} 1010 */ 1011 protected int getVersion() { 1012 return VCardConfig.VERSION_21; 1013 } 1014 1015 /** 1016 * @return {@link VCardConfig#VERSION_30} 1017 */ 1018 protected String getVersionString() { 1019 return VCardConstants.VERSION_V21; 1020 } 1021 1022 protected Set<String> getKnownPropertyNameSet() { 1023 return VCardParser_V21.sKnownPropertyNameSet; 1024 } 1025 1026 protected Set<String> getKnownTypeSet() { 1027 return VCardParser_V21.sKnownTypeSet; 1028 } 1029 1030 protected Set<String> getKnownValueSet() { 1031 return VCardParser_V21.sKnownValueSet; 1032 } 1033 1034 protected Set<String> getAvailableEncodingSet() { 1035 return VCardParser_V21.sAvailableEncoding; 1036 } 1037 1038 protected String getDefaultEncoding() { 1039 return DEFAULT_ENCODING; 1040 } 1041 1042 1043 public void parse(InputStream is, VCardInterpreter interpreter) 1044 throws IOException, VCardException { 1045 if (is == null) { 1046 throw new NullPointerException("InputStream must not be null."); 1047 } 1048 1049 final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset); 1050 mReader = new CustomBufferedReader(tmpReader); 1051 1052 mInterpreter = (interpreter != null ? interpreter : new EmptyInterpreter()); 1053 1054 final long start = System.currentTimeMillis(); 1055 if (mInterpreter != null) { 1056 mInterpreter.start(); 1057 } 1058 parseVCardFile(); 1059 if (mInterpreter != null) { 1060 mInterpreter.end(); 1061 } 1062 mTimeTotal += System.currentTimeMillis() - start; 1063 1064 if (VCardConfig.showPerformanceLog()) { 1065 showPerformanceInfo(); 1066 } 1067 } 1068 1069 public final void cancel() { 1070 Log.i(LOG_TAG, "ParserImpl received cancel operation."); 1071 mCanceled = true; 1072 } 1073} 1074