// VCardParserImpl_V21.java revision 1de396f6df89363169d3a2e61a61fa98d12c1ef8
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16package com.android.vcard; 17 18import com.android.vcard.exception.VCardAgentNotSupportedException; 19import com.android.vcard.exception.VCardException; 20import com.android.vcard.exception.VCardInvalidCommentLineException; 21import com.android.vcard.exception.VCardInvalidLineException; 22import com.android.vcard.exception.VCardVersionException; 23 24import android.text.TextUtils; 25import android.util.Base64; 26import android.util.Log; 27 28import java.io.BufferedReader; 29import java.io.IOException; 30import java.io.InputStream; 31import java.io.InputStreamReader; 32import java.io.Reader; 33import java.util.ArrayList; 34import java.util.Arrays; 35import java.util.Collection; 36import java.util.HashSet; 37import java.util.List; 38import java.util.Set; 39 40/** 41 * <p> 42 * Basic implementation achieving vCard parsing. Based on vCard 2.1. 43 * </p> 44 * @hide 45 */ 46/* package */ class VCardParserImpl_V21 { 47 private static final String LOG_TAG = VCardConstants.LOG_TAG; 48 49 protected static final class CustomBufferedReader extends BufferedReader { 50 private long mTime; 51 52 /** 53 * Needed since "next line" may be null due to end of line. 
54 */ 55 private boolean mNextLineIsValid; 56 private String mNextLine; 57 58 public CustomBufferedReader(Reader in) { 59 super(in); 60 } 61 62 @Override 63 public String readLine() throws IOException { 64 if (mNextLineIsValid) { 65 final String ret = mNextLine; 66 mNextLine = null; 67 mNextLineIsValid = false; 68 return ret; 69 } 70 71 final long start = System.currentTimeMillis(); 72 final String line = super.readLine(); 73 final long end = System.currentTimeMillis(); 74 mTime += end - start; 75 return line; 76 } 77 78 /** 79 * Read one line, but make this object store it in its queue. 80 */ 81 public String peekLine() throws IOException { 82 if (!mNextLineIsValid) { 83 final long start = System.currentTimeMillis(); 84 final String line = super.readLine(); 85 final long end = System.currentTimeMillis(); 86 mTime += end - start; 87 88 mNextLine = line; 89 mNextLineIsValid = true; 90 } 91 92 return mNextLine; 93 } 94 95 public long getTotalmillisecond() { 96 return mTime; 97 } 98 } 99 100 private static final String DEFAULT_ENCODING = "8BIT"; 101 private static final String DEFAULT_CHARSET = "UTF-8"; 102 103 protected final String mIntermediateCharset; 104 105 private final List<VCardInterpreter> mInterpreterList = new ArrayList<VCardInterpreter>(); 106 private boolean mCanceled; 107 108 /** 109 * <p> 110 * The encoding type for deconding byte streams. This member variable is 111 * reset to a default encoding every time when a new item comes. 112 * </p> 113 * <p> 114 * "Encoding" in vCard is different from "Charset". It is mainly used for 115 * addresses, notes, images. "7BIT", "8BIT", "BASE64", and 116 * "QUOTED-PRINTABLE" are known examples. 117 * </p> 118 */ 119 protected String mCurrentEncoding; 120 121 protected String mCurrentCharset; 122 123 /** 124 * <p> 125 * The reader object to be used internally. 126 * </p> 127 * <p> 128 * Developers should not directly read a line from this object. Use 129 * getLine() unless there some reason. 
130 * </p> 131 */ 132 protected CustomBufferedReader mReader; 133 134 /** 135 * <p> 136 * Set for storing unkonwn TYPE attributes, which is not acceptable in vCard 137 * specification, but happens to be seen in real world vCard. 138 * </p> 139 * <p> 140 * We just accept those invalid types after emitting a warning for each of it. 141 * </p> 142 */ 143 protected final Set<String> mUnknownTypeSet = new HashSet<String>(); 144 145 /** 146 * <p> 147 * Set for storing unkonwn VALUE attributes, which is not acceptable in 148 * vCard specification, but happens to be seen in real world vCard. 149 * </p> 150 * <p> 151 * We just accept those invalid types after emitting a warning for each of it. 152 * </p> 153 */ 154 protected final Set<String> mUnknownValueSet = new HashSet<String>(); 155 156 157 public VCardParserImpl_V21() { 158 this(VCardConfig.VCARD_TYPE_DEFAULT); 159 } 160 161 public VCardParserImpl_V21(int vcardType) { 162 mIntermediateCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET; 163 } 164 165 /** 166 * <p> 167 * Parses the file at the given position. 168 * </p> 169 */ 170 // <pre class="prettyprint">vcard_file = [wsls] vcard [wsls]</pre> 171 protected void parseVCardFile() throws IOException, VCardException { 172 while (true) { 173 synchronized (this) { 174 if (mCanceled) { 175 Log.i(LOG_TAG, "Cancel request has come. exitting parse operation."); 176 break; 177 } 178 } 179 if (!parseOneVCard()) { 180 break; 181 } 182 } 183 } 184 185 /** 186 * @return true when a given property name is a valid property name. 187 */ 188 protected boolean isValidPropertyName(final String propertyName) { 189 if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) || 190 propertyName.startsWith("X-")) 191 && !mUnknownTypeSet.contains(propertyName)) { 192 mUnknownTypeSet.add(propertyName); 193 Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName); 194 } 195 return true; 196 } 197 198 /** 199 * @return String. 
It may be null, or its length may be 0 200 * @throws IOException 201 */ 202 protected String getLine() throws IOException { 203 return mReader.readLine(); 204 } 205 206 protected String peekLine() throws IOException { 207 return mReader.peekLine(); 208 } 209 210 /** 211 * @return String with it's length > 0 212 * @throws IOException 213 * @throws VCardException when the stream reached end of line 214 */ 215 protected String getNonEmptyLine() throws IOException, VCardException { 216 String line; 217 while (true) { 218 line = getLine(); 219 if (line == null) { 220 throw new VCardException("Reached end of buffer."); 221 } else if (line.trim().length() > 0) { 222 return line; 223 } 224 } 225 } 226 227 /** 228 * <code> 229 * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF 230 * items *CRLF 231 * "END" [ws] ":" [ws] "VCARD" 232 * </code> 233 */ 234 private boolean parseOneVCard() throws IOException, VCardException { 235 // reset for this entire vCard. 236 mCurrentEncoding = DEFAULT_ENCODING; 237 mCurrentCharset = DEFAULT_CHARSET; 238 239 boolean allowGarbage = false; 240 if (!readBeginVCard(allowGarbage)) { 241 return false; 242 } 243 for (VCardInterpreter interpreter : mInterpreterList) { 244 interpreter.onEntryStarted(); 245 } 246 parseItems(); 247 for (VCardInterpreter interpreter : mInterpreterList) { 248 interpreter.onEntryEnded(); 249 } 250 return true; 251 } 252 253 /** 254 * @return True when successful. False when reaching the end of line 255 * @throws IOException 256 * @throws VCardException 257 */ 258 protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException { 259 // TODO: use consructPropertyLine(). 
260 String line; 261 do { 262 while (true) { 263 line = getLine(); 264 if (line == null) { 265 return false; 266 } else if (line.trim().length() > 0) { 267 break; 268 } 269 } 270 final String[] strArray = line.split(":", 2); 271 final int length = strArray.length; 272 273 // Although vCard 2.1/3.0 specification does not allow lower cases, 274 // we found vCard file emitted by some external vCard expoter have such 275 // invalid Strings. 276 // e.g. BEGIN:vCard 277 if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN") 278 && strArray[1].trim().equalsIgnoreCase("VCARD")) { 279 return true; 280 } else if (!allowGarbage) { 281 throw new VCardException("Expected String \"BEGIN:VCARD\" did not come " 282 + "(Instead, \"" + line + "\" came)"); 283 } 284 } while (allowGarbage); 285 286 throw new VCardException("Reached where must not be reached."); 287 } 288 289 /** 290 * Parses lines other than the first "BEGIN:VCARD". Takes care of "END:VCARD"n and 291 * "BEGIN:VCARD" in nested vCard. 292 */ 293 /* 294 * items = *CRLF item / item 295 * 296 * Note: BEGIN/END aren't include in the original spec while this method handles them. 297 */ 298 protected void parseItems() throws IOException, VCardException { 299 boolean ended = false; 300 301 try { 302 ended = parseItem(); 303 } catch (VCardInvalidCommentLineException e) { 304 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored."); 305 } 306 307 while (!ended) { 308 try { 309 ended = parseItem(); 310 } catch (VCardInvalidCommentLineException e) { 311 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. 
Ignored."); 312 } 313 } 314 } 315 316 /* 317 * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR" 318 * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts 319 * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."] 320 * "AGENT" [params] ":" vcard CRLF 321 */ 322 protected boolean parseItem() throws IOException, VCardException { 323 // Reset for an item. 324 mCurrentEncoding = DEFAULT_ENCODING; 325 326 final String line = getNonEmptyLine(); 327 final VCardProperty propertyData = constructPropertyData(line); 328 329 final String propertyNameUpper = propertyData.getName().toUpperCase(); 330 final String propertyRawValue = propertyData.getRawValue(); 331 332 if (propertyNameUpper.equals(VCardConstants.PROPERTY_BEGIN)) { 333 if (propertyRawValue.equalsIgnoreCase("VCARD")) { 334 handleNest(); 335 } else { 336 throw new VCardException("Unknown BEGIN type: " + propertyRawValue); 337 } 338 } else if (propertyNameUpper.equals(VCardConstants.PROPERTY_END)) { 339 if (propertyRawValue.equalsIgnoreCase("VCARD")) { 340 return true; // Ended. 
341 } else { 342 throw new VCardException("Unknown END type: " + propertyRawValue); 343 } 344 } else { 345 parseItemInter(propertyData, propertyNameUpper); 346 } 347 return false; 348 } 349 350 private void parseItemInter(VCardProperty property, String propertyNameUpper) 351 throws IOException, VCardException { 352 String propertyRawValue = property.getRawValue(); 353 if (propertyNameUpper.equals(VCardConstants.PROPERTY_AGENT)) { 354 handleAgent(property); 355 } else if (isValidPropertyName(propertyNameUpper)) { 356 if (propertyNameUpper.equals(VCardConstants.PROPERTY_VERSION) && 357 !propertyRawValue.equals(getVersionString())) { 358 throw new VCardVersionException( 359 "Incompatible version: " + propertyRawValue + " != " + getVersionString()); 360 } 361 handlePropertyValue(property, propertyNameUpper); 362 } else { 363 throw new VCardException("Unknown property name: \"" + propertyNameUpper + "\""); 364 } 365 } 366 367 private void handleNest() throws IOException, VCardException { 368 for (VCardInterpreter interpreter : mInterpreterList) { 369 interpreter.onEntryStarted(); 370 } 371 parseItems(); 372 for (VCardInterpreter interpreter : mInterpreterList) { 373 interpreter.onEntryEnded(); 374 } 375 } 376 377 // For performance reason, the states for group and property name are merged into one. 378 static private final int STATE_GROUP_OR_PROPERTY_NAME = 0; 379 static private final int STATE_PARAMS = 1; 380 // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not. 
381 static private final int STATE_PARAMS_IN_DQUOTE = 2; 382 383 protected VCardProperty constructPropertyData(String line) throws VCardException { 384 final VCardProperty propertyData = new VCardProperty(); 385 386 final int length = line.length(); 387 if (length > 0 && line.charAt(0) == '#') { 388 throw new VCardInvalidCommentLineException(); 389 } 390 391 int state = STATE_GROUP_OR_PROPERTY_NAME; 392 int nameIndex = 0; 393 394 // This loop is developed so that we don't have to take care of bottle neck here. 395 // Refactor carefully when you need to do so. 396 for (int i = 0; i < length; i++) { 397 final char ch = line.charAt(i); 398 switch (state) { 399 case STATE_GROUP_OR_PROPERTY_NAME: { 400 if (ch == ':') { // End of a property name. 401 final String propertyName = line.substring(nameIndex, i); 402 propertyData.setName(propertyName); 403 propertyData.setRawValue( i < length - 1 ? line.substring(i + 1) : ""); 404 return propertyData; 405 } else if (ch == '.') { // Each group is followed by the dot. 406 final String groupName = line.substring(nameIndex, i); 407 if (groupName.length() == 0) { 408 Log.w(LOG_TAG, "Empty group found. Ignoring."); 409 } else { 410 propertyData.addGroup(groupName); 411 } 412 nameIndex = i + 1; // Next should be another group or a property name. 413 } else if (ch == ';') { // End of property name and beginneng of parameters. 414 final String propertyName = line.substring(nameIndex, i); 415 propertyData.setName(propertyName); 416 nameIndex = i + 1; 417 state = STATE_PARAMS; // Start parameter parsing. 418 } 419 // TODO: comma support (in vCard 3.0 and 4.0). 420 break; 421 } 422 case STATE_PARAMS: { 423 if (ch == '"') { 424 if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) { 425 Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " + 426 "Silently allow it"); 427 } 428 state = STATE_PARAMS_IN_DQUOTE; 429 } else if (ch == ';') { // Starts another param. 
430 handleParams(propertyData, line.substring(nameIndex, i)); 431 nameIndex = i + 1; 432 } else if (ch == ':') { // End of param and beginenning of values. 433 handleParams(propertyData, line.substring(nameIndex, i)); 434 propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : ""); 435 return propertyData; 436 } 437 break; 438 } 439 case STATE_PARAMS_IN_DQUOTE: { 440 if (ch == '"') { 441 if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) { 442 Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " + 443 "Silently allow it"); 444 } 445 state = STATE_PARAMS; 446 } 447 break; 448 } 449 } 450 } 451 452 throw new VCardInvalidLineException("Invalid line: \"" + line + "\""); 453 } 454 455 /* 456 * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param / 457 * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws] 458 * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "=" 459 * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "=" 460 * [ws] word / knowntype 461 */ 462 protected void handleParams(VCardProperty propertyData, String params) 463 throws VCardException { 464 final String[] strArray = params.split("=", 2); 465 if (strArray.length == 2) { 466 final String paramName = strArray[0].trim().toUpperCase(); 467 String paramValue = strArray[1].trim(); 468 if (paramName.equals("TYPE")) { 469 handleType(propertyData, paramValue); 470 } else if (paramName.equals("VALUE")) { 471 handleValue(propertyData, paramValue); 472 } else if (paramName.equals("ENCODING")) { 473 handleEncoding(propertyData, paramValue); 474 } else if (paramName.equals("CHARSET")) { 475 handleCharset(propertyData, paramValue); 476 } else if (paramName.equals("LANGUAGE")) { 477 handleLanguage(propertyData, paramValue); 478 } else if (paramName.startsWith("X-")) { 479 handleAnyParam(propertyData, paramName, paramValue); 480 } else { 481 throw new VCardException("Unknown type \"" + paramName + "\""); 482 } 483 } 
else { 484 handleParamWithoutName(propertyData, strArray[0]); 485 } 486 } 487 488 /** 489 * vCard 3.0 parser implementation may throw VCardException. 490 */ 491 protected void handleParamWithoutName(VCardProperty propertyData, final String paramValue) { 492 handleType(propertyData, paramValue); 493 } 494 495 /* 496 * ptypeval = knowntype / "X-" word 497 */ 498 protected void handleType(VCardProperty propertyData, final String ptypeval) { 499 if (!(getKnownTypeSet().contains(ptypeval.toUpperCase()) 500 || ptypeval.startsWith("X-")) 501 && !mUnknownTypeSet.contains(ptypeval)) { 502 mUnknownTypeSet.add(ptypeval); 503 Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval)); 504 } 505 propertyData.addParameter(VCardConstants.PARAM_TYPE, ptypeval); 506 } 507 508 /* 509 * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word 510 */ 511 protected void handleValue(VCardProperty propertyData, final String pvalueval) { 512 if (!(getKnownValueSet().contains(pvalueval.toUpperCase()) 513 || pvalueval.startsWith("X-") 514 || mUnknownValueSet.contains(pvalueval))) { 515 mUnknownValueSet.add(pvalueval); 516 Log.w(LOG_TAG, String.format( 517 "The value unsupported by TYPE of %s: ", getVersion(), pvalueval)); 518 } 519 propertyData.addParameter(VCardConstants.PARAM_VALUE, pvalueval); 520 } 521 522 /* 523 * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word 524 */ 525 protected void handleEncoding(VCardProperty propertyData, String pencodingval) 526 throws VCardException { 527 if (getAvailableEncodingSet().contains(pencodingval) || 528 pencodingval.startsWith("X-")) { 529 propertyData.addParameter(VCardConstants.PARAM_ENCODING, pencodingval); 530 // Update encoding right away, as this is needed to understanding other params. 
531 mCurrentEncoding = pencodingval; 532 } else { 533 throw new VCardException("Unknown encoding \"" + pencodingval + "\""); 534 } 535 } 536 537 /** 538 * <p> 539 * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521), 540 * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc. 541 * We allow any charset. 542 * </p> 543 */ 544 protected void handleCharset(VCardProperty propertyData, String charsetval) { 545 mCurrentCharset = charsetval; 546 propertyData.addParameter(VCardConstants.PARAM_CHARSET, charsetval); 547 } 548 549 /** 550 * See also Section 7.1 of RFC 1521 551 */ 552 protected void handleLanguage(VCardProperty propertyData, String langval) 553 throws VCardException { 554 String[] strArray = langval.split("-"); 555 if (strArray.length != 2) { 556 throw new VCardException("Invalid Language: \"" + langval + "\""); 557 } 558 String tmp = strArray[0]; 559 int length = tmp.length(); 560 for (int i = 0; i < length; i++) { 561 if (!isAsciiLetter(tmp.charAt(i))) { 562 throw new VCardException("Invalid Language: \"" + langval + "\""); 563 } 564 } 565 tmp = strArray[1]; 566 length = tmp.length(); 567 for (int i = 0; i < length; i++) { 568 if (!isAsciiLetter(tmp.charAt(i))) { 569 throw new VCardException("Invalid Language: \"" + langval + "\""); 570 } 571 } 572 propertyData.addParameter(VCardConstants.PARAM_LANGUAGE, langval); 573 } 574 575 private boolean isAsciiLetter(char ch) { 576 if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 577 return true; 578 } 579 return false; 580 } 581 582 /** 583 * Mainly for "X-" type. This accepts any kind of type without check. 
584 */ 585 protected void handleAnyParam( 586 VCardProperty propertyData, String paramName, String paramValue) { 587 propertyData.addParameter(paramName, paramValue); 588 } 589 590 protected void handlePropertyValue(VCardProperty property, String propertyName) 591 throws IOException, VCardException { 592 final String propertyNameUpper = property.getName().toUpperCase(); 593 String propertyRawValue = property.getRawValue(); 594 final String sourceCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET; 595 final Collection<String> charsetCollection = 596 property.getParameters(VCardConstants.PARAM_CHARSET); 597 String targetCharset = 598 ((charsetCollection != null) ? charsetCollection.iterator().next() : null); 599 if (TextUtils.isEmpty(targetCharset)) { 600 targetCharset = VCardConfig.DEFAULT_IMPORT_CHARSET; 601 } 602 603 // TODO: have "separableProperty" which reflects vCard spec.. 604 if (propertyNameUpper.equals(VCardConstants.PROPERTY_ADR) 605 || propertyNameUpper.equals(VCardConstants.PROPERTY_ORG) 606 || propertyNameUpper.equals(VCardConstants.PROPERTY_N)) { 607 List<String> encodedValueList = new ArrayList<String>(); 608 609 // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices emit 610 // such data. 611 if (mCurrentEncoding.equalsIgnoreCase(VCardConstants.PARAM_ENCODING_QP)) { 612 // First we retrieve Quoted-Printable String from vCard entry, which may include 613 // multiple lines. 614 final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue); 615 616 // "Raw value" from the view of users should contain all part of QP string. 617 // TODO: add test for this handling 618 property.setRawValue(quotedPrintablePart); 619 620 // We split Quoted-Printable String using semi-colon before decoding it, as 621 // the Quoted-Printable may have semi-colon, which confuses splitter. 
622 final List<String> quotedPrintableValueList = 623 VCardUtils.constructListFromValue(quotedPrintablePart, getVersion()); 624 for (String quotedPrintableValue : quotedPrintableValueList) { 625 String encoded = VCardUtils.parseQuotedPrintable(quotedPrintableValue, 626 false, sourceCharset, targetCharset); 627 encodedValueList.add(encoded); 628 } 629 } else { 630 final List<String> rawValueList = 631 VCardUtils.constructListFromValue(propertyRawValue, getVersion()); 632 for (String rawValue : rawValueList) { 633 encodedValueList.add(VCardUtils.convertStringCharset( 634 rawValue, sourceCharset, targetCharset)); 635 } 636 } 637 638 property.setValues(encodedValueList); 639 for (VCardInterpreter interpreter : mInterpreterList) { 640 interpreter.onPropertyCreated(property); 641 } 642 return; 643 } 644 645 final String upperEncoding = mCurrentEncoding.toUpperCase(); 646 if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) { 647 final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue); 648 final String propertyEncodedValue = 649 VCardUtils.parseQuotedPrintable(quotedPrintablePart, 650 false, sourceCharset, targetCharset); 651 property.setRawValue(quotedPrintablePart); 652 property.setValues(propertyEncodedValue); 653 for (VCardInterpreter interpreter : mInterpreterList) { 654 interpreter.onPropertyCreated(property); 655 } 656 } else if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64) 657 || upperEncoding.equals(VCardConstants.PARAM_ENCODING_B)) { 658 // It is very rare, but some BASE64 data may be so big that 659 // OutOfMemoryError occurs. To ignore such cases, use try-catch. 
660 try { 661 property.setByteValue(Base64.decode(getBase64(propertyRawValue), Base64.DEFAULT)); 662 for (VCardInterpreter interpreter : mInterpreterList) { 663 interpreter.onPropertyCreated(property); 664 } 665 } catch (OutOfMemoryError error) { 666 Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!"); 667 for (VCardInterpreter interpreter : mInterpreterList) { 668 interpreter.onPropertyCreated(property); 669 } 670 } 671 } else { 672 if (!(upperEncoding.equals("7BIT") || upperEncoding.equals("8BIT") || 673 upperEncoding.startsWith("X-"))) { 674 Log.w(LOG_TAG, 675 String.format("The encoding \"%s\" is unsupported by vCard %s", 676 mCurrentEncoding, getVersionString())); 677 } 678 679 // Some device uses line folding defined in RFC 2425, which is not allowed 680 // in vCard 2.1 (while needed in vCard 3.0). 681 // 682 // e.g. 683 // BEGIN:VCARD 684 // VERSION:2.1 685 // N:;Omega;;; 686 // EMAIL;INTERNET:"Omega" 687 // <omega@example.com> 688 // FN:Omega 689 // END:VCARD 690 // 691 // The vCard above assumes that email address should become: 692 // "Omega" <omega@example.com> 693 // 694 // But vCard 2.1 requires Quote-Printable when a line contains line break(s). 695 // 696 // For more information about line folding, 697 // see "5.8.1. Line delimiting and folding" in RFC 2425. 698 // 699 // We take care of this case more formally in vCard 3.0, so we only need to 700 // do this in vCard 2.1. 701 if (getVersion() == VCardConfig.VERSION_21) { 702 StringBuilder builder = null; 703 while (true) { 704 final String nextLine = peekLine(); 705 // We don't need to care too much about this exceptional case, 706 // but we should not wrongly eat up "END:VCARD", since it critically 707 // breaks this parser's state machine. 708 // Thus we roughly look over the next line and confirm it is at least not 709 // "END:VCARD". This extra fee is worth paying. This is exceptional 710 // anyway. 
711 if (!TextUtils.isEmpty(nextLine) && 712 nextLine.charAt(0) == ' ' && 713 !"END:VCARD".contains(nextLine.toUpperCase())) { 714 getLine(); // Drop the next line. 715 716 if (builder == null) { 717 builder = new StringBuilder(); 718 builder.append(propertyRawValue); 719 } 720 builder.append(nextLine.substring(1)); 721 } else { 722 break; 723 } 724 } 725 if (builder != null) { 726 propertyRawValue = builder.toString(); 727 } 728 } 729 730 ArrayList<String> propertyValueList = new ArrayList<String>(); 731 String value = VCardUtils.convertStringCharset( 732 maybeUnescapeText(propertyRawValue), sourceCharset, targetCharset); 733 propertyValueList.add(value); 734 property.setValues(propertyValueList); 735 for (VCardInterpreter interpreter : mInterpreterList) { 736 interpreter.onPropertyCreated(property); 737 } 738 } 739 } 740 741 /** 742 * <p> 743 * Parses and returns Quoted-Printable. 744 * </p> 745 * 746 * @param firstString The string following a parameter name and attributes. 747 * Example: "string" in 748 * "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r". 749 * @return whole Quoted-Printable string, including a given argument and 750 * following lines. Excludes the last empty line following to Quoted 751 * Printable lines. 752 * @throws IOException 753 * @throws VCardException 754 */ 755 private String getQuotedPrintablePart(String firstString) 756 throws IOException, VCardException { 757 // Specifically, there may be some padding between = and CRLF. 758 // See the following: 759 // 760 // qp-line := *(qp-segment transport-padding CRLF) 761 // qp-part transport-padding 762 // qp-segment := qp-section *(SPACE / TAB) "=" 763 // ; Maximum length of 76 characters 764 // 765 // e.g. (from RFC 2045) 766 // Now's the time = 767 // for all folk to come= 768 // to the aid of their country. 
769 if (firstString.trim().endsWith("=")) { 770 // remove "transport-padding" 771 int pos = firstString.length() - 1; 772 while (firstString.charAt(pos) != '=') { 773 } 774 StringBuilder builder = new StringBuilder(); 775 builder.append(firstString.substring(0, pos + 1)); 776 builder.append("\r\n"); 777 String line; 778 while (true) { 779 line = getLine(); 780 if (line == null) { 781 throw new VCardException("File ended during parsing a Quoted-Printable String"); 782 } 783 if (line.trim().endsWith("=")) { 784 // remove "transport-padding" 785 pos = line.length() - 1; 786 while (line.charAt(pos) != '=') { 787 } 788 builder.append(line.substring(0, pos + 1)); 789 builder.append("\r\n"); 790 } else { 791 builder.append(line); 792 break; 793 } 794 } 795 return builder.toString(); 796 } else { 797 return firstString; 798 } 799 } 800 801 protected String getBase64(String firstString) throws IOException, VCardException { 802 final StringBuilder builder = new StringBuilder(); 803 builder.append(firstString); 804 805 while (true) { 806 final String line = peekLine(); 807 if (line == null) { 808 throw new VCardException("File ended during parsing BASE64 binary"); 809 } 810 811 // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't 812 // have them. We try to detect those cases using semi-colon, given BASE64 doesn't 813 // contain it. Specifically BASE64 doesn't have semi-colon in it, so we should be able 814 // to detect the case safely. 815 if (line.contains(":")) { 816 if (getKnownPropertyNameSet().contains( 817 line.substring(0, line.indexOf(":")).toUpperCase())) { 818 Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " + 819 "which must not contain semi-colon. Treat the line as next property."); 820 Log.w(LOG_TAG, "Problematic line: " + line.trim()); 821 break; 822 } 823 } 824 825 // Consume the line. 
826 getLine(); 827 828 if (line.length() == 0) { 829 break; 830 } 831 builder.append(line); 832 } 833 834 return builder.toString(); 835 } 836 837 /* 838 * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an 839 * error toward the AGENT property. 840 * // TODO: Support AGENT property. 841 * item = 842 * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws] 843 * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD" 844 */ 845 protected void handleAgent(final VCardProperty property) throws VCardException { 846 if (!property.getRawValue().toUpperCase().contains("BEGIN:VCARD")) { 847 // Apparently invalid line seen in Windows Mobile 6.5. Ignore them. 848 for (VCardInterpreter interpreter : mInterpreterList) { 849 interpreter.onPropertyCreated(property); 850 } 851 return; 852 } else { 853 throw new VCardAgentNotSupportedException("AGENT Property is not supported now."); 854 } 855 } 856 857 /** 858 * For vCard 3.0. 859 */ 860 protected String maybeUnescapeText(final String text) { 861 return text; 862 } 863 864 /** 865 * Returns unescaped String if the character should be unescaped. Return 866 * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";" 867 * while "\x" should not be. 868 */ 869 protected String maybeUnescapeCharacter(final char ch) { 870 return unescapeCharacter(ch); 871 } 872 873 /* package */ static String unescapeCharacter(final char ch) { 874 // Original vCard 2.1 specification does not allow transformation 875 // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous 876 // implementation of 877 // this class allowed them, so keep it as is. 
878 if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') { 879 return String.valueOf(ch); 880 } else { 881 return null; 882 } 883 } 884 885 /** 886 * @return {@link VCardConfig#VERSION_21} 887 */ 888 protected int getVersion() { 889 return VCardConfig.VERSION_21; 890 } 891 892 /** 893 * @return {@link VCardConfig#VERSION_30} 894 */ 895 protected String getVersionString() { 896 return VCardConstants.VERSION_V21; 897 } 898 899 protected Set<String> getKnownPropertyNameSet() { 900 return VCardParser_V21.sKnownPropertyNameSet; 901 } 902 903 protected Set<String> getKnownTypeSet() { 904 return VCardParser_V21.sKnownTypeSet; 905 } 906 907 protected Set<String> getKnownValueSet() { 908 return VCardParser_V21.sKnownValueSet; 909 } 910 911 protected Set<String> getAvailableEncodingSet() { 912 return VCardParser_V21.sAvailableEncoding; 913 } 914 915 protected String getDefaultEncoding() { 916 return DEFAULT_ENCODING; 917 } 918 919 protected String getDefaultCharset() { 920 return DEFAULT_CHARSET; 921 } 922 923 protected String getCurrentCharset() { 924 return mCurrentCharset; 925 } 926 927 public void addInterpreter(VCardInterpreter interpreter) { 928 mInterpreterList.add(interpreter); 929 } 930 931 public void parse(InputStream is) throws IOException, VCardException { 932 if (is == null) { 933 throw new NullPointerException("InputStream must not be null."); 934 } 935 936 final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset); 937 mReader = new CustomBufferedReader(tmpReader); 938 939 final long start = System.currentTimeMillis(); 940 for (VCardInterpreter interpreter : mInterpreterList) { 941 interpreter.onVCardStarted(); 942 } 943 parseVCardFile(); 944 for (VCardInterpreter interpreter : mInterpreterList) { 945 interpreter.onVCardEnded(); 946 } 947 } 948 949 public final synchronized void cancel() { 950 Log.i(LOG_TAG, "ParserImpl received cancel operation."); 951 mCanceled = true; 952 } 953} 954