1/* 2 * Copyright 2013 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkPdfConfig.h" 9#include "SkPdfDiffEncoder.h" 10#include "SkPdfNativeObject.h" 11#include "SkPdfNativeTokenizer.h" 12#include "SkPdfUtils.h" 13 14// TODO(edisonn): mac builder does not find the header ... but from headers is ok 15//#include "SkPdfStreamCommonDictionary_autogen.h" 16//#include "SkPdfImageDictionary_autogen.h" 17#include "SkPdfHeaders_autogen.h" 18 19 20// TODO(edisonn): Perf, Make this function run faster. 21// There could be 0s between start and end. 22// needle will not contain 0s. 23static char* strrstrk(char* hayStart, char* hayEnd, const char* needle) { 24 int needleLen = strlen(needle); 25 if ((isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || (hayStart+needleLen == hayEnd)) && 26 strncmp(hayStart, needle, needleLen) == 0) { 27 return hayStart; 28 } 29 30 hayStart++; 31 32 while (hayStart < hayEnd) { 33 if (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart-1)) && 34 (isPdfWhiteSpaceOrPdfDelimiter(*(hayStart+needleLen)) || 35 (hayStart+needleLen == hayEnd)) && 36 strncmp(hayStart, needle, needleLen) == 0) { 37 return hayStart; 38 } 39 hayStart++; 40 } 41 return NULL; 42} 43 44const unsigned char* skipPdfWhiteSpaces(const unsigned char* start, const unsigned char* end) { 45 while (start < end && (isPdfWhiteSpace(*start) || *start == kComment_PdfDelimiter)) { 46 TRACE_COMMENT(*start); 47 if (*start == kComment_PdfDelimiter) { 48 // skip the comment until end of line 49 while (start < end && !isPdfEOL(*start)) { 50 start++; 51 TRACE_COMMENT(*start); 52 } 53 } else { 54 start++; 55 } 56 } 57 return start; 58} 59 60const unsigned char* endOfPdfToken(const unsigned char* start, const unsigned char* end) { 61 SkASSERT(!isPdfWhiteSpace(*start)); 62 63 if (start < end && isPdfDelimiter(*start)) { 64 TRACE_TK(*start); 65 start++; 66 return start; 67 } 68 69 while (start < end && !isPdfWhiteSpaceOrPdfDelimiter(*start)) { 70 TRACE_TK(*start); 71 start++; 72 } 73 return start; 74} 75 76// The parsing should end with a ]. 77static const unsigned char* readArray(const unsigned char* start, const unsigned char* end, 78 SkPdfNativeObject* array, 79 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { 80 SkPdfNativeObject::makeEmptyArray(array); 81 // PUT_TRACK_STREAM(array, start, start) 82 83 if (allocator == NULL) { 84 // TODO(edisonn): report/warning error/assert 85 return end; 86 } 87 88 while (start < end) { 89 // skip white spaces 90 start = skipPdfWhiteSpaces(start, end); 91 92 const unsigned char* endOfToken = endOfPdfToken(start, end); 93 94 if (endOfToken == start) { 95 // TODO(edisonn): report error in pdf file (end of stream with ] for end of aray 96 return start; 97 } 98 99 if (endOfToken == start + 1 && *start == kClosedSquareBracket_PdfDelimiter) { 100 return endOfToken; 101 } 102 103 SkPdfNativeObject* newObj = allocator->allocObject(); 104 start = nextObject(start, end, newObj, allocator, doc); 105 // TODO(edisonn): perf/memory: put the variables on the stack, and flush them on the array 106 // only when we are sure they are not references! 107 if (newObj->isKeywordReference() && array->size() >= 2 && 108 array->objAtAIndex(array->size() - 1)->isInteger() && 109 array->objAtAIndex(array->size() - 2)->isInteger()) { 110 SkPdfNativeObject* gen = array->removeLastInArray(); 111 SkPdfNativeObject* id = array->removeLastInArray(); 112 113 SkPdfNativeObject::resetAndMakeReference((unsigned int)id->intValue(), 114 (unsigned int)gen->intValue(), newObj); 115 // newObj PUT_TRACK_PARAMETERS_OBJ2(id, newObj) - store end, as now 116 } 117 array->appendInArray(newObj); 118 } 119 // TODO(edisonn): report not reached, we should never get here 120 // TODO(edisonn): there might be a bug here, enable an assert and run it on files 121 // or it might be that the files were actually corrupted 122 return start; 123} 124 125static const unsigned char* readString(const unsigned char* start, const unsigned char* end, 126 unsigned char* out) { 127 const unsigned char* in = start; 128 bool hasOut = (out != NULL); 129 130 int openRoundBrackets = 1; 131 while (in < end) { 132 openRoundBrackets += ((*in) == kOpenedRoundBracket_PdfDelimiter); 133 openRoundBrackets -= ((*in) == kClosedRoundBracket_PdfDelimiter); 134 if (openRoundBrackets == 0) { 135 in++; // consumed ) 136 break; 137 } 138 139 if (*in == kEscape_PdfSpecial) { 140 if (in + 1 < end) { 141 switch (in[1]) { 142 case 'n': 143 if (hasOut) { *out = kLF_PdfWhiteSpace; } 144 out++; 145 in += 2; 146 break; 147 148 case 'r': 149 if (hasOut) { *out = kCR_PdfWhiteSpace; } 150 out++; 151 in += 2; 152 break; 153 154 case 't': 155 if (hasOut) { *out = kHT_PdfWhiteSpace; } 156 out++; 157 in += 2; 158 break; 159 160 case 'b': 161 // TODO(edisonn): any special meaning to backspace? 162 if (hasOut) { *out = kBackspace_PdfSpecial; } 163 out++; 164 in += 2; 165 break; 166 167 case 'f': 168 if (hasOut) { *out = kFF_PdfWhiteSpace; } 169 out++; 170 in += 2; 171 break; 172 173 case kOpenedRoundBracket_PdfDelimiter: 174 if (hasOut) { *out = kOpenedRoundBracket_PdfDelimiter; } 175 out++; 176 in += 2; 177 break; 178 179 case kClosedRoundBracket_PdfDelimiter: 180 if (hasOut) { *out = kClosedRoundBracket_PdfDelimiter; } 181 out++; 182 in += 2; 183 break; 184 185 case kEscape_PdfSpecial: 186 if (hasOut) { *out = kEscape_PdfSpecial; } 187 out++; 188 in += 2; 189 break; 190 191 case '0': 192 case '1': 193 case '2': 194 case '3': 195 case '4': 196 case '5': 197 case '6': 198 case '7': { 199 //read octals 200 in++; // consume backslash 201 202 int code = 0; 203 int i = 0; 204 while (in < end && *in >= '0' && *in < '8') { 205 code = (code << 3) + ((*in) - '0'); // code * 8 + d 206 i++; 207 in++; 208 if (i == 3) { 209 if (hasOut) { *out = code & 0xff; } 210 out++; 211 i = 0; 212 } 213 } 214 if (i > 0) { 215 if (hasOut) { *out = code & 0xff; } 216 out++; 217 } 218 } 219 break; 220 221 default: 222 // Per spec, backslash is ignored if escaped ch is unknown 223 in++; 224 break; 225 } 226 } else { 227 in++; 228 } 229 } else { 230 if (hasOut) { *out = *in; } 231 in++; 232 out++; 233 } 234 } 235 236 if (hasOut) { 237 return in; // consumed already ) at the end of the string 238 } else { 239 // return where the string would end if we reuse the string 240 return start + (out - (const unsigned char*)NULL); 241 } 242} 243 244static int readStringLength(const unsigned char* start, const unsigned char* end) { 245 return readString(start, end, NULL) - start; 246} 247 248static const unsigned char* readString(const unsigned char* start, const unsigned char* end, 249 SkPdfNativeObject* str, SkPdfAllocator* allocator) { 250 if (!allocator) { 251 // TODO(edisonn): report error/warn/assert 252 return end; 253 } 254 255 int outLength = readStringLength(start, end); 256 unsigned char* out = (unsigned char*)allocator->alloc(outLength); 257 const unsigned char* now = readString(start, end, out); 258 SkPdfNativeObject::makeString(out, out + outLength, str); 259 // PUT_TRACK_STREAM(str, start, now) 260 TRACE_STRING(out, out + outLength); 261 return now; // consumed already ) at the end of the string 262} 263 264static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, 265 unsigned char* out) { 266 bool hasOut = (out != NULL); 267 const unsigned char* in = start; 268 269 unsigned char code = 0; 270 271 while (in < end) { 272 while (in < end && isPdfWhiteSpace(*in)) { 273 in++; 274 } 275 276 if (*in == kClosedInequityBracket_PdfDelimiter) { 277 in++; // consume > 278 // normal exit 279 break; 280 } 281 282 if (in >= end) { 283 // end too soon 284 break; 285 } 286 287 switch (*in) { 288 case '0': 289 case '1': 290 case '2': 291 case '3': 292 case '4': 293 case '5': 294 case '6': 295 case '7': 296 case '8': 297 case '9': 298 code = (*in - '0') << 4; 299 break; 300 301 case 'a': 302 case 'b': 303 case 'c': 304 case 'd': 305 case 'e': 306 case 'f': 307 code = (*in - 'a' + 10) << 4; 308 break; 309 310 case 'A': 311 case 'B': 312 case 'C': 313 case 'D': 314 case 'E': 315 case 'F': 316 code = (*in - 'A' + 10) << 4; 317 break; 318 319 // TODO(edisonn): spec does not say how to handle this error 320 default: 321 break; 322 } 323 324 in++; // advance 325 326 while (in < end && isPdfWhiteSpace(*in)) { 327 in++; 328 } 329 330 // TODO(edisonn): report error 331 if (in >= end) { 332 if (hasOut) { *out = code; } 333 out++; 334 break; 335 } 336 337 if (*in == kClosedInequityBracket_PdfDelimiter) { 338 if (hasOut) { *out = code; } 339 out++; 340 in++; 341 break; 342 } 343 344 switch (*in) { 345 case '0': 346 case '1': 347 case '2': 348 case '3': 349 case '4': 350 case '5': 351 case '6': 352 case '7': 353 case '8': 354 case '9': 355 code += (*in - '0'); 356 break; 357 358 case 'a': 359 case 'b': 360 case 'c': 361 case 'd': 362 case 'e': 363 case 'f': 364 code += (*in - 'a' + 10); 365 break; 366 367 case 'A': 368 case 'B': 369 case 'C': 370 case 'D': 371 case 'E': 372 case 'F': 373 code += (*in - 'A' + 10); 374 break; 375 376 // TODO(edisonn): spec does not say how to handle this error 377 default: 378 break; 379 } 380 381 if (hasOut) { *out = code; } 382 out++; 383 in++; 384 } 385 386 if (hasOut) { 387 return in; // consumed already ) at the end of the string 388 } else { 389 // return where the string would end if we reuse the string 390 return start + (out - (const unsigned char*)NULL); 391 } 392} 393 394static int readHexStringLength(const unsigned char* start, const unsigned char* end) { 395 return readHexString(start, end, NULL) - start; 396} 397 398static const unsigned char* readHexString(const unsigned char* start, const unsigned char* end, SkPdfNativeObject* str, SkPdfAllocator* allocator) { 399 if (!allocator) { 400 // TODO(edisonn): report error/warn/assert 401 return end; 402 } 403 int outLength = readHexStringLength(start, end); 404 unsigned char* out = (unsigned char*)allocator->alloc(outLength); 405 const unsigned char* now = readHexString(start, end, out); 406 SkPdfNativeObject::makeHexString(out, out + outLength, str); 407 // str PUT_TRACK_STREAM(start, now) 408 TRACE_HEXSTRING(out, out + outLength); 409 return now; // consumed already > at the end of the string 410} 411 412// TODO(edisonn): add version parameter, before PDF 1.2 name could not have special characters. 413static const unsigned char* readName(const unsigned char* start, const unsigned char* end, 414 unsigned char* out) { 415 bool hasOut = (out != NULL); 416 const unsigned char* in = start; 417 418 unsigned char code = 0; 419 420 while (in < end) { 421 if (isPdfWhiteSpaceOrPdfDelimiter(*in)) { 422 break; 423 } 424 425 if (*in == '#' && in + 2 < end) { 426 in++; 427 switch (*in) { 428 case '0': 429 case '1': 430 case '2': 431 case '3': 432 case '4': 433 case '5': 434 case '6': 435 case '7': 436 case '8': 437 case '9': 438 code = (*in - '0') << 4; 439 break; 440 441 case 'a': 442 case 'b': 443 case 'c': 444 case 'd': 445 case 'e': 446 case 'f': 447 code = (*in - 'a' + 10) << 4; 448 break; 449 450 case 'A': 451 case 'B': 452 case 'C': 453 case 'D': 454 case 'E': 455 case 'F': 456 code = (*in - 'A' + 10) << 4; 457 break; 458 459 // TODO(edisonn): spec does not say how to handle this error 460 default: 461 break; 462 } 463 464 in++; // advance 465 466 switch (*in) { 467 case '0': 468 case '1': 469 case '2': 470 case '3': 471 case '4': 472 case '5': 473 case '6': 474 case '7': 475 case '8': 476 case '9': 477 code += (*in - '0'); 478 break; 479 480 case 'a': 481 case 'b': 482 case 'c': 483 case 'd': 484 case 'e': 485 case 'f': 486 code += (*in - 'a' + 10); 487 break; 488 489 case 'A': 490 case 'B': 491 case 'C': 492 case 'D': 493 case 'E': 494 case 'F': 495 code += (*in - 'A' + 10); 496 break; 497 498 // TODO(edisonn): spec does not say how to handle this error 499 default: 500 break; 501 } 502 503 if (hasOut) { *out = code; } 504 out++; 505 in++; 506 } else { 507 if (hasOut) { *out = *in; } 508 out++; 509 in++; 510 } 511 } 512 513 if (hasOut) { 514 return in; // consumed already ) at the end of the string 515 } else { 516 // return where the string would end if we reuse the string 517 return start + (out - (const unsigned char*)NULL); 518 } 519} 520 521static int readNameLength(const unsigned char* start, const unsigned char* end) { 522 return readName(start, end, NULL) - start; 523} 524 525static const unsigned char* readName(const unsigned char* start, const unsigned char* end, 526 SkPdfNativeObject* name, SkPdfAllocator* allocator) { 527 if (!allocator) { 528 // TODO(edisonn): report error/warn/assert 529 return end; 530 } 531 int outLength = readNameLength(start, end); 532 unsigned char* out = (unsigned char*)allocator->alloc(outLength); 533 const unsigned char* now = readName(start, end, out); 534 SkPdfNativeObject::makeName(out, out + outLength, name); 535 //PUT_TRACK_STREAM(start, now) 536 TRACE_NAME(out, out + outLength); 537 return now; 538} 539 540// TODO(edisonn): pdf spec let Length to be an indirect object define after the stream 541// that makes for an interesting scenario, where the stream itself contains endstream, together 542// with a reference object with the length, but the real length object would be somewhere else 543// it could confuse the parser 544/*example: 545 5467 0 obj 547<< /length 8 0 R>> 548stream 549............... 550endstream 5518 0 obj #we are in stream actually, not a real object 552<< 10 >> #we are in stream actually, not a real object 553endobj 554endstream 5558 0 obj #real obj 556<< 100 >> #real obj 557endobj 558and it could get worse, with multiple object like this 559*/ 560 561// right now implement the silly algorithm that assumes endstream is finishing the stream 562 563static const unsigned char* readStream(const unsigned char* start, const unsigned char* end, 564 SkPdfNativeObject* dict, SkPdfNativeDoc* doc) { 565 start = skipPdfWhiteSpaces(start, end); 566 if (!( start[0] == 's' && 567 start[1] == 't' && 568 start[2] == 'r' && 569 start[3] == 'e' && 570 start[4] == 'a' && 571 start[5] == 'm')) { 572 // no stream. return. 573 return start; 574 } 575 576 start += 6; // strlen("stream") 577 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { 578 start += 2; 579 } else if (start[0] == kLF_PdfWhiteSpace) { 580 start += 1; 581 } else if (isPdfWhiteSpace(start[0])) { 582 start += 1; 583 } else { 584 // TODO(edisonn): warn it should be isPdfDelimiter(start[0])) ? 585 } 586 587 SkPdfStreamCommonDictionary* stream = (SkPdfStreamCommonDictionary*) dict; 588 // TODO(edisonn): load Length 589 int64_t length = -1; 590 591 // TODO(edisonn): very basic implementation 592 if (stream->has_Length() && stream->Length(doc) > 0) { 593 length = stream->Length(doc); 594 } 595 596 // TODO(edisonn): load external streams 597 // TODO(edisonn): look at the last filter, to determine how to deal with possible parsing 598 // issues. The last filter can have special rules to terminate a stream, which we could 599 // use to determine end of stream. 600 601 if (length >= 0) { 602 const unsigned char* endstream = start + length; 603 604 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) { 605 endstream += 2; 606 } else if (endstream[0] == kLF_PdfWhiteSpace) { 607 endstream += 1; 608 } 609 610 if (strncmp((const char*)endstream, "endstream", strlen("endstream")) != 0) { 611 length = -1; 612 } 613 } 614 615 if (length < 0) { 616 // scan the buffer, until we find first endstream 617 // TODO(edisonn): all buffers must have a 0 at the end now, 618 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, 619 "endstream"); 620 621 if (endstream) { 622 length = endstream - start; 623 if (*(endstream-1) == kLF_PdfWhiteSpace) length--; 624 if (*(endstream-2) == kCR_PdfWhiteSpace) length--; 625 } 626 } 627 if (length >= 0) { 628 const unsigned char* endstream = start + length; 629 630 if (endstream[0] == kCR_PdfWhiteSpace && endstream[1] == kLF_PdfWhiteSpace) { 631 endstream += 2; 632 } else if (endstream[0] == kLF_PdfWhiteSpace) { 633 endstream += 1; 634 } 635 636 // TODO(edisonn): verify the next bytes are "endstream" 637 638 endstream += strlen("endstream"); 639 // TODO(edisonn): Assert? report error/warning? 640 dict->addStream(start, (size_t)length); 641 return endstream; 642 } 643 return start; 644} 645 646static const unsigned char* readInlineImageStream(const unsigned char* start, 647 const unsigned char* end, 648 SkPdfImageDictionary* inlineImage, 649 SkPdfNativeDoc* doc) { 650 // We already processed ID keyword, and we should be positioned immediately after it 651 652 // TODO(edisonn): security: either make all streams to have extra 2 bytes at the end, 653 // instead of this if. 654 //if (end - start <= 2) { 655 // // TODO(edisonn): warning? 656 // return end; // but can we have a pixel image encoded in 1-2 bytes? 657 //} 658 659 if (start[0] == kCR_PdfWhiteSpace && start[1] == kLF_PdfWhiteSpace) { 660 start += 2; 661 } else if (start[0] == kLF_PdfWhiteSpace) { 662 start += 1; 663 } else if (isPdfWhiteSpace(start[0])) { 664 start += 1; 665 } else { 666 SkASSERT(isPdfDelimiter(start[0])); 667 // TODO(edisonn): warning? 668 } 669 670 const unsigned char* endstream = (const unsigned char*)strrstrk((char*)start, (char*)end, "EI"); 671 const unsigned char* endEI = endstream ? endstream + 2 : NULL; // 2 == strlen("EI") 672 673 if (endstream) { 674 int length = endstream - start; 675 if (*(endstream-1) == kLF_PdfWhiteSpace) length--; 676 if (*(endstream-2) == kCR_PdfWhiteSpace) length--; 677 inlineImage->addStream(start, (size_t)length); 678 } else { 679 // TODO(edisonn): report error in inline image stream (ID-EI) section 680 // TODO(edisonn): based on filter, try to ignore a missing EI, and read data properly 681 return end; 682 } 683 return endEI; 684} 685 686static const unsigned char* readDictionary(const unsigned char* start, const unsigned char* end, 687 SkPdfNativeObject* dict, 688 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { 689 if (allocator == NULL) { 690 // TODO(edisonn): report/warning error 691 return end; 692 } 693 SkPdfNativeObject::makeEmptyDictionary(dict); 694 // PUT_TRACK_STREAM(dict, start, start) 695 696 start = skipPdfWhiteSpaces(start, end); 697 SkPdfAllocator tmpStorage; // keys will be stored in dict, we can free them after set. 698 699 while (start < end && *start == kNamed_PdfDelimiter) { 700 SkPdfNativeObject key; 701 //*start = '\0'; 702 start++; 703 start = readName(start, end, &key, &tmpStorage); 704 start = skipPdfWhiteSpaces(start, end); 705 706 if (start < end) { 707 SkPdfNativeObject* value = allocator->allocObject(); 708 start = nextObject(start, end, value, allocator, doc); 709 710 start = skipPdfWhiteSpaces(start, end); 711 712 if (start < end) { 713 // We should have an indirect reference 714 if (isPdfDigit(*start)) { 715 SkPdfNativeObject generation; 716 start = nextObject(start, end, &generation, allocator, doc); 717 718 SkPdfNativeObject keywordR; 719 start = nextObject(start, end, &keywordR, allocator, doc); 720 721 if (value->isInteger() && generation.isInteger() && 722 keywordR.isKeywordReference()) { 723 int64_t id = value->intValue(); 724 SkPdfNativeObject::resetAndMakeReference( 725 (unsigned int)id, 726 (unsigned int)generation.intValue(), 727 value); 728 // PUT_TRACK_PARAMETERS_OBJ2(value, &generation) 729 dict->set(&key, value); 730 } else { 731 // TODO(edisonn) error?, ignore it for now. 732 dict->set(&key, value); 733 } 734 } else { 735 // next elem is not a digit, but it might not be / either! 736 dict->set(&key, value); 737 } 738 } else { 739 // /key >> 740 dict->set(&key, value); 741 return end; 742 } 743 start = skipPdfWhiteSpaces(start, end); 744 } else { 745 dict->set(&key, &SkPdfNativeObject::kNull); 746 return end; 747 } 748 } 749 750 // now we should expect >> 751 start = skipPdfWhiteSpaces(start, end); 752 if (*start != kClosedInequityBracket_PdfDelimiter) { 753 // TODO(edisonn): report/warning 754 } 755 756 start++; // skip > 757 if (*start != kClosedInequityBracket_PdfDelimiter) { 758 // TODO(edisonn): report/warning 759 } 760 761 start++; // skip > 762 763 //STORE_TRACK_PARAMETER_OFFSET_END(dict,start); 764 765 start = readStream(start, end, dict, doc); 766 767 return start; 768} 769 770const unsigned char* nextObject(const unsigned char* start, const unsigned char* end, 771 SkPdfNativeObject* token, 772 SkPdfAllocator* allocator, SkPdfNativeDoc* doc) { 773 const unsigned char* current; 774 775 // skip white spaces 776 start = skipPdfWhiteSpaces(start, end); 777 778 if (start >= end) { 779 return end; 780 } 781 782 current = endOfPdfToken(start, end); 783 784 // no token, len would be 0 785 if (current == start || current == end) { 786 return end; 787 } 788 789 int tokenLen = current - start; 790 791 if (tokenLen == 1) { 792 // start array 793 switch (*start) { 794 case kOpenedSquareBracket_PdfDelimiter: 795 return readArray(current, end, token, allocator, doc); 796 797 case kOpenedRoundBracket_PdfDelimiter: 798 return readString(start + 1, end, token, allocator); 799 800 case kOpenedInequityBracket_PdfDelimiter: 801 if (end > start + 1 && start[1] == kOpenedInequityBracket_PdfDelimiter) { 802 // TODO(edisonn): pass here the length somehow? 803 return readDictionary(start + 2, end, token, allocator, doc); // skip << 804 } else { 805 return readHexString(start + 1, end, token, allocator); // skip < 806 } 807 808 case kNamed_PdfDelimiter: 809 return readName(start + 1, end, token, allocator); 810 811 // TODO(edisonn): what to do curly brackets? 812 case kOpenedCurlyBracket_PdfDelimiter: 813 default: 814 break; 815 } 816 817 SkASSERT(!isPdfWhiteSpace(*start)); 818 if (isPdfDelimiter(*start)) { 819 // TODO(edisonn): how unexpected stream ] } > ) will be handled? 820 // for now ignore, and it will become a keyword to be ignored 821 } 822 } 823 824 if (tokenLen == 4 && start[0] == 'n' && start[1] == 'u' && start[2] == 'l' && start[3] == 'l') { 825 SkPdfNativeObject::makeNull(token); 826 // PUT_TRACK_STREAM(start, start + 4) 827 return current; 828 } 829 830 if (tokenLen == 4 && start[0] == 't' && start[1] == 'r' && start[2] == 'u' && start[3] == 'e') { 831 SkPdfNativeObject::makeBoolean(true, token); 832 // PUT_TRACK_STREAM(start, start + 4) 833 return current; 834 } 835 836 // TODO(edisonn): again, make all buffers have 5 extra bytes 837 if (tokenLen == 5 && start[0] == 'f' && 838 start[1] == 'a' && 839 start[2] == 'l' && 840 start[3] == 's' && 841 start[4] == 'e') { 842 SkPdfNativeObject::makeBoolean(false, token); 843 // PUT_TRACK_STREAM(start, start + 5) 844 return current; 845 } 846 847 if (isPdfNumeric(*start)) { 848 SkPdfNativeObject::makeNumeric(start, current, token); 849 // PUT_TRACK_STREAM(start, current) 850 } else { 851 SkPdfNativeObject::makeKeyword(start, current, token); 852 // PUT_TRACK_STREAM(start, current) 853 } 854 return current; 855} 856 857SkPdfNativeObject* SkPdfAllocator::allocBlock() { 858 fSizeInBytes += BUFFER_SIZE * sizeof(SkPdfNativeObject); 859 return new SkPdfNativeObject[BUFFER_SIZE]; 860} 861 862SkPdfAllocator::~SkPdfAllocator() { 863 for (int i = 0 ; i < fHandles.count(); i++) { 864 free(fHandles[i]); 865 } 866 for (int i = 0 ; i < fHistory.count(); i++) { 867 for (int j = 0 ; j < BUFFER_SIZE; j++) { 868 fHistory[i][j].reset(); 869 } 870 delete[] fHistory[i]; 871 } 872 for (int j = 0 ; j < BUFFER_SIZE; j++) { 873 fCurrent[j].reset(); 874 } 875 delete[] fCurrent; 876} 877 878SkPdfNativeObject* SkPdfAllocator::allocObject() { 879 if (fCurrentUsed >= BUFFER_SIZE) { 880 fHistory.push(fCurrent); 881 fCurrent = allocBlock(); 882 fCurrentUsed = 0; 883 fSizeInBytes += sizeof(SkPdfNativeObject*); 884 } 885 fCurrentUsed++; 886 return &fCurrent[fCurrentUsed - 1]; 887} 888 889// TODO(edisonn): perf: do no copy the buffers, but reuse them, and mark cache the result, 890// so there is no need of a second pass 891SkPdfNativeTokenizer::SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream, 892 SkPdfAllocator* allocator, 893 SkPdfNativeDoc* doc) 894 : fDoc(doc) 895 , fAllocator(allocator) 896 , fUncompressedStream(NULL) 897 , fUncompressedStreamEnd(NULL) 898 , fEmpty(false) 899 , fHasPutBack(false) { 900 const unsigned char* buffer = NULL; 901 size_t len = 0; 902 objWithStream->GetFilteredStreamRef(&buffer, &len); 903 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!) 904 // we need to do now for perf, and our generated pdfs do not have comments, 905 // but we need to remove this hack for pdfs in the wild 906 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); 907 if (endobj) { 908 len = endobj - (char*)buffer + strlen("endobj"); 909 } 910 fUncompressedStreamStart = fUncompressedStream = buffer; 911 fUncompressedStreamEnd = fUncompressedStream + len; 912} 913 914SkPdfNativeTokenizer::SkPdfNativeTokenizer(const unsigned char* buffer, int len, 915 SkPdfAllocator* allocator, 916 SkPdfNativeDoc* doc) : fDoc(doc) 917 , fAllocator(allocator) 918 , fEmpty(false) 919 , fHasPutBack(false) { 920 // TODO(edisonn): really bad hack, find end of object (endobj might be in a comment!) 921 // we need to do now for perf, and our generated pdfs do not have comments, 922 // but we need to remove this hack for pdfs in the wild 923 char* endobj = strrstrk((char*)buffer, (char*)buffer + len, "endobj"); 924 if (endobj) { 925 len = endobj - (char*)buffer + strlen("endobj"); 926 } 927 fUncompressedStreamStart = fUncompressedStream = buffer; 928 fUncompressedStreamEnd = fUncompressedStream + len; 929} 930 931SkPdfNativeTokenizer::~SkPdfNativeTokenizer() { 932} 933 934bool SkPdfNativeTokenizer::readTokenCore(PdfToken* token) { 935#ifdef PDF_TRACE_READ_TOKEN 936 static int read_op = 0; 937#endif 938 939 token->fKeyword = NULL; 940 token->fObject = NULL; 941 942 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd); 943 if (fUncompressedStream >= fUncompressedStreamEnd) { 944 fEmpty = true; 945 return false; 946 } 947 948 SkPdfNativeObject obj; 949 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, &obj, fAllocator, fDoc); 950 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart) 951 952 // If it is a keyword, we will only get the pointer of the string. 953 if (obj.type() == SkPdfNativeObject::kKeyword_PdfObjectType) { 954 token->fKeyword = obj.c_str(); 955 token->fKeywordLength = obj.lenstr(); 956 token->fType = kKeyword_TokenType; 957 } else { 958 SkPdfNativeObject* pobj = fAllocator->allocObject(); 959 *pobj = obj; 960 token->fObject = pobj; 961 token->fType = kObject_TokenType; 962 } 963 964#ifdef PDF_TRACE_READ_TOKEN 965 read_op++; 966#if 0 967 if (548 == read_op) { 968 printf("break;\n"); 969 } 970#endif 971 printf("%i READ %s %s\n", read_op, token->fType == kKeyword_TokenType ? "Keyword" : "Object", 972 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : 973 token->fObject->toString().c_str()); 974#endif 975 976 return true; 977} 978 979void SkPdfNativeTokenizer::PutBack(PdfToken token) { 980 SkASSERT(!fHasPutBack); 981 fHasPutBack = true; 982 fPutBack = token; 983#ifdef PDF_TRACE_READ_TOKEN 984 printf("PUT_BACK %s %s\n", token.fType == kKeyword_TokenType ? "Keyword" : "Object", 985 token.fKeyword ? SkString(token.fKeyword, token.fKeywordLength).c_str() : 986 token.fObject->toString().c_str()); 987#endif 988} 989 990bool SkPdfNativeTokenizer::readToken(PdfToken* token, bool writeDiff) { 991 if (fHasPutBack) { 992 *token = fPutBack; 993 fHasPutBack = false; 994#ifdef PDF_TRACE_READ_TOKEN 995 printf("READ_BACK %s %s\n", token->fType == kKeyword_TokenType ? "Keyword" : "Object", 996 token->fKeyword ? SkString(token->fKeyword, token->fKeywordLength).c_str() : 997 token->fObject->toString().c_str()); 998#endif 999 if (writeDiff) { 1000 SkPdfDiffEncoder::WriteToFile(token); 1001 } 1002 return true; 1003 } 1004 1005 if (fEmpty) { 1006#ifdef PDF_TRACE_READ_TOKEN 1007 printf("EMPTY TOKENIZER\n"); 1008#endif 1009 return false; 1010 } 1011 1012 const bool result = readTokenCore(token); 1013 if (result && writeDiff) { 1014 SkPdfDiffEncoder::WriteToFile(token); 1015 } 1016 return result; 1017} 1018 1019#define DECLARE_PDF_NAME(longName) SkPdfName longName((char*)#longName) 1020 1021// keys 1022DECLARE_PDF_NAME(BitsPerComponent); 1023DECLARE_PDF_NAME(ColorSpace); 1024DECLARE_PDF_NAME(Decode); 1025DECLARE_PDF_NAME(DecodeParms); 1026DECLARE_PDF_NAME(Filter); 1027DECLARE_PDF_NAME(Height); 1028DECLARE_PDF_NAME(ImageMask); 1029DECLARE_PDF_NAME(Intent); // PDF 1.1 - the key, or the abBreviations? 1030DECLARE_PDF_NAME(Interpolate); 1031DECLARE_PDF_NAME(Width); 1032 1033// values 1034DECLARE_PDF_NAME(DeviceGray); 1035DECLARE_PDF_NAME(DeviceRGB); 1036DECLARE_PDF_NAME(DeviceCMYK); 1037DECLARE_PDF_NAME(Indexed); 1038DECLARE_PDF_NAME(ASCIIHexDecode); 1039DECLARE_PDF_NAME(ASCII85Decode); 1040DECLARE_PDF_NAME(LZWDecode); 1041DECLARE_PDF_NAME(FlateDecode); // PDF 1.2 1042DECLARE_PDF_NAME(RunLengthDecode); 1043DECLARE_PDF_NAME(CCITTFaxDecode); 1044DECLARE_PDF_NAME(DCTDecode); 1045 1046#define HANDLE_NAME_ABBR(obj,longName,shortName) if (obj->isName(#shortName)) return &longName; 1047 1048 1049static SkPdfNativeObject* inlineImageKeyAbbreviationExpand(SkPdfNativeObject* key) { 1050 if (!key || !key->isName()) { 1051 return key; 1052 } 1053 1054 // TODO(edisonn): use autogenerated code! 1055 HANDLE_NAME_ABBR(key, BitsPerComponent, BPC); 1056 HANDLE_NAME_ABBR(key, ColorSpace, CS); 1057 HANDLE_NAME_ABBR(key, Decode, D); 1058 HANDLE_NAME_ABBR(key, DecodeParms, DP); 1059 HANDLE_NAME_ABBR(key, Filter, F); 1060 HANDLE_NAME_ABBR(key, Height, H); 1061 HANDLE_NAME_ABBR(key, ImageMask, IM); 1062// HANDLE_NAME_ABBR(key, Intent, ); 1063 HANDLE_NAME_ABBR(key, Interpolate, I); 1064 HANDLE_NAME_ABBR(key, Width, W); 1065 1066 return key; 1067} 1068 1069static SkPdfNativeObject* inlineImageValueAbbreviationExpand(SkPdfNativeObject* value) { 1070 if (!value || !value->isName()) { 1071 return value; 1072 } 1073 1074 // TODO(edisonn): use autogenerated code! 1075 HANDLE_NAME_ABBR(value, DeviceGray, G); 1076 HANDLE_NAME_ABBR(value, DeviceRGB, RGB); 1077 HANDLE_NAME_ABBR(value, DeviceCMYK, CMYK); 1078 HANDLE_NAME_ABBR(value, Indexed, I); 1079 HANDLE_NAME_ABBR(value, ASCIIHexDecode, AHx); 1080 HANDLE_NAME_ABBR(value, ASCII85Decode, A85); 1081 HANDLE_NAME_ABBR(value, LZWDecode, LZW); 1082 HANDLE_NAME_ABBR(value, FlateDecode, Fl); // (PDF 1.2) 1083 HANDLE_NAME_ABBR(value, RunLengthDecode, RL); 1084 HANDLE_NAME_ABBR(value, CCITTFaxDecode, CCF); 1085 HANDLE_NAME_ABBR(value, DCTDecode, DCT); 1086 1087 return value; 1088} 1089 1090SkPdfImageDictionary* SkPdfNativeTokenizer::readInlineImage() { 1091 // BI already processed 1092 fUncompressedStream = skipPdfWhiteSpaces(fUncompressedStream, fUncompressedStreamEnd); 1093 if (fUncompressedStream >= fUncompressedStreamEnd) { 1094 return NULL; 1095 } 1096 1097 SkPdfImageDictionary* inlineImage = (SkPdfImageDictionary*)fAllocator->allocObject(); 1098 SkPdfNativeObject::makeEmptyDictionary(inlineImage); 1099 // PUT_TRACK_STREAM_ARGS_EXPL(fStreamId, fUncompressedStream - fUncompressedStreamStart, 1100 // fUncompressedStream - fUncompressedStreamStart) 1101 1102 while (fUncompressedStream < fUncompressedStreamEnd) { 1103 SkPdfNativeObject* key = fAllocator->allocObject(); 1104 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, key, 1105 fAllocator, fDoc); 1106 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s 1107 1108 if (key->isKeyword() && key->lenstr() == 2 && 1109 key->c_str()[0] == 'I' && key->c_str()[1] == 'D') { // ID 1110 fUncompressedStream = readInlineImageStream(fUncompressedStream, fUncompressedStreamEnd, 1111 inlineImage, fDoc); 1112 return inlineImage; 1113 } else { 1114 SkPdfNativeObject* obj = fAllocator->allocObject(); 1115 fUncompressedStream = nextObject(fUncompressedStream, fUncompressedStreamEnd, obj, 1116 fAllocator, fDoc); 1117 // PUT_TRACK_STREAM_ARGS_EXPL2(fStreamId, fUncompressedStreamStart)s 1118 // TODO(edisonn): perf maybe we should not expand abBreviation like this 1119 inlineImage->set(inlineImageKeyAbbreviationExpand(key), 1120 inlineImageValueAbbreviationExpand(obj)); 1121 } 1122 } 1123 // TODO(edisonn): report end of data with inline image without an EI 1124 return inlineImage; 1125} 1126