1/* 2 * Copyright 2013 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkPdfNativeDoc.h" 9 10#include <stdio.h> 11#include <string.h> 12#include <sys/types.h> 13#include <sys/stat.h> 14 15#include "SkPdfMapper_autogen.h" 16#include "SkPdfNativeObject.h" 17#include "SkPdfNativeTokenizer.h" 18#include "SkPdfReporter.h" 19#include "SkStream.h" 20 21// TODO(edisonn): for some reason on mac these files are found here, but are found from headers 22//#include "SkPdfFileTrailerDictionary_autogen.h" 23//#include "SkPdfCatalogDictionary_autogen.h" 24//#include "SkPdfPageObjectDictionary_autogen.h" 25//#include "SkPdfPageTreeNodeDictionary_autogen.h" 26#include "SkPdfHeaders_autogen.h" 27 28static long getFileSize(const char* filename) 29{ 30 struct stat stat_buf; 31 int rc = stat(filename, &stat_buf); 32 return rc == 0 ? (long)stat_buf.st_size : -1; 33} 34 35static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) { 36 while (current > start && !isPdfEOL(*(current - 1))) { 37 current--; 38 } 39 return current; 40} 41 42static const unsigned char* previousLineHome(const unsigned char* start, 43 const unsigned char* current) { 44 if (current > start && isPdfEOL(*(current - 1))) { 45 current--; 46 } 47 48 // allows CR+LF, LF+CR but not two CR+CR or LF+LF 49 if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) { 50 current--; 51 } 52 53 while (current > start && !isPdfEOL(*(current - 1))) { 54 current--; 55 } 56 57 return current; 58} 59 60static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) { 61 while (current < end && !isPdfEOL(*current)) { 62 current++; 63 } 64 current++; 65 if (current < end && isPdfEOL(*current) && *current != *(current - 1)) { 66 current++; 67 } 68 return current; 69} 70 71SkPdfNativeDoc* gDoc = NULL; 72 73SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream) 74 : fAllocator(new SkPdfAllocator()) 75 , fFileContent(NULL) 76 , fContentLength(0) 77 , fRootCatalogRef(NULL) 78 , fRootCatalog(NULL) { 79 size_t size = stream->getLength(); 80 void* ptr = sk_malloc_throw(size); 81 stream->read(ptr, size); 82 83 init(ptr, size); 84} 85 86SkPdfNativeDoc::SkPdfNativeDoc(const char* path) 87 : fAllocator(new SkPdfAllocator()) 88 , fFileContent(NULL) 89 , fContentLength(0) 90 , fRootCatalogRef(NULL) 91 , fRootCatalog(NULL) { 92 gDoc = this; 93 FILE* file = fopen(path, "r"); 94 // TODO(edisonn): put this in a function that can return NULL 95 if (file) { 96 size_t size = getFileSize(path); 97 void* content = sk_malloc_throw(size); 98 bool ok = (0 != fread(content, size, 1, file)); 99 fclose(file); 100 if (!ok) { 101 sk_free(content); 102 SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, 103 "could not read file", NULL, NULL); 104 // TODO(edisonn): not nice to return like this from constructor, create a static 105 // function that can report NULL for failures. 106 return; // Doc will have 0 pages 107 } 108 109 init(content, size); 110 } 111} 112 113void SkPdfNativeDoc::init(const void* bytes, size_t length) { 114 fFileContent = (const unsigned char*)bytes; 115 fContentLength = length; 116 const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1); 117 const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine); 118 const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine); 119 120 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) { 121 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, 122 "Could not find startxref", NULL, NULL); 123 } 124 125 long xrefByteOffset = atol((const char*)xrefByteOffsetLine); 126 127 bool storeCatalog = true; 128 while (xrefByteOffset >= 0) { 129 const unsigned char* trailerStart = this->readCrossReferenceSection(fFileContent + xrefByteOffset, 130 xrefstartKeywordLine); 131 xrefByteOffset = -1; 132 if (trailerStart < xrefstartKeywordLine) { 133 this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false); 134 storeCatalog = false; 135 } 136 } 137 138 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration 139 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper 140 141 if (fRootCatalogRef) { 142 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); 143 if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) { 144 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); 145 if (tree && tree->isDictionary() && tree->valid()) { 146 fillPages(tree); 147 } 148 } 149 } 150 151 if (pages() == 0) { 152 // TODO(edisonn): probably it would be better to return NULL and make a clean document. 153 loadWithoutXRef(); 154 } 155 156 // TODO(edisonn): corrupted pdf, read it from beginning and rebuild 157 // (xref, trailer, or just read all objects) 158} 159 160void SkPdfNativeDoc::loadWithoutXRef() { 161 const unsigned char* current = fFileContent; 162 const unsigned char* end = fFileContent + fContentLength; 163 164 // TODO(edisonn): read pdf version 165 current = ignoreLine(current, end); 166 167 current = skipPdfWhiteSpaces(current, end); 168 while (current < end) { 169 SkPdfNativeObject token; 170 current = nextObject(current, end, &token, NULL, NULL); 171 if (token.isInteger()) { 172 int id = (int)token.intValue(); 173 174 token.reset(); 175 current = nextObject(current, end, &token, NULL, NULL); 176 // TODO(edisonn): generation ignored for now (used in pdfs with updates) 177 // int generation = (int)token.intValue(); 178 179 token.reset(); 180 current = nextObject(current, end, &token, NULL, NULL); 181 // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring. 182 if (!token.isKeyword("obj")) { 183 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, 184 "Could not find obj", NULL, NULL); 185 continue; 186 } 187 188 while (fObjects.count() < id + 1) { 189 reset(fObjects.append()); 190 } 191 192 fObjects[id].fOffset = current - fFileContent; 193 194 SkPdfNativeObject* obj = fAllocator->allocObject(); 195 current = nextObject(current, end, obj, fAllocator, this); 196 197 fObjects[id].fResolvedReference = obj; 198 fObjects[id].fObj = obj; 199 fObjects[id].fIsReferenceResolved = true; 200 } else if (token.isKeyword("trailer")) { 201 long dummy; 202 current = readTrailer(current, end, true, &dummy, true); 203 } else if (token.isKeyword("startxref")) { 204 token.reset(); 205 current = nextObject(current, end, &token, NULL, NULL); // ignore startxref 206 } 207 208 current = skipPdfWhiteSpaces(current, end); 209 } 210 211 // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we 212 // might not need it. 213 if (!fRootCatalogRef) { 214 for (unsigned int i = 0 ; i < objects(); i++) { 215 SkPdfNativeObject* obj = object(i); 216 SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL; 217 if (root && root->isReference()) { 218 fRootCatalogRef = root; 219 } 220 } 221 } 222 223 if (fRootCatalogRef) { 224 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef); 225 if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) { 226 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this); 227 if (tree && tree->isDictionary() && tree->valid()) { 228 fillPages(tree); 229 } 230 } 231 } 232 233 234} 235 236SkPdfNativeDoc::~SkPdfNativeDoc() { 237 sk_free((void*)fFileContent); 238 delete fAllocator; 239} 240 241const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart, 242 const unsigned char* trailerEnd) { 243 SkPdfNativeObject xref; 244 const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL); 245 246 if (!xref.isKeyword("xref")) { 247 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref", 248 NULL, NULL); 249 return trailerEnd; 250 } 251 252 SkPdfNativeObject token; 253 while (current < trailerEnd) { 254 token.reset(); 255 const unsigned char* previous = current; 256 current = nextObject(current, trailerEnd, &token, NULL, NULL); 257 if (!token.isInteger()) { 258 SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, 259 "Done readCrossReferenceSection", NULL, NULL); 260 return previous; 261 } 262 263 int startId = (int)token.intValue(); 264 token.reset(); 265 current = nextObject(current, trailerEnd, &token, NULL, NULL); 266 267 if (!token.isInteger()) { 268 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection", 269 &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); 270 return current; 271 } 272 273 int entries = (int)token.intValue(); 274 275 for (int i = 0; i < entries; i++) { 276 token.reset(); 277 current = nextObject(current, trailerEnd, &token, NULL, NULL); 278 if (!token.isInteger()) { 279 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, 280 "readCrossReferenceSection", 281 &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); 282 return current; 283 } 284 int offset = (int)token.intValue(); 285 286 token.reset(); 287 current = nextObject(current, trailerEnd, &token, NULL, NULL); 288 if (!token.isInteger()) { 289 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, 290 "readCrossReferenceSection", 291 &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL); 292 return current; 293 } 294 int generation = (int)token.intValue(); 295 296 token.reset(); 297 current = nextObject(current, trailerEnd, &token, NULL, NULL); 298 if (!token.isKeyword() || token.lenstr() != 1 || 299 (*token.c_str() != 'f' && *token.c_str() != 'n')) { 300 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, 301 "readCrossReferenceSection: f or n expected", 302 &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); 303 return current; 304 } 305 306 this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f'); 307 } 308 } 309 SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue, 310 "Unexpected end of readCrossReferenceSection", NULL, NULL); 311 return current; 312} 313 314const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart, 315 const unsigned char* trailerEnd, 316 bool storeCatalog, long* prev, bool skipKeyword) { 317 *prev = -1; 318 319 const unsigned char* current = trailerStart; 320 if (!skipKeyword) { 321 SkPdfNativeObject trailerKeyword; 322 // Use null allocator, and let it just fail if memory, it should not crash. 323 current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL); 324 325 if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() || 326 strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) { 327 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, 328 "readTrailer: trailer keyword expected", 329 &trailerKeyword, 330 SkPdfNativeObject::kKeyword_PdfObjectType, NULL); 331 return current; 332 } 333 } 334 335 SkPdfNativeObject token; 336 current = nextObject(current, trailerEnd, &token, fAllocator, NULL); 337 if (!token.isDictionary()) { 338 return current; 339 } 340 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token; 341 if (!trailer->valid()) { 342 return current; 343 } 344 345 if (storeCatalog) { 346 SkPdfNativeObject* ref = trailer->Root(NULL); 347 if (ref == NULL || !ref->isReference()) { 348 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, 349 "readTrailer: unexpected root reference", 350 ref, SkPdfNativeObject::kReference_PdfObjectType, NULL); 351 return current; 352 } 353 fRootCatalogRef = ref; 354 } 355 356 if (trailer->has_Prev()) { 357 *prev = (long)trailer->Prev(NULL); 358 } 359 360 return current; 361} 362 363void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) { 364 // TODO(edisonn): security here, verify id 365 while (fObjects.count() < id + 1) { 366 this->reset(fObjects.append()); 367 } 368 369 fObjects[id].fOffset = offset; 370 fObjects[id].fObj = NULL; 371 fObjects[id].fResolvedReference = NULL; 372 fObjects[id].fIsReferenceResolved = false; 373} 374 375SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) { 376 long startOffset = fObjects[id].fOffset; 377 //long endOffset = fObjects[id].fOffsetEnd; 378 // TODO(edisonn): use hinted endOffset 379 const unsigned char* current = fFileContent + startOffset; 380 const unsigned char* end = fFileContent + fContentLength; 381 382 SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this); 383 384 SkPdfNativeObject idObj; 385 SkPdfNativeObject generationObj; 386 SkPdfNativeObject objKeyword; 387 SkPdfNativeObject* dict = fAllocator->allocObject(); 388 389 current = nextObject(current, end, &idObj, NULL, NULL); 390 if (current >= end) { 391 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id", 392 NULL, NULL); 393 return NULL; 394 } 395 396 current = nextObject(current, end, &generationObj, NULL, NULL); 397 if (current >= end) { 398 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, 399 "reading generation", NULL, NULL); 400 return NULL; 401 } 402 403 current = nextObject(current, end, &objKeyword, NULL, NULL); 404 if (current >= end) { 405 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, 406 "reading keyword obj", NULL, NULL); 407 return NULL; 408 } 409 410 if (!idObj.isInteger() || id != idObj.intValue()) { 411 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id", 412 &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); 413 } 414 415 // TODO(edisonn): verify that the generation is the right one 416 if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) { 417 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, 418 "readObject: unexpected generation", 419 &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL); 420 } 421 422 if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) { 423 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, 424 "readObject: unexpected obj keyword", 425 &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL); 426 } 427 428 current = nextObject(current, end, dict, fAllocator, this); 429 430 // TODO(edisonn): report warning/error - verify that the last token is endobj 431 432 return dict; 433} 434 435void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) { 436 SkPdfArray* kids = tree->Kids(this); 437 if (kids == NULL) { 438 *fPages.append() = (SkPdfPageObjectDictionary*)tree; 439 return; 440 } 441 442 int cnt = (int) kids->size(); 443 for (int i = 0; i < cnt; i++) { 444 SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i)); 445 if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) { 446 *fPages.append() = (SkPdfPageObjectDictionary*)obj; 447 } else { 448 // TODO(edisonn): verify that it is a page tree indeed 449 fillPages((SkPdfPageTreeNodeDictionary*)obj); 450 } 451 } 452} 453 454int SkPdfNativeDoc::pages() const { 455 return fPages.count(); 456} 457 458SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) { 459 SkASSERT(page >= 0 && page < fPages.count()); 460 return fPages[page]; 461} 462 463 464SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) { 465 SkASSERT(page >= 0 && page < fPages.count()); 466 return fPages[page]->Resources(this); 467} 468 469// TODO(edisonn): Partial implemented. 470// Move the logics directly in the code generator for inheritable and default values? 471SkRect SkPdfNativeDoc::MediaBox(int page) { 472 SkPdfPageObjectDictionary* current = fPages[page]; 473 while (!current->has_MediaBox() && current->has_Parent()) { 474 current = (SkPdfPageObjectDictionary*)current->Parent(this); 475 } 476 if (current) { 477 return current->MediaBox(this); 478 } 479 return SkRect::MakeEmpty(); 480} 481 482size_t SkPdfNativeDoc::objects() const { 483 return fObjects.count(); 484} 485 486SkPdfNativeObject* SkPdfNativeDoc::object(int i) { 487 SkASSERT(!(i < 0 || i > fObjects.count())); 488 489 if (i < 0 || i > fObjects.count()) { 490 return NULL; 491 } 492 493 if (fObjects[i].fObj == NULL) { 494 fObjects[i].fObj = readObject(i); 495 // TODO(edisonn): For perf, when we read the cross reference sections, we should take 496 // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad 497 // stream, and fail quickly, in case we default to sequential stream read. 498 } 499 500 return fObjects[i].fObj; 501} 502 503const SkPdfMapper* SkPdfNativeDoc::mapper() const { 504 return fMapper; 505} 506 507SkPdfReal* SkPdfNativeDoc::createReal(double value) const { 508 SkPdfNativeObject* obj = fAllocator->allocObject(); 509 SkPdfNativeObject::makeReal(value, obj); 510 TRACK_OBJECT_SRC(obj); 511 return (SkPdfReal*)obj; 512} 513 514SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const { 515 SkPdfNativeObject* obj = fAllocator->allocObject(); 516 SkPdfNativeObject::makeInteger(value, obj); 517 TRACK_OBJECT_SRC(obj); 518 return (SkPdfInteger*)obj; 519} 520 521SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const { 522 SkPdfNativeObject* obj = fAllocator->allocObject(); 523 SkPdfNativeObject::makeString(sz, len, obj); 524 TRACK_OBJECT_SRC(obj); 525 return (SkPdfString*)obj; 526} 527 528SkPdfAllocator* SkPdfNativeDoc::allocator() const { 529 return fAllocator; 530} 531 532SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) { 533 if (ref && ref->isReference()) { 534 int id = ref->referenceId(); 535 // TODO(edisonn): generation/updates not supported now 536 //int gen = ref->referenceGeneration(); 537 538 // TODO(edisonn): verify id and gen expected 539 if (id < 0 || id >= fObjects.count()) { 540 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, 541 "resolve reference id out of bounds", NULL, NULL); 542 return NULL; 543 } 544 545 if (fObjects[id].fIsReferenceResolved) { 546 SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity, 547 kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL); 548 return fObjects[id].fResolvedReference; 549 } 550 551 // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow 552 // here unless we check for resolved reference on next line. 553 // Determine if the pdf is corrupted, or we have a bug here. 554 555 // Avoids recursive calls 556 fObjects[id].fIsReferenceResolved = true; 557 558 if (fObjects[id].fObj == NULL) { 559 fObjects[id].fObj = readObject(id); 560 } 561 562 if (fObjects[id].fObj != NULL && fObjects[id].fResolvedReference == NULL) { 563 if (!fObjects[id].fObj->isReference()) { 564 fObjects[id].fResolvedReference = fObjects[id].fObj; 565 } else { 566 fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj); 567 } 568 } 569 570 return fObjects[id].fResolvedReference; 571 } 572 573 return (SkPdfNativeObject*)ref; 574} 575 576size_t SkPdfNativeDoc::bytesUsed() const { 577 return fAllocator->bytesUsed() + 578 fContentLength + 579 fObjects.count() * sizeof(PublicObjectEntry) + 580 fPages.count() * sizeof(SkPdfPageObjectDictionary*) + 581 sizeof(*this); 582} 583