1/* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkPDFMetadata.h" 9#include "SkPDFTypes.h" 10#include <utility> 11 12#ifdef SK_PDF_GENERATE_PDFA 13#include "SkMD5.h" 14#endif 15 16static SkString pdf_date(const SkTime::DateTime& dt) { 17 int timeZoneMinutes = SkToInt(dt.fTimeZoneMinutes); 18 char timezoneSign = timeZoneMinutes >= 0 ? '+' : '-'; 19 int timeZoneHours = SkTAbs(timeZoneMinutes) / 60; 20 timeZoneMinutes = SkTAbs(timeZoneMinutes) % 60; 21 return SkStringPrintf( 22 "D:%04u%02u%02u%02u%02u%02u%c%02d'%02d'", 23 static_cast<unsigned>(dt.fYear), static_cast<unsigned>(dt.fMonth), 24 static_cast<unsigned>(dt.fDay), static_cast<unsigned>(dt.fHour), 25 static_cast<unsigned>(dt.fMinute), 26 static_cast<unsigned>(dt.fSecond), timezoneSign, timeZoneHours, 27 timeZoneMinutes); 28} 29 30SkPDFObject* SkPDFMetadata::createDocumentInformationDict() const { 31 SkAutoTUnref<SkPDFDict> dict(new SkPDFDict); 32 static const char* keys[] = { 33 "Title", "Author", "Subject", "Keywords", "Creator"}; 34 for (const char* key : keys) { 35 for (const SkDocument::Attribute& keyValue : fInfo) { 36 if (keyValue.fKey.equals(key)) { 37 dict->insertString(key, keyValue.fValue); 38 } 39 } 40 } 41 dict->insertString("Producer", "Skia/PDF"); 42 if (fCreation) { 43 dict->insertString("CreationDate", pdf_date(*fCreation.get())); 44 } 45 if (fModified) { 46 dict->insertString("ModDate", pdf_date(*fModified.get())); 47 } 48 return dict.detach(); 49} 50 51#ifdef SK_PDF_GENERATE_PDFA 52SkPDFMetadata::UUID SkPDFMetadata::uuid() const { 53 // The main requirement is for the UUID to be unique; the exact 54 // format of the data that will be hashed is not important. 55 SkMD5 md5; 56 const char uuidNamespace[] = "org.skia.pdf\n"; 57 md5.write(uuidNamespace, strlen(uuidNamespace)); 58 SkMSec msec = SkTime::GetMSecs(); 59 md5.write(&msec, sizeof(msec)); 60 SkTime::DateTime dateTime; 61 SkTime::GetDateTime(&dateTime); 62 md5.write(&dateTime, sizeof(dateTime)); 63 if (fCreation) { 64 md5.write(fCreation.get(), sizeof(fCreation)); 65 } 66 if (fModified) { 67 md5.write(fModified.get(), sizeof(fModified)); 68 } 69 for (const auto& kv : fInfo) { 70 md5.write(kv.fKey.c_str(), kv.fKey.size()); 71 md5.write("\037", 1); 72 md5.write(kv.fValue.c_str(), kv.fValue.size()); 73 md5.write("\036", 1); 74 } 75 SkMD5::Digest digest; 76 md5.finish(digest); 77 // See RFC 4122, page 6-7. 78 digest.data[6] = (digest.data[6] & 0x0F) | 0x30; 79 digest.data[8] = (digest.data[6] & 0x3F) | 0x80; 80 static_assert(sizeof(digest) == sizeof(UUID), "uuid_size"); 81 SkPDFMetadata::UUID uuid; 82 memcpy(&uuid, &digest, sizeof(digest)); 83 return uuid; 84} 85 86SkPDFObject* SkPDFMetadata::CreatePdfId(const UUID& doc, const UUID& instance) { 87 // /ID [ <81b14aafa313db63dbd6f981e49f94f4> 88 // <81b14aafa313db63dbd6f981e49f94f4> ] 89 SkAutoTUnref<SkPDFArray> array(new SkPDFArray); 90 static_assert(sizeof(UUID) == 16, "uuid_size"); 91 array->appendString( 92 SkString(reinterpret_cast<const char*>(&doc), sizeof(UUID))); 93 array->appendString( 94 SkString(reinterpret_cast<const char*>(&instance), sizeof(UUID))); 95 return array.detach(); 96} 97 98// Improvement on SkStringPrintf to allow for arbitrarily long output. 99// TODO: replace SkStringPrintf. 100static SkString sk_string_printf(const char* format, ...) { 101#ifdef SK_BUILD_FOR_WIN 102 va_list args; 103 va_start(args, format); 104 char buffer[1024]; 105 int length = _vsnprintf_s(buffer, sizeof(buffer), _TRUNCATE, format, args); 106 va_end(args); 107 if (length >= 0 && length < (int)sizeof(buffer)) { 108 return SkString(buffer, length); 109 } 110 va_start(args, format); 111 length = _vscprintf(format, args); 112 va_end(args); 113 114 SkString string((size_t)length); 115 va_start(args, format); 116 SkDEBUGCODE(int check = ) _vsnprintf_s(string.writable_str(), length + 1, 117 _TRUNCATE, format, args); 118 va_end(args); 119 SkASSERT(check == length); 120 SkASSERT(string[length] == '\0'); 121 return std::move(string); 122#else // C99/C++11 standard vsnprintf 123 // TODO: When all compilers support this, remove windows-specific code. 124 va_list args; 125 va_start(args, format); 126 char buffer[1024]; 127 int length = vsnprintf(buffer, sizeof(buffer), format, args); 128 va_end(args); 129 if (length < 0) { 130 return SkString(); 131 } 132 if (length < (int)sizeof(buffer)) { 133 return SkString(buffer, length); 134 } 135 SkString string((size_t)length); 136 va_start(args, format); 137 SkDEBUGCODE(int check = ) 138 vsnprintf(string.writable_str(), length + 1, format, args); 139 va_end(args); 140 SkASSERT(check == length); 141 SkASSERT(string[length] == '\0'); 142 return std::move(string); 143#endif 144} 145 146static const SkString get(const SkTArray<SkDocument::Attribute>& info, 147 const char* key) { 148 for (const auto& keyValue : info) { 149 if (keyValue.fKey.equals(key)) { 150 return keyValue.fValue; 151 } 152 } 153 return SkString(); 154} 155 156#define HEXIFY(INPUT_PTR, OUTPUT_PTR, HEX_STRING, BYTE_COUNT) \ 157 do { \ 158 for (int i = 0; i < (BYTE_COUNT); ++i) { \ 159 uint8_t value = *(INPUT_PTR)++; \ 160 *(OUTPUT_PTR)++ = (HEX_STRING)[value >> 4]; \ 161 *(OUTPUT_PTR)++ = (HEX_STRING)[value & 0xF]; \ 162 } \ 163 } while (false) 164static SkString uuid_to_string(const SkPDFMetadata::UUID& uuid) { 165 // 8-4-4-4-12 166 char buffer[36]; // [32 + 4] 167 static const char gHex[] = "0123456789abcdef"; 168 SkASSERT(strlen(gHex) == 16); 169 char* ptr = buffer; 170 const uint8_t* data = uuid.fData; 171 HEXIFY(data, ptr, gHex, 4); 172 *ptr++ = '-'; 173 HEXIFY(data, ptr, gHex, 2); 174 *ptr++ = '-'; 175 HEXIFY(data, ptr, gHex, 2); 176 *ptr++ = '-'; 177 HEXIFY(data, ptr, gHex, 2); 178 *ptr++ = '-'; 179 HEXIFY(data, ptr, gHex, 6); 180 SkASSERT(ptr == buffer + 36); 181 SkASSERT(data == uuid.fData + 16); 182 return SkString(buffer, 36); 183} 184#undef HEXIFY 185 186namespace { 187class PDFXMLObject final : public SkPDFObject { 188public: 189 PDFXMLObject(SkString xml) : fXML(std::move(xml)) {} 190 void emitObject(SkWStream* stream, 191 const SkPDFObjNumMap& omap, 192 const SkPDFSubstituteMap& smap) const override { 193 SkPDFDict dict("Metadata"); 194 dict.insertName("Subtype", "XML"); 195 dict.insertInt("Length", fXML.size()); 196 dict.emitObject(stream, omap, smap); 197 static const char streamBegin[] = " stream\n"; 198 stream->write(streamBegin, strlen(streamBegin)); 199 // Do not compress this. The standard requires that a 200 // program that does not understand PDF can grep for 201 // "<?xpacket" and extracť the entire XML. 202 stream->write(fXML.c_str(), fXML.size()); 203 static const char streamEnd[] = "\nendstream"; 204 stream->write(streamEnd, strlen(streamEnd)); 205 } 206 207private: 208 const SkString fXML; 209}; 210} // namespace 211 212static int count_xml_escape_size(const SkString& input) { 213 int extra = 0; 214 for (size_t i = 0; i < input.size(); ++i) { 215 if (input[i] == '&') { 216 extra += 4; // strlen("&") - strlen("&") 217 } else if (input[i] == '<') { 218 extra += 3; // strlen("<") - strlen("<") 219 } 220 } 221 return extra; 222} 223 224const SkString escape_xml(const SkString& input, 225 const char* before = nullptr, 226 const char* after = nullptr) { 227 if (input.size() == 0) { 228 return input; 229 } 230 // "&" --> "&" and "<" --> "<" 231 // text is assumed to be in UTF-8 232 // all strings are xml content, not attribute values. 233 size_t beforeLen = before ? strlen(before) : 0; 234 size_t afterLen = after ? strlen(after) : 0; 235 int extra = count_xml_escape_size(input); 236 SkString output(input.size() + extra + beforeLen + afterLen); 237 char* out = output.writable_str(); 238 if (before) { 239 strncpy(out, before, beforeLen); 240 out += beforeLen; 241 } 242 static const char kAmp[] = "&"; 243 static const char kLt[] = "<"; 244 for (size_t i = 0; i < input.size(); ++i) { 245 if (input[i] == '&') { 246 strncpy(out, kAmp, strlen(kAmp)); 247 out += strlen(kAmp); 248 } else if (input[i] == '<') { 249 strncpy(out, kLt, strlen(kLt)); 250 out += strlen(kLt); 251 } else { 252 *out++ = input[i]; 253 } 254 } 255 if (after) { 256 strncpy(out, after, afterLen); 257 out += afterLen; 258 } 259 // Validate that we haven't written outside of our string. 260 SkASSERT(out == &output.writable_str()[output.size()]); 261 *out = '\0'; 262 return std::move(output); 263} 264 265SkPDFObject* SkPDFMetadata::createXMPObject(const UUID& doc, 266 const UUID& instance) const { 267 static const char templateString[] = 268 "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" 269 "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" 270 " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, " 271 "2012/08/23-13:03:03\">\n" 272 "<rdf:RDF " 273 "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" 274 "<rdf:Description rdf:about=\"\"\n" 275 " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n" 276 " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" 277 " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n" 278 " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n" 279 " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n" 280 "<pdfaid:part>2</pdfaid:part>\n" 281 "<pdfaid:conformance>B</pdfaid:conformance>\n" 282 "%s" // ModifyDate 283 "%s" // CreateDate 284 "%s" // MetadataDate 285 "%s" // xmp:CreatorTool 286 "<dc:format>application/pdf</dc:format>\n" 287 "%s" // dc:title 288 "%s" // dc:description 289 "%s" // author 290 "%s" // keywords 291 "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n" 292 "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n" 293 "<pdf:Producer>Skia/PDF</pdf:Producer>\n" 294 "%s" // pdf:Keywords 295 "</rdf:Description>\n" 296 "</rdf:RDF>\n" 297 "</x:xmpmeta>\n" // Note: the standard suggests 4k of padding. 298 "<?xpacket end=\"w\"?>\n"; 299 300 SkString creationDate; 301 SkString modificationDate; 302 SkString metadataDate; 303 if (fCreation) { 304 SkString tmp; 305 fCreation->toISO8601(&tmp); 306 SkASSERT(0 == count_xml_escape_size(tmp)); 307 // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape 308 creationDate = sk_string_printf("<xmp:CreateDate>%s</xmp:CreateDate>\n", 309 tmp.c_str()); 310 } 311 if (fModified) { 312 SkString tmp; 313 fModified->toISO8601(&tmp); 314 SkASSERT(0 == count_xml_escape_size(tmp)); 315 modificationDate = sk_string_printf( 316 "<xmp:ModifyDate>%s</xmp:ModifyDate>\n", tmp.c_str()); 317 metadataDate = sk_string_printf( 318 "<xmp:MetadataDate>%s</xmp:MetadataDate>\n", tmp.c_str()); 319 } 320 321 SkString title = 322 escape_xml(get(fInfo, "Title"), "<dc:title><rdf:Alt><rdf:li>", 323 "</rdf:li></rdf:Alt></dc:title>\n"); 324 SkString author = 325 escape_xml(get(fInfo, "Author"), "<dc:creator><rdf:Bag><rdf:li>", 326 "</rdf:li></rdf:Bag></dc:creator>\n"); 327 // TODO: in theory, XMP can support multiple authors. Split on a delimiter? 328 SkString subject = escape_xml(get(fInfo, "Subject"), 329 "<dc:description><rdf:Alt><rdf:li>", 330 "</rdf:li></rdf:Alt></dc:description>\n"); 331 SkString keywords1 = 332 escape_xml(get(fInfo, "Keywords"), "<dc:subject><rdf:Bag><rdf:li>", 333 "</rdf:li></rdf:Bag></dc:subject>\n"); 334 SkString keywords2 = escape_xml(get(fInfo, "Keywords"), "<pdf:Keywords>", 335 "</pdf:Keywords>\n"); 336 337 // TODO: in theory, keywords can be a list too. 338 SkString creator = escape_xml(get(fInfo, "Creator"), "<xmp:CreatorTool>", 339 "</xmp:CreatorTool>\n"); 340 SkString documentID = uuid_to_string(doc); // no need to escape 341 SkASSERT(0 == count_xml_escape_size(documentID)); 342 SkString instanceID = uuid_to_string(instance); 343 SkASSERT(0 == count_xml_escape_size(instanceID)); 344 return new PDFXMLObject(sk_string_printf( 345 templateString, modificationDate.c_str(), creationDate.c_str(), 346 metadataDate.c_str(), creator.c_str(), title.c_str(), 347 subject.c_str(), author.c_str(), keywords1.c_str(), 348 documentID.c_str(), instanceID.c_str(), keywords2.c_str())); 349} 350 351#endif // SK_PDF_GENERATE_PDFA 352