1/* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkMD5.h" 9#include "SkMilestone.h" 10#include "SkPDFMetadata.h" 11#include "SkPDFTypes.h" 12#include "SkUtils.h" 13 14#include <utility> 15 16#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X) 17#define SKPDF_STRING_IMPL(X) #X 18#define SKPDF_PRODUCER "Skia/PDF m" SKPDF_STRING(SK_MILESTONE) 19#define SKPDF_CUSTOM_PRODUCER_KEY "ProductionLibrary" 20 21static SkString pdf_date(const SkTime::DateTime& dt) { 22 int timeZoneMinutes = SkToInt(dt.fTimeZoneMinutes); 23 char timezoneSign = timeZoneMinutes >= 0 ? '+' : '-'; 24 int timeZoneHours = SkTAbs(timeZoneMinutes) / 60; 25 timeZoneMinutes = SkTAbs(timeZoneMinutes) % 60; 26 return SkStringPrintf( 27 "D:%04u%02u%02u%02u%02u%02u%c%02d'%02d'", 28 static_cast<unsigned>(dt.fYear), static_cast<unsigned>(dt.fMonth), 29 static_cast<unsigned>(dt.fDay), static_cast<unsigned>(dt.fHour), 30 static_cast<unsigned>(dt.fMinute), 31 static_cast<unsigned>(dt.fSecond), timezoneSign, timeZoneHours, 32 timeZoneMinutes); 33} 34 35namespace { 36static const struct { 37 const char* const key; 38 SkString SkDocument::PDFMetadata::*const valuePtr; 39} gMetadataKeys[] = { 40 {"Title", &SkDocument::PDFMetadata::fTitle}, 41 {"Author", &SkDocument::PDFMetadata::fAuthor}, 42 {"Subject", &SkDocument::PDFMetadata::fSubject}, 43 {"Keywords", &SkDocument::PDFMetadata::fKeywords}, 44 {"Creator", &SkDocument::PDFMetadata::fCreator}, 45}; 46} // namespace 47 48sk_sp<SkPDFObject> SkPDFMetadata::MakeDocumentInformationDict( 49 const SkDocument::PDFMetadata& metadata) { 50 auto dict = sk_make_sp<SkPDFDict>(); 51 for (const auto keyValuePtr : gMetadataKeys) { 52 const SkString& value = metadata.*(keyValuePtr.valuePtr); 53 if (value.size() > 0) { 54 dict->insertString(keyValuePtr.key, value); 55 } 56 } 57 if (metadata.fProducer.isEmpty()) { 58 dict->insertString("Producer", SKPDF_PRODUCER); 59 } else { 60 dict->insertString("Producer", metadata.fProducer); 61 dict->insertString(SKPDF_CUSTOM_PRODUCER_KEY, SKPDF_PRODUCER); 62 } 63 if (metadata.fCreation.fEnabled) { 64 dict->insertString("CreationDate", 65 pdf_date(metadata.fCreation.fDateTime)); 66 } 67 if (metadata.fModified.fEnabled) { 68 dict->insertString("ModDate", pdf_date(metadata.fModified.fDateTime)); 69 } 70 return dict; 71} 72 73SkPDFMetadata::UUID SkPDFMetadata::CreateUUID( 74 const SkDocument::PDFMetadata& metadata) { 75 // The main requirement is for the UUID to be unique; the exact 76 // format of the data that will be hashed is not important. 77 SkMD5 md5; 78 const char uuidNamespace[] = "org.skia.pdf\n"; 79 md5.writeText(uuidNamespace); 80 double msec = SkTime::GetMSecs(); 81 md5.write(&msec, sizeof(msec)); 82 SkTime::DateTime dateTime; 83 SkTime::GetDateTime(&dateTime); 84 md5.write(&dateTime, sizeof(dateTime)); 85 if (metadata.fCreation.fEnabled) { 86 md5.write(&metadata.fCreation.fDateTime, 87 sizeof(metadata.fCreation.fDateTime)); 88 } 89 if (metadata.fModified.fEnabled) { 90 md5.write(&metadata.fModified.fDateTime, 91 sizeof(metadata.fModified.fDateTime)); 92 } 93 94 for (const auto keyValuePtr : gMetadataKeys) { 95 md5.writeText(keyValuePtr.key); 96 md5.write("\037", 1); 97 const SkString& value = metadata.*(keyValuePtr.valuePtr); 98 md5.write(value.c_str(), value.size()); 99 md5.write("\036", 1); 100 } 101 SkMD5::Digest digest; 102 md5.finish(digest); 103 // See RFC 4122, page 6-7. 104 digest.data[6] = (digest.data[6] & 0x0F) | 0x30; 105 digest.data[8] = (digest.data[6] & 0x3F) | 0x80; 106 static_assert(sizeof(digest) == sizeof(UUID), "uuid_size"); 107 SkPDFMetadata::UUID uuid; 108 memcpy(&uuid, &digest, sizeof(digest)); 109 return uuid; 110} 111 112sk_sp<SkPDFObject> SkPDFMetadata::MakePdfId(const UUID& doc, 113 const UUID& instance) { 114 // /ID [ <81b14aafa313db63dbd6f981e49f94f4> 115 // <81b14aafa313db63dbd6f981e49f94f4> ] 116 auto array = sk_make_sp<SkPDFArray>(); 117 static_assert(sizeof(SkPDFMetadata::UUID) == 16, "uuid_size"); 118 array->appendString( 119 SkString(reinterpret_cast<const char*>(&doc), sizeof(UUID))); 120 array->appendString( 121 SkString(reinterpret_cast<const char*>(&instance), sizeof(UUID))); 122 return array; 123} 124 125// Convert a block of memory to hexadecimal. Input and output pointers will be 126// moved to end of the range. 127static void hexify(const uint8_t** inputPtr, char** outputPtr, int count) { 128 SkASSERT(inputPtr && *inputPtr); 129 SkASSERT(outputPtr && *outputPtr); 130 while (count-- > 0) { 131 uint8_t value = *(*inputPtr)++; 132 *(*outputPtr)++ = SkHexadecimalDigits::gLower[value >> 4]; 133 *(*outputPtr)++ = SkHexadecimalDigits::gLower[value & 0xF]; 134 } 135} 136 137static SkString uuid_to_string(const SkPDFMetadata::UUID& uuid) { 138 // 8-4-4-4-12 139 char buffer[36]; // [32 + 4] 140 char* ptr = buffer; 141 const uint8_t* data = uuid.fData; 142 hexify(&data, &ptr, 4); 143 *ptr++ = '-'; 144 hexify(&data, &ptr, 2); 145 *ptr++ = '-'; 146 hexify(&data, &ptr, 2); 147 *ptr++ = '-'; 148 hexify(&data, &ptr, 2); 149 *ptr++ = '-'; 150 hexify(&data, &ptr, 6); 151 SkASSERT(ptr == buffer + 36); 152 SkASSERT(data == uuid.fData + 16); 153 return SkString(buffer, 36); 154} 155 156namespace { 157class PDFXMLObject final : public SkPDFObject { 158public: 159 PDFXMLObject(SkString xml) : fXML(std::move(xml)) {} 160 void emitObject(SkWStream* stream, 161 const SkPDFObjNumMap& omap) const override { 162 SkPDFDict dict("Metadata"); 163 dict.insertName("Subtype", "XML"); 164 dict.insertInt("Length", fXML.size()); 165 dict.emitObject(stream, omap); 166 static const char streamBegin[] = " stream\n"; 167 stream->writeText(streamBegin); 168 // Do not compress this. The standard requires that a 169 // program that does not understand PDF can grep for 170 // "<?xpacket" and extract the entire XML. 171 stream->write(fXML.c_str(), fXML.size()); 172 static const char streamEnd[] = "\nendstream"; 173 stream->writeText(streamEnd); 174 } 175 176private: 177 const SkString fXML; 178}; 179} // namespace 180 181static int count_xml_escape_size(const SkString& input) { 182 int extra = 0; 183 for (size_t i = 0; i < input.size(); ++i) { 184 if (input[i] == '&') { 185 extra += 4; // strlen("&") - strlen("&") 186 } else if (input[i] == '<') { 187 extra += 3; // strlen("<") - strlen("<") 188 } 189 } 190 return extra; 191} 192 193const SkString escape_xml(const SkString& input, 194 const char* before = nullptr, 195 const char* after = nullptr) { 196 if (input.size() == 0) { 197 return input; 198 } 199 // "&" --> "&" and "<" --> "<" 200 // text is assumed to be in UTF-8 201 // all strings are xml content, not attribute values. 202 size_t beforeLen = before ? strlen(before) : 0; 203 size_t afterLen = after ? strlen(after) : 0; 204 int extra = count_xml_escape_size(input); 205 SkString output(input.size() + extra + beforeLen + afterLen); 206 char* out = output.writable_str(); 207 if (before) { 208 strncpy(out, before, beforeLen); 209 out += beforeLen; 210 } 211 static const char kAmp[] = "&"; 212 static const char kLt[] = "<"; 213 for (size_t i = 0; i < input.size(); ++i) { 214 if (input[i] == '&') { 215 strncpy(out, kAmp, strlen(kAmp)); 216 out += strlen(kAmp); 217 } else if (input[i] == '<') { 218 strncpy(out, kLt, strlen(kLt)); 219 out += strlen(kLt); 220 } else { 221 *out++ = input[i]; 222 } 223 } 224 if (after) { 225 strncpy(out, after, afterLen); 226 out += afterLen; 227 } 228 // Validate that we haven't written outside of our string. 229 SkASSERT(out == &output.writable_str()[output.size()]); 230 *out = '\0'; 231 return output; 232} 233 234sk_sp<SkPDFObject> SkPDFMetadata::MakeXMPObject( 235 const SkDocument::PDFMetadata& metadata, 236 const UUID& doc, 237 const UUID& instance) { 238 static const char templateString[] = 239 "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" 240 "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" 241 " x:xmptk=\"Adobe XMP Core 5.4-c005 78.147326, " 242 "2012/08/23-13:03:03\">\n" 243 "<rdf:RDF " 244 "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" 245 "<rdf:Description rdf:about=\"\"\n" 246 " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n" 247 " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" 248 " xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\"\n" 249 " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n" 250 " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n" 251 "<pdfaid:part>2</pdfaid:part>\n" 252 "<pdfaid:conformance>B</pdfaid:conformance>\n" 253 "%s" // ModifyDate 254 "%s" // CreateDate 255 "%s" // xmp:CreatorTool 256 "<dc:format>application/pdf</dc:format>\n" 257 "%s" // dc:title 258 "%s" // dc:description 259 "%s" // author 260 "%s" // keywords 261 "<xmpMM:DocumentID>uuid:%s</xmpMM:DocumentID>\n" 262 "<xmpMM:InstanceID>uuid:%s</xmpMM:InstanceID>\n" 263 "%s" // pdf:Producer 264 "%s" // pdf:Keywords 265 "</rdf:Description>\n" 266 "</rdf:RDF>\n" 267 "</x:xmpmeta>\n" // Note: the standard suggests 4k of padding. 268 "<?xpacket end=\"w\"?>\n"; 269 270 SkString creationDate; 271 SkString modificationDate; 272 if (metadata.fCreation.fEnabled) { 273 SkString tmp; 274 metadata.fCreation.fDateTime.toISO8601(&tmp); 275 SkASSERT(0 == count_xml_escape_size(tmp)); 276 // YYYY-mm-ddTHH:MM:SS[+|-]ZZ:ZZ; no need to escape 277 creationDate = SkStringPrintf("<xmp:CreateDate>%s</xmp:CreateDate>\n", 278 tmp.c_str()); 279 } 280 if (metadata.fModified.fEnabled) { 281 SkString tmp; 282 metadata.fModified.fDateTime.toISO8601(&tmp); 283 SkASSERT(0 == count_xml_escape_size(tmp)); 284 modificationDate = SkStringPrintf( 285 "<xmp:ModifyDate>%s</xmp:ModifyDate>\n", tmp.c_str()); 286 } 287 SkString title = 288 escape_xml(metadata.fTitle, 289 "<dc:title><rdf:Alt><rdf:li xml:lang=\"x-default\">", 290 "</rdf:li></rdf:Alt></dc:title>\n"); 291 SkString author = 292 escape_xml(metadata.fAuthor, "<dc:creator><rdf:Bag><rdf:li>", 293 "</rdf:li></rdf:Bag></dc:creator>\n"); 294 // TODO: in theory, XMP can support multiple authors. Split on a delimiter? 295 SkString subject = escape_xml( 296 metadata.fSubject, 297 "<dc:description><rdf:Alt><rdf:li xml:lang=\"x-default\">", 298 "</rdf:li></rdf:Alt></dc:description>\n"); 299 SkString keywords1 = 300 escape_xml(metadata.fKeywords, "<dc:subject><rdf:Bag><rdf:li>", 301 "</rdf:li></rdf:Bag></dc:subject>\n"); 302 SkString keywords2 = escape_xml(metadata.fKeywords, "<pdf:Keywords>", 303 "</pdf:Keywords>\n"); 304 // TODO: in theory, keywords can be a list too. 305 306 SkString producer("<pdf:Producer>" SKPDF_PRODUCER "</pdf:Producer>\n"); 307 if (!metadata.fProducer.isEmpty()) { 308 // TODO: register a developer prefix to make 309 // <skia:SKPDF_CUSTOM_PRODUCER_KEY> a real XML tag. 310 producer = escape_xml( 311 metadata.fProducer, "<pdf:Producer>", 312 "</pdf:Producer>\n<!-- <skia:" SKPDF_CUSTOM_PRODUCER_KEY ">" 313 SKPDF_PRODUCER "</skia:" SKPDF_CUSTOM_PRODUCER_KEY "> -->\n"); 314 } 315 316 SkString creator = escape_xml(metadata.fCreator, "<xmp:CreatorTool>", 317 "</xmp:CreatorTool>\n"); 318 SkString documentID = uuid_to_string(doc); // no need to escape 319 SkASSERT(0 == count_xml_escape_size(documentID)); 320 SkString instanceID = uuid_to_string(instance); 321 SkASSERT(0 == count_xml_escape_size(instanceID)); 322 return sk_make_sp<PDFXMLObject>(SkStringPrintf( 323 templateString, modificationDate.c_str(), creationDate.c_str(), 324 creator.c_str(), title.c_str(), subject.c_str(), author.c_str(), 325 keywords1.c_str(), documentID.c_str(), instanceID.c_str(), 326 producer.c_str(), keywords2.c_str())); 327} 328 329#undef SKPDF_CUSTOM_PRODUCER_KEY 330#undef SKPDF_PRODUCER 331#undef SKPDF_STRING 332#undef SKPDF_STRING_IMPL 333