1/* 2 * Copyright 2011 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkPDFMakeToUnicodeCmap.h" 9#include "SkPDFUtils.h" 10#include "SkUtils.h" 11 12static void append_tounicode_header(SkDynamicMemoryWStream* cmap, 13 bool multibyte) { 14 // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. 15 // It's there to prevent old version Adobe Readers from malfunctioning. 16 const char* kHeader = 17 "/CIDInit /ProcSet findresource begin\n" 18 "12 dict begin\n" 19 "begincmap\n"; 20 cmap->writeText(kHeader); 21 22 // The /CIDSystemInfo must be consistent to the one in 23 // SkPDFFont::populateCIDFont(). 24 // We can not pass over the system info object here because the format is 25 // different. This is not a reference object. 26 const char* kSysInfo = 27 "/CIDSystemInfo\n" 28 "<< /Registry (Adobe)\n" 29 "/Ordering (UCS)\n" 30 "/Supplement 0\n" 31 ">> def\n"; 32 cmap->writeText(kSysInfo); 33 34 // The CMapName must be consistent to /CIDSystemInfo above. 35 // /CMapType 2 means ToUnicode. 36 // Codespace range just tells the PDF processor the valid range. 37 const char* kTypeInfoHeader = 38 "/CMapName /Adobe-Identity-UCS def\n" 39 "/CMapType 2 def\n" 40 "1 begincodespacerange\n"; 41 cmap->writeText(kTypeInfoHeader); 42 if (multibyte) { 43 cmap->writeText("<0000> <FFFF>\n"); 44 } else { 45 cmap->writeText("<00> <FF>\n"); 46 } 47 cmap->writeText("endcodespacerange\n"); 48} 49 50static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { 51 const char kFooter[] = 52 "endcmap\n" 53 "CMapName currentdict /CMap defineresource pop\n" 54 "end\n" 55 "end"; 56 cmap->writeText(kFooter); 57} 58 59namespace { 60struct BFChar { 61 SkGlyphID fGlyphId; 62 SkUnichar fUnicode; 63}; 64 65struct BFRange { 66 SkGlyphID fStart; 67 SkGlyphID fEnd; 68 SkUnichar fUnicode; 69}; 70} // namespace 71 72static void write_glyph(SkDynamicMemoryWStream* cmap, 73 bool multiByte, 74 SkGlyphID gid) { 75 if (multiByte) { 76 SkPDFUtils::WriteUInt16BE(cmap, gid); 77 } else { 78 SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); 79 } 80} 81 82static void append_bfchar_section(const SkTDArray<BFChar>& bfchar, 83 bool multiByte, 84 SkDynamicMemoryWStream* cmap) { 85 // PDF spec defines that every bf* list can have at most 100 entries. 86 for (int i = 0; i < bfchar.count(); i += 100) { 87 int count = bfchar.count() - i; 88 count = SkMin32(count, 100); 89 cmap->writeDecAsText(count); 90 cmap->writeText(" beginbfchar\n"); 91 for (int j = 0; j < count; ++j) { 92 cmap->writeText("<"); 93 write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); 94 cmap->writeText("> <"); 95 SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode); 96 cmap->writeText(">\n"); 97 } 98 cmap->writeText("endbfchar\n"); 99 } 100} 101 102static void append_bfrange_section(const SkTDArray<BFRange>& bfrange, 103 bool multiByte, 104 SkDynamicMemoryWStream* cmap) { 105 // PDF spec defines that every bf* list can have at most 100 entries. 106 for (int i = 0; i < bfrange.count(); i += 100) { 107 int count = bfrange.count() - i; 108 count = SkMin32(count, 100); 109 cmap->writeDecAsText(count); 110 cmap->writeText(" beginbfrange\n"); 111 for (int j = 0; j < count; ++j) { 112 cmap->writeText("<"); 113 write_glyph(cmap, multiByte, bfrange[i + j].fStart); 114 cmap->writeText("> <"); 115 write_glyph(cmap, multiByte, bfrange[i + j].fEnd); 116 cmap->writeText("> <"); 117 SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode); 118 cmap->writeText(">\n"); 119 } 120 cmap->writeText("endbfrange\n"); 121 } 122} 123 124// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe 125// Technote 5014. 126// The function is not static so we can test it in unit tests. 127// 128// Current implementation guarantees bfchar and bfrange entries do not overlap. 129// 130// Current implementation does not attempt aggresive optimizations against 131// following case because the specification is not clear. 132// 133// 4 beginbfchar 1 beginbfchar 134// <0003> <0013> <0020> <0014> 135// <0005> <0015> to endbfchar 136// <0007> <0017> 1 beginbfrange 137// <0020> <0014> <0003> <0007> <0013> 138// endbfchar endbfrange 139// 140// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may 141// overlap, but succeeding maps supersede preceding maps." 142// 143// In case of searching text in PDF, bfrange will have higher precedence so 144// typing char id 0x0014 in search box will get glyph id 0x0004 first. However, 145// the spec does not mention how will this kind of conflict being resolved. 146// 147// For the worst case (having 65536 continuous unicode and we use every other 148// one of them), the possible savings by aggressive optimization is 416KB 149// pre-compressed and does not provide enough motivation for implementation. 150void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode, 151 const SkBitSet* subset, 152 SkDynamicMemoryWStream* cmap, 153 bool multiByteGlyphs, 154 SkGlyphID firstGlyphID, 155 SkGlyphID lastGlyphID) { 156 if (glyphToUnicode.isEmpty()) { 157 return; 158 } 159 int glyphOffset = 0; 160 if (!multiByteGlyphs) { 161 glyphOffset = firstGlyphID - 1; 162 } 163 164 SkTDArray<BFChar> bfcharEntries; 165 SkTDArray<BFRange> bfrangeEntries; 166 167 BFRange currentRangeEntry = {0, 0, 0}; 168 bool rangeEmpty = true; 169 const int limit = 170 SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset; 171 172 for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { 173 bool inSubset = i < limit && 174 (subset == nullptr || subset->has(i + glyphOffset)); 175 if (!rangeEmpty) { 176 // PDF spec requires bfrange not changing the higher byte, 177 // e.g. <1035> <10FF> <2222> is ok, but 178 // <1035> <1100> <2222> is no good 179 bool inRange = 180 i == currentRangeEntry.fEnd + 1 && 181 i >> 8 == currentRangeEntry.fStart >> 8 && 182 i < limit && 183 glyphToUnicode[i + glyphOffset] == 184 currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; 185 if (!inSubset || !inRange) { 186 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { 187 bfrangeEntries.push(currentRangeEntry); 188 } else { 189 BFChar* entry = bfcharEntries.append(); 190 entry->fGlyphId = currentRangeEntry.fStart; 191 entry->fUnicode = currentRangeEntry.fUnicode; 192 } 193 rangeEmpty = true; 194 } 195 } 196 if (inSubset) { 197 currentRangeEntry.fEnd = i; 198 if (rangeEmpty) { 199 currentRangeEntry.fStart = i; 200 currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset]; 201 rangeEmpty = false; 202 } 203 } 204 } 205 206 // The spec requires all bfchar entries for a font must come before bfrange 207 // entries. 208 append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); 209 append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); 210} 211 212sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap( 213 const SkTDArray<SkUnichar>& glyphToUnicode, 214 const SkBitSet* subset, 215 bool multiByteGlyphs, 216 SkGlyphID firstGlyphID, 217 SkGlyphID lastGlyphID) { 218 SkDynamicMemoryWStream cmap; 219 append_tounicode_header(&cmap, multiByteGlyphs); 220 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, 221 firstGlyphID, lastGlyphID); 222 append_cmap_footer(&cmap); 223 return sk_make_sp<SkPDFStream>( 224 std::unique_ptr<SkStreamAsset>(cmap.detachAsStream())); 225} 226