1/*
2 * Copyright 2011 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkPDFMakeToUnicodeCmap.h"
9#include "SkPDFUtils.h"
10#include "SkUtils.h"
11
12static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
13                                    bool multibyte) {
14    // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
15    // It's there to prevent old version Adobe Readers from malfunctioning.
16    const char* kHeader =
17        "/CIDInit /ProcSet findresource begin\n"
18        "12 dict begin\n"
19        "begincmap\n";
20    cmap->writeText(kHeader);
21
22    // The /CIDSystemInfo must be consistent to the one in
23    // SkPDFFont::populateCIDFont().
24    // We can not pass over the system info object here because the format is
25    // different. This is not a reference object.
26    const char* kSysInfo =
27        "/CIDSystemInfo\n"
28        "<<  /Registry (Adobe)\n"
29        "/Ordering (UCS)\n"
30        "/Supplement 0\n"
31        ">> def\n";
32    cmap->writeText(kSysInfo);
33
34    // The CMapName must be consistent to /CIDSystemInfo above.
35    // /CMapType 2 means ToUnicode.
36    // Codespace range just tells the PDF processor the valid range.
37    const char* kTypeInfoHeader =
38        "/CMapName /Adobe-Identity-UCS def\n"
39        "/CMapType 2 def\n"
40        "1 begincodespacerange\n";
41    cmap->writeText(kTypeInfoHeader);
42    if (multibyte) {
43        cmap->writeText("<0000> <FFFF>\n");
44    } else {
45        cmap->writeText("<00> <FF>\n");
46    }
47    cmap->writeText("endcodespacerange\n");
48}
49
50static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
51    const char kFooter[] =
52        "endcmap\n"
53        "CMapName currentdict /CMap defineresource pop\n"
54        "end\n"
55        "end";
56    cmap->writeText(kFooter);
57}
58
59namespace {
60struct BFChar {
61    SkGlyphID fGlyphId;
62    SkUnichar fUnicode;
63};
64
65struct BFRange {
66    SkGlyphID fStart;
67    SkGlyphID fEnd;
68    SkUnichar fUnicode;
69};
70}  // namespace
71
72static void write_glyph(SkDynamicMemoryWStream* cmap,
73                        bool multiByte,
74                        SkGlyphID gid) {
75    if (multiByte) {
76        SkPDFUtils::WriteUInt16BE(cmap, gid);
77    } else {
78        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
79    }
80}
81
82static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
83                                  bool multiByte,
84                                  SkDynamicMemoryWStream* cmap) {
85    // PDF spec defines that every bf* list can have at most 100 entries.
86    for (int i = 0; i < bfchar.count(); i += 100) {
87        int count = bfchar.count() - i;
88        count = SkMin32(count, 100);
89        cmap->writeDecAsText(count);
90        cmap->writeText(" beginbfchar\n");
91        for (int j = 0; j < count; ++j) {
92            cmap->writeText("<");
93            write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
94            cmap->writeText("> <");
95            SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
96            cmap->writeText(">\n");
97        }
98        cmap->writeText("endbfchar\n");
99    }
100}
101
102static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
103                                   bool multiByte,
104                                   SkDynamicMemoryWStream* cmap) {
105    // PDF spec defines that every bf* list can have at most 100 entries.
106    for (int i = 0; i < bfrange.count(); i += 100) {
107        int count = bfrange.count() - i;
108        count = SkMin32(count, 100);
109        cmap->writeDecAsText(count);
110        cmap->writeText(" beginbfrange\n");
111        for (int j = 0; j < count; ++j) {
112            cmap->writeText("<");
113            write_glyph(cmap, multiByte, bfrange[i + j].fStart);
114            cmap->writeText("> <");
115            write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
116            cmap->writeText("> <");
117            SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
118            cmap->writeText(">\n");
119        }
120        cmap->writeText("endbfrange\n");
121    }
122}
123
124// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
125// Technote 5014.
126// The function is not static so we can test it in unit tests.
127//
128// Current implementation guarantees bfchar and bfrange entries do not overlap.
129//
// The current implementation does not attempt aggressive optimizations such as
// the following, because the specification is not clear.
132//
133// 4 beginbfchar          1 beginbfchar
134// <0003> <0013>          <0020> <0014>
135// <0005> <0015>    to    endbfchar
136// <0007> <0017>          1 beginbfrange
137// <0020> <0014>          <0003> <0007> <0013>
138// endbfchar              endbfrange
139//
140// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
141// overlap, but succeeding maps supersede preceding maps."
142//
// When searching text in a PDF, bfrange will have higher precedence, so
// typing char id 0x0014 in the search box will get glyph id 0x0004 first.
// However, the spec does not say how this kind of conflict is resolved.
146//
147// For the worst case (having 65536 continuous unicode and we use every other
148// one of them), the possible savings by aggressive optimization is 416KB
149// pre-compressed and does not provide enough motivation for implementation.
150void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
151                             const SkBitSet* subset,
152                             SkDynamicMemoryWStream* cmap,
153                             bool multiByteGlyphs,
154                             SkGlyphID firstGlyphID,
155                             SkGlyphID lastGlyphID) {
156    if (glyphToUnicode.isEmpty()) {
157        return;
158    }
159    int glyphOffset = 0;
160    if (!multiByteGlyphs) {
161        glyphOffset = firstGlyphID - 1;
162    }
163
164    SkTDArray<BFChar> bfcharEntries;
165    SkTDArray<BFRange> bfrangeEntries;
166
167    BFRange currentRangeEntry = {0, 0, 0};
168    bool rangeEmpty = true;
169    const int limit =
170            SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
171
172    for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
173        bool inSubset = i < limit &&
174                        (subset == nullptr || subset->has(i + glyphOffset));
175        if (!rangeEmpty) {
176            // PDF spec requires bfrange not changing the higher byte,
177            // e.g. <1035> <10FF> <2222> is ok, but
178            //      <1035> <1100> <2222> is no good
179            bool inRange =
180                i == currentRangeEntry.fEnd + 1 &&
181                i >> 8 == currentRangeEntry.fStart >> 8 &&
182                i < limit &&
183                glyphToUnicode[i + glyphOffset] ==
184                    currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
185            if (!inSubset || !inRange) {
186                if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
187                    bfrangeEntries.push(currentRangeEntry);
188                } else {
189                    BFChar* entry = bfcharEntries.append();
190                    entry->fGlyphId = currentRangeEntry.fStart;
191                    entry->fUnicode = currentRangeEntry.fUnicode;
192                }
193                rangeEmpty = true;
194            }
195        }
196        if (inSubset) {
197            currentRangeEntry.fEnd = i;
198            if (rangeEmpty) {
199              currentRangeEntry.fStart = i;
200              currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
201              rangeEmpty = false;
202            }
203        }
204    }
205
206    // The spec requires all bfchar entries for a font must come before bfrange
207    // entries.
208    append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
209    append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
210}
211
212sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
213        const SkTDArray<SkUnichar>& glyphToUnicode,
214        const SkBitSet* subset,
215        bool multiByteGlyphs,
216        SkGlyphID firstGlyphID,
217        SkGlyphID lastGlyphID) {
218    SkDynamicMemoryWStream cmap;
219    append_tounicode_header(&cmap, multiByteGlyphs);
220    SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
221                            firstGlyphID, lastGlyphID);
222    append_cmap_footer(&cmap);
223    return sk_make_sp<SkPDFStream>(
224            std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
225}
226