/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Determine coverage of font given its raw "cmap" OpenType table

#define LOG_TAG "Minikin"

#include <algorithm>
#include <limits>
#include <vector>
using std::vector;

#include <log/log.h>

#include <minikin/CmapCoverage.h>
#include <minikin/SparseBitSet.h>

#include "MinikinInternal.h"

namespace minikin {

constexpr uint32_t U32MAX = std::numeric_limits<uint32_t>::max();

// These could perhaps be optimized to use __builtin_bswap16 and friends.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]);
}

static uint32_t readU24(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 |
        ((uint32_t)data[offset + 2]);
}

static uint32_t readU32(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 |
        ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]);
}

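// The coverage list is a flattened, sorted sequence of [start, end) pairs. Callers are expected
// to add ranges in ascending order of start; a range that begins at or before the current last
// end simply extends the previous entry. For example (a sketch of the intended use):
//   vector<uint32_t> v;
//   addRange(v, 5, 8);
//   addRange(v, 8, 10);  // v is now { 5, 10 }: the two ranges are coalesced.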
static void addRange(vector<uint32_t> &coverage, uint32_t start, uint32_t end) {
#ifdef VERBOSE_DEBUG
    ALOGD("adding range %u-%u\n", start, end);
#endif
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
    } else {
        coverage.back() = end;
    }
}

struct Range {
    uint32_t start;  // inclusive
    uint32_t end;  // exclusive

    static Range InvalidRange() {
        return Range({ U32MAX, U32MAX });
    }

    inline bool isValid() const {
        return start != U32MAX && end != U32MAX;
    }

    // Returns true if left and right intersect.
    inline static bool intersects(const Range& left, const Range& right) {
        return left.isValid() && right.isValid() &&
                left.start < right.end && right.start < left.end;
    }

    // Returns the merged range. This method assumes that left and right are both valid and that
    // they intersect.
    static Range merge(const Range& left, const Range& right) {
        return Range({ std::min(left.start, right.start), std::max(left.end, right.end) });
    }
};

// Returns the range starting at index i of the flattened ranges vector, i.e. { r[i], r[i + 1] }.
// Returns InvalidRange if i is out of bounds.
static inline Range getRange(const std::vector<uint32_t>& r, size_t i) {
    return i + 1 < r.size() ? Range({ r[i], r[i + 1] }) : Range::InvalidRange();
}

// Merge two sorted lists of ranges into one sorted list.
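// Both inputs and the output use the flattened [start, end) representation described above.
// As a concrete example (hand-traced, not from a test): merging { 10, 20, 30, 40 } with
// { 15, 25 } yields { 10, 25, 30, 40 }, since [10, 20) and [15, 25) overlap and coalesce into
// [10, 25), while [30, 40) is carried over unchanged.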
static std::vector<uint32_t> mergeRanges(
        const std::vector<uint32_t>& lRanges, const std::vector<uint32_t>& rRanges) {
    std::vector<uint32_t> out;

    const size_t lsize = lRanges.size();
    const size_t rsize = rRanges.size();
    out.reserve(lsize + rsize);
    size_t ri = 0;
    size_t li = 0;
    while (li < lsize || ri < rsize) {
        Range left = getRange(lRanges, li);
        Range right = getRange(rRanges, ri);

        if (!right.isValid()) {
            // No ranges left in rRanges. Just append all remaining ranges from lRanges.
            do {
                Range r = getRange(lRanges, li);
                addRange(out, r.start, r.end);
                li += 2;
            } while (li < lsize);
            break;
        } else if (!left.isValid()) {
            // No ranges left in lRanges. Just append all remaining ranges from rRanges.
            do {
                Range r = getRange(rRanges, ri);
                addRange(out, r.start, r.end);
                ri += 2;
            } while (ri < rsize);
            break;
        } else if (!Range::intersects(left, right)) {
            // No intersection. Add the range that starts first.
            if (left.start < right.start) {
                addRange(out, left.start, left.end);
                li += 2;
            } else {
                addRange(out, right.start, right.end);
                ri += 2;
            }
        } else {
            Range merged = Range::merge(left, right);
            li += 2;
            ri += 2;
            left = getRange(lRanges, li);
            right = getRange(rRanges, ri);
            while (Range::intersects(merged, left) || Range::intersects(merged, right)) {
                if (Range::intersects(merged, left)) {
                    merged = Range::merge(merged, left);
                    li += 2;
                    left = getRange(lRanges, li);
                } else {
                    merged = Range::merge(merged, right);
                    ri += 2;
                    right = getRange(rRanges, ri);
                }
            }
            addRange(out, merged.start, merged.end);
        }
    }

    return out;
}

// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
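// Per the OpenType cmap format 4 layout, the subtable header is 14 bytes (format, length,
// language, segCountX2, searchRange, entrySelector, rangeShift), followed by endCode[segCount],
// a reserved pad, startCode[segCount], idDelta[segCount] and idRangeOffset[segCount]. The offsets
// used below follow from that layout: startCode[i] lives at 16 + 2 * (segCount + i), idDelta[i]
// at 16 + 2 * (2 * segCount + i), and idRangeOffset[i] at 16 + 2 * (3 * segCount + i).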
static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kSegCountOffset = 6;
    const size_t kEndCountOffset = 14;
    const size_t kHeaderSize = 16;
    const size_t kSegmentSize = 8;  // total size of array elements for one segment
    if (kEndCountOffset > size) {
        return false;
    }
    size_t segCount = readU16(data, kSegCountOffset) >> 1;
    if (kHeaderSize + segCount * kSegmentSize > size) {
        return false;
    }
    for (size_t i = 0; i < segCount; i++) {
        uint32_t end = readU16(data, kEndCountOffset + 2 * i);
        uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i));
        if (end < start) {
            // invalid segment range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }
        uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
        if (rangeOffset == 0) {
            uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
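            // With rangeOffset == 0 the glyph for code point j is (j + delta) & 0xffff, so the
            // glyph ids for this segment are consecutive modulo 0x10000. Glyph 0 (.notdef) is
            // hit somewhere in [start, end] exactly when (end + delta) & 0xffff <= end - start;
            // otherwise the whole segment is mapped and can be added as a single range.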
            if (((end + delta) & 0xffff) > end - start) {
                addRange(coverage, start, end + 1);
            } else {
                for (uint32_t j = start; j < end + 1; j++) {
                    if (((j + delta) & 0xffff) != 0) {
                        addRange(coverage, j, j + 1);
                    }
                }
            }
        } else {
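            // Non-zero rangeOffset: per the spec, the glyph id for code point j is read from
            // glyphIdArray at the address &idRangeOffset[i] + idRangeOffset[i]
            // + 2 * (j - startCode[i]). Since idRangeOffset[i] itself sits at byte offset
            // 16 + 6 * segCount + 2 * i, that works out to the actualRangeOffset computed below.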
            for (uint32_t j = start; j < end + 1; j++) {
                uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset +
                    (i + j - start) * 2;
                if (actualRangeOffset + 2 > size) {
                    // invalid rangeOffset is considered a "warning" by OpenType Sanitizer
                    continue;
                }
                uint32_t glyphId = readU16(data, actualRangeOffset);
                if (glyphId != 0) {
                    addRange(coverage, j, j + 1);
                }
            }
        }
    }
    return true;
}

// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector
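// A format 12 subtable has a 16-byte header (format, reserved, length, language, numGroups),
// followed by numGroups SequentialMapGroup records of 12 bytes each: startCharCode, endCharCode
// and startGlyphID, all uint32. Only the character code range matters for coverage.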
static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kNGroupsOffset = 12;
    const size_t kFirstGroupOffset = 16;
    const size_t kGroupSize = 12;
    const size_t kStartCharCodeOffset = 0;
    const size_t kEndCharCodeOffset = 4;
    const size_t kMaxNGroups = 0xfffffff0 / kGroupSize;  // protection against overflow
    // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
    if (kFirstGroupOffset > size) {
        return false;
    }
    uint32_t nGroups = readU32(data, kNGroupsOffset);
    if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
        android_errorWriteLog(0x534e4554, "25645298");
        return false;
    }
    for (uint32_t i = 0; i < nGroups; i++) {
        uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize;
        uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset);
        uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset);
        if (end < start) {
            // invalid group range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }

        // No need to read outside of Unicode code point range.
        if (start > MAX_UNICODE_CODE_POINT) {
            return true;
        }
        if (end > MAX_UNICODE_CODE_POINT) {
            // file is inclusive, vector is exclusive
            addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1);
            return true;
        }
        addRange(coverage, start, end + 1);  // file is inclusive, vector is exclusive
    }
    return true;
}

// Lower value has higher priority. 0 for the highest priority table.
// kLowestPriority for unsupported tables.
// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it.
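// For reference, the (platformId, encodingId) pairs below roughly correspond to:
//   (3, 10) Windows, Unicode UCS-4        priority 0
//   (0, 6)  Unicode, full repertoire      priority 1
//   (0, 4)  Unicode 2.0+, full repertoire priority 2
//   (3, 1)  Windows, Unicode BMP (UCS-2)  priority 3
//   (0, 3)  Unicode 2.0+, BMP only        priority 4
//   (0, 2)  ISO/IEC 10646                 priority 5
//   (0, 1)  Unicode 1.1                   priority 6
//   (0, 0)  Unicode 1.0                   priority 7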
constexpr uint8_t kLowestPriority = 255;
uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) {
    if (platformId == 3 && encodingId == 10) {
        return 0;
    }
    if (platformId == 0 && encodingId == 6) {
        return 1;
    }
    if (platformId == 0 && encodingId == 4) {
        return 2;
    }
    if (platformId == 3 && encodingId == 1) {
        return 3;
    }
    if (platformId == 0 && encodingId == 3) {
        return 4;
    }
    if (platformId == 0 && encodingId == 2) {
        return 5;
    }
    if (platformId == 0 && encodingId == 1) {
        return 6;
    }
    if (platformId == 0 && encodingId == 0) {
        return 7;
    }
    // Tables other than above are not supported.
    return kLowestPriority;
}

// Get merged coverage information from default UVS Table and non-default UVS Table. Note that this
// function assumes code points in both default UVS Table and non-default UVS table are stored in
// ascending order. This is required by the standard.
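// Per the format 14 spec, a Default UVS Table is a uint32 count followed by UnicodeRange records
// of 4 bytes each ({ startUnicodeValue: uint24, additionalCount: uint8 }), and a Non-Default UVS
// Table is a uint32 count followed by UVSMapping records of 5 bytes each
// ({ unicodeValue: uint24, glyphID: uint16 }).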
static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size,
        uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset,
        const SparseBitSet& baseCoverage) {
    // Need to merge supported ranges from the default UVS Table and the non-default UVS Table.
    // First, collect all supported code points from the non-default UVS table.
    std::vector<uint32_t> rangesFromNonDefaultUVSTable;
    if (nonDefaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUVSMappingRecordSize = 5;

        const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > nonDefaultUVSTableOffset.
        const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset;
        if (nonDefaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint32_t numRecords = readU32(nonDefaultUVSTable, 0);
        if (numRecords * kUVSMappingRecordSize + kHeaderSize > nonDefaultUVSTableRemaining) {
            return false;
        }
        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i;
            const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset);
            addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1);
        }
    }

    // Then, collect the supported code points from the default UVS Table; the two lists are
    // merged at the end.
    std::vector<uint32_t> rangesFromDefaultUVSTable;
    if (defaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUnicodeRangeRecordSize = 4;

        const uint8_t* defaultUVSTable = data + defaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > defaultUVSTableOffset.
        const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset;

        if (defaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint32_t numRecords = readU32(defaultUVSTable, 0);
        if (numRecords * kUnicodeRangeRecordSize + kHeaderSize > defaultUVSTableRemaining) {
            return false;
        }

        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i;
            const uint32_t startCp = readU24(defaultUVSTable, recordOffset);
            const uint8_t rangeLength = defaultUVSTable[recordOffset + 3];

            // Insert the range from the default UVS Table, but exclude code points whose base
            // code point is not supported.
            for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) {
                // All code points in the default UVS table map to the glyph of the code point
                // without the variation selector. We need to check the default glyph availability
                // and exclude the code point if it is not supported by the default cmap table.
                if (baseCoverage.get(cp)) {
                    addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */);
                }
            }
        }
    }
    *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable);
    return true;
}

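// A format 14 (Unicode Variation Sequences) subtable starts with a 10-byte header (format: uint16,
// length: uint32, numVarSelectorRecords: uint32), followed by 11-byte VariationSelector records:
// varSelector (uint24), defaultUVSOffset (uint32) and nonDefaultUVSOffset (uint32), with both
// offsets measured from the start of the subtable.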
static void getCoverageFormat14(std::vector<std::unique_ptr<SparseBitSet>>* out,
        const uint8_t* data, size_t size, const SparseBitSet& baseCoverage) {
    constexpr size_t kHeaderSize = 10;
    constexpr size_t kRecordSize = 11;
    constexpr size_t kLengthOffset = 2;
    constexpr size_t kNumRecordOffset = 6;

    out->clear();
    if (size < kHeaderSize) {
        return;
    }

    const uint32_t length = readU32(data, kLengthOffset);
    if (size < length) {
        return;
    }

    uint32_t numRecords = readU32(data, kNumRecordOffset);
    if (numRecords == 0 || kHeaderSize + kRecordSize * numRecords > length) {
        return;
    }

    for (uint32_t i = 0; i < numRecords; ++i) {
        // Process the records in reverse order: records are sorted by variation selector, so the
        // last one typically has the largest vsIndex and the first resize() below allocates the
        // final size of the output vector.
        const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1);
        const uint32_t vsCodePoint = readU24(data, recordHeadOffset);
        const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3);
        const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7);
        if (defaultUVSOffset > length || nonDefaultUVSOffset > length) {
            continue;
        }

        const uint16_t vsIndex = getVsIndex(vsCodePoint);
        if (vsIndex == INVALID_VS_INDEX) {
            continue;
        }
        std::vector<uint32_t> ranges;
        if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset,
                baseCoverage)) {
            continue;
        }
        if (out->size() < vsIndex + 1) {
            out->resize(vsIndex + 1);
        }
        (*out)[vsIndex].reset(new SparseBitSet(ranges.data(), ranges.size() >> 1));
    }

    out->shrink_to_fit();
}

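// The cmap table header is 4 bytes (version, numTables), followed by numTables 8-byte encoding
// records: platformID (uint16), encodingID (uint16) and subtableOffset (uint32), with the offset
// measured from the start of the cmap table.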
SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size,
        std::vector<std::unique_ptr<SparseBitSet>>* out) {
    constexpr size_t kHeaderSize = 4;
    constexpr size_t kNumTablesOffset = 2;
    constexpr size_t kTableSize = 8;
    constexpr size_t kPlatformIdOffset = 0;
    constexpr size_t kEncodingIdOffset = 2;
    constexpr size_t kOffsetOffset = 4;
    constexpr size_t kFormatOffset = 0;
    constexpr uint32_t kNoTable = UINT32_MAX;

    if (kHeaderSize > cmap_size) {
        return SparseBitSet();
    }
    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
    if (kHeaderSize + numTables * kTableSize > cmap_size) {
        return SparseBitSet();
    }

    uint32_t bestTableOffset = kNoTable;
    uint16_t bestTableFormat = 0;
    uint8_t bestTablePriority = kLowestPriority;
    uint32_t vsTableOffset = kNoTable;
    for (uint32_t i = 0; i < numTables; ++i) {
        const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize;
        const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset);
        const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset);
        const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset);

        if (offset > cmap_size - 2) {
            continue;  // Invalid table: not enough space to read.
        }
        const uint16_t format = readU16(cmap_data, offset + kFormatOffset);

        if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) {
            if (vsTableOffset == kNoTable && format == 14) {
                vsTableOffset = offset;
            } else {
                // Ignore the (0, 5) table if we have already seen another valid one or it's in a
                // format we don't understand.
            }
        } else {
            uint32_t length;
            uint32_t language;

            if (format == 4) {
                constexpr size_t lengthOffset = 2;
                constexpr size_t languageOffset = 4;
                constexpr size_t minTableSize = languageOffset + 2;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU16(cmap_data, offset + lengthOffset);
                language = readU16(cmap_data, offset + languageOffset);
            } else if (format == 12) {
                constexpr size_t lengthOffset = 4;
                constexpr size_t languageOffset = 8;
                constexpr size_t minTableSize = languageOffset + 4;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU32(cmap_data, offset + lengthOffset);
                language = readU32(cmap_data, offset + languageOffset);
            } else {
                continue;
            }

            if (length > cmap_size - offset) {
                continue;  // Invalid table: table length is larger than whole cmap data size.
            }
            if (language != 0) {
                // Unsupported or invalid table: this is either a subtable for the Macintosh
                // platform (which we don't support), or an invalid subtable since language field
                // should be zero for non-Macintosh subtables.
                continue;
            }
            const uint8_t priority = getTablePriority(platformId, encodingId);
            if (priority < bestTablePriority) {
                bestTableOffset = offset;
                bestTablePriority = priority;
                bestTableFormat = format;
            }
        }
        if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) {
            // Already found the highest priority table and variation sequences table. No need to
            // look at remaining tables.
            break;
        }
    }

    SparseBitSet coverage;

    if (bestTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + bestTableOffset;
        const size_t tableSize = cmap_size - bestTableOffset;
        bool success;
        vector<uint32_t> coverageVec;
        if (bestTableFormat == 4) {
            success = getCoverageFormat4(coverageVec, tableData, tableSize);
        } else {
            success = getCoverageFormat12(coverageVec, tableData, tableSize);
        }

        if (success && !coverageVec.empty()) {
            coverage = SparseBitSet(&coverageVec.front(), coverageVec.size() >> 1);
        }
    }

    if (vsTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + vsTableOffset;
        const size_t tableSize = cmap_size - vsTableOffset;
        getCoverageFormat14(out, tableData, tableSize, coverage);
    }
    return coverage;
}

}  // namespace minikin