CmapCoverage.cpp revision 818fbee83a72ca86f64527eb90b2f15ec9b28504
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// Determine coverage of font given its raw "cmap" OpenType table
18
19#define LOG_TAG "Minikin"
20
21#include <vector>
22using std::vector;
23
24#include <log/log.h>
25
26#include <minikin/SparseBitSet.h>
27#include <minikin/CmapCoverage.h>
28#include "MinikinInternal.h"
29
30namespace minikin {
31
// Reads an unsigned 16-bit big-endian value from data[offset] and data[offset + 1] and returns
// it widened to uint32_t. No bounds checking is done here; callers validate offsets beforehand.
// This could perhaps be optimized to use __builtin_bswap16 and friends.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    const uint32_t highByte = data[offset];
    const uint32_t lowByte = data[offset + 1];
    return (highByte << 8) | lowByte;
}
36
// Reads an unsigned 32-bit big-endian value from the four bytes starting at data[offset].
// No bounds checking is done here; callers validate offsets beforehand.
static uint32_t readU32(const uint8_t* data, size_t offset) {
    uint32_t value = 0;
    // Most significant byte comes first, so shift the accumulator before adding each byte.
    for (size_t i = 0; i < 4; ++i) {
        value = (value << 8) | static_cast<uint32_t>(data[offset + i]);
    }
    return value;
}
41
// Appends the half-open code point range [start, end) to |coverage|.
//
// |coverage| is a flat list of (start, end) pairs: element 2k is the inclusive start of range k
// and element 2k + 1 is its exclusive end. Ranges are expected to arrive in ascending order.
//
//  - If the list is empty or the new range begins after the last range ends, a new pair is
//    appended.
//  - Otherwise the new range touches or overlaps the last one (or arrived out of order) and the
//    last range is only ever *extended*: its end is raised to |end| if that is larger. The end
//    is never lowered, so malformed (overlapping or unsorted) input cannot shrink the last range
//    or leave an inverted pair whose end is below its start. Code points of an out-of-order
//    range that lie before the last range's start are dropped rather than recorded.
static void addRange(std::vector<uint32_t>& coverage, uint32_t start, uint32_t end) {
#ifdef VERBOSE_DEBUG
    ALOGD("adding range %u-%u\n", start, end);
#endif
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
    } else if (coverage.back() < end) {
        // Adjacent or overlapping: grow the last range, never shrink it.
        coverage.back() = end;
    }
}
53
54// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
55static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
56    const size_t kSegCountOffset = 6;
57    const size_t kEndCountOffset = 14;
58    const size_t kHeaderSize = 16;
59    const size_t kSegmentSize = 8;  // total size of array elements for one segment
60    if (kEndCountOffset > size) {
61        return false;
62    }
63    size_t segCount = readU16(data, kSegCountOffset) >> 1;
64    if (kHeaderSize + segCount * kSegmentSize > size) {
65        return false;
66    }
67    for (size_t i = 0; i < segCount; i++) {
68        uint32_t end = readU16(data, kEndCountOffset + 2 * i);
69        uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i));
70        if (end < start) {
71            // invalid segment range: size must be positive
72            android_errorWriteLog(0x534e4554, "26413177");
73            return false;
74        }
75        uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
76        if (rangeOffset == 0) {
77            uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
78            if (((end + delta) & 0xffff) > end - start) {
79                addRange(coverage, start, end + 1);
80            } else {
81                for (uint32_t j = start; j < end + 1; j++) {
82                    if (((j + delta) & 0xffff) != 0) {
83                        addRange(coverage, j, j + 1);
84                    }
85                }
86            }
87        } else {
88            for (uint32_t j = start; j < end + 1; j++) {
89                uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset +
90                    (i + j - start) * 2;
91                if (actualRangeOffset + 2 > size) {
92                    // invalid rangeOffset is considered a "warning" by OpenType Sanitizer
93                    continue;
94                }
95                uint32_t glyphId = readU16(data, actualRangeOffset);
96                if (glyphId != 0) {
97                    addRange(coverage, j, j + 1);
98                }
99            }
100        }
101    }
102    return true;
103}
104
105// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector
106static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
107    const size_t kNGroupsOffset = 12;
108    const size_t kFirstGroupOffset = 16;
109    const size_t kGroupSize = 12;
110    const size_t kStartCharCodeOffset = 0;
111    const size_t kEndCharCodeOffset = 4;
112    const size_t kMaxNGroups = 0xfffffff0 / kGroupSize;  // protection against overflow
113    // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
114    if (kFirstGroupOffset > size) {
115        return false;
116    }
117    uint32_t nGroups = readU32(data, kNGroupsOffset);
118    if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
119        android_errorWriteLog(0x534e4554, "25645298");
120        return false;
121    }
122    for (uint32_t i = 0; i < nGroups; i++) {
123        uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize;
124        uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset);
125        uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset);
126        if (end < start) {
127            // invalid group range: size must be positive
128            android_errorWriteLog(0x534e4554, "26413177");
129            return false;
130        }
131
132        // No need to read outside of Unicode code point range.
133        if (start > MAX_UNICODE_CODE_POINT) {
134            return true;
135        }
136        if (end > MAX_UNICODE_CODE_POINT) {
137            // file is inclusive, vector is exclusive
138            addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1);
139            return true;
140        }
141        addRange(coverage, start, end + 1);  // file is inclusive, vector is exclusive
142    }
143    return true;
144}
145
// Ranks a cmap encoding record by its (platformId, encodingId) pair.
// A lower value means a higher priority; 0 is the most preferred table and kLowestPriority
// marks a pair that is not supported at all.
// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it.
constexpr uint8_t kLowestPriority = 255;
uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) {
    switch (platformId) {
        case 3:
            switch (encodingId) {
                case 10: return 0;
                case 1:  return 3;
                default: break;
            }
            break;
        case 0:
            switch (encodingId) {
                case 6:  return 1;
                case 4:  return 2;
                case 3:  return 4;
                case 2:  return 5;
                case 1:  return 6;
                case 0:  return 7;
                default: break;
            }
            break;
        default:
            break;
    }
    // Tables other than above are not supported.
    return kLowestPriority;
}
178
179SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size,
180        bool* has_cmap_format14_subtable) {
181    constexpr size_t kHeaderSize = 4;
182    constexpr size_t kNumTablesOffset = 2;
183    constexpr size_t kTableSize = 8;
184    constexpr size_t kPlatformIdOffset = 0;
185    constexpr size_t kEncodingIdOffset = 2;
186    constexpr size_t kOffsetOffset = 4;
187    constexpr size_t kFormatOffset = 0;
188    constexpr uint32_t kInvalidOffset = UINT32_MAX;
189
190    if (kHeaderSize > cmap_size) {
191        return SparseBitSet();
192    }
193    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
194    if (kHeaderSize + numTables * kTableSize > cmap_size) {
195        return SparseBitSet();
196    }
197
198    uint32_t bestTableOffset = kInvalidOffset;
199    uint16_t bestTableFormat = 0;
200    uint8_t bestTablePriority = kLowestPriority;
201    *has_cmap_format14_subtable = false;
202    for (uint32_t i = 0; i < numTables; ++i) {
203        const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize;
204        const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset);
205        const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset);
206        const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset);
207
208        if (offset > cmap_size - 2) {
209            continue;  // Invalid table: not enough space to read.
210        }
211        const uint16_t format = readU16(cmap_data, offset + kFormatOffset);
212
213        if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) {
214            if (!(*has_cmap_format14_subtable) && format == 14) {
215                *has_cmap_format14_subtable = true;
216            } else {
217                // Ignore the (0, 5) table if we have already seen another valid one or it's in a
218                // format we don't understand.
219            }
220        } else {
221            uint32_t length;
222            uint32_t language;
223
224            if (format == 4) {
225                constexpr size_t lengthOffset = 2;
226                constexpr size_t languageOffset = 4;
227                constexpr size_t minTableSize = languageOffset + 2;
228                if (offset > cmap_size - minTableSize) {
229                    continue;  // Invalid table: not enough space to read.
230                }
231                length = readU16(cmap_data, offset + lengthOffset);
232                language = readU16(cmap_data, offset + languageOffset);
233            } else if (format == 12) {
234                constexpr size_t lengthOffset = 4;
235                constexpr size_t languageOffset = 8;
236                constexpr size_t minTableSize = languageOffset + 4;
237                if (offset > cmap_size - minTableSize) {
238                    continue;  // Invalid table: not enough space to read.
239                }
240                length = readU32(cmap_data, offset + lengthOffset);
241                language = readU32(cmap_data, offset + languageOffset);
242            } else {
243                continue;
244            }
245
246            if (length > cmap_size - offset) {
247                continue;  // Invalid table: table length is larger than whole cmap data size.
248            }
249            if (language != 0) {
250                // Unsupported or invalid table: this is either a subtable for the Macintosh
251                // platform (which we don't support), or an invalid subtable since language field
252                // should be zero for non-Macintosh subtables.
253                continue;
254            }
255            const uint8_t priority = getTablePriority(platformId, encodingId);
256            if (priority < bestTablePriority) {
257                bestTableOffset = offset;
258                bestTablePriority = priority;
259                bestTableFormat = format;
260            }
261        }
262        if (*has_cmap_format14_subtable && bestTablePriority == 0 /* highest priority */) {
263            // Already found the highest priority table and variation sequences table. No need to
264            // look at remaining tables.
265            break;
266        }
267    }
268    if (bestTableOffset == kInvalidOffset) {
269        return SparseBitSet();
270    }
271    const uint8_t* tableData = cmap_data + bestTableOffset;
272    const size_t tableSize = cmap_size - bestTableOffset;
273    vector<uint32_t> coverageVec;
274    bool success;
275    if (bestTableFormat == 4) {
276        success = getCoverageFormat4(coverageVec, tableData, tableSize);
277    } else {
278        success = getCoverageFormat12(coverageVec, tableData, tableSize);
279    }
280    if (success) {
281        return SparseBitSet(&coverageVec.front(), coverageVec.size() >> 1);
282    } else {
283        return SparseBitSet();
284    }
285
286}
287
288}  // namespace minikin
289