1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// Determine coverage of font given its raw "cmap" OpenType table 18 19#define LOG_TAG "Minikin" 20 21#include <algorithm> 22#include <vector> 23using std::vector; 24 25#include <log/log.h> 26 27#include <minikin/SparseBitSet.h> 28#include <minikin/CmapCoverage.h> 29#include "MinikinInternal.h" 30 31#include <MinikinInternal.h> 32 33namespace minikin { 34 35constexpr uint32_t U32MAX = std::numeric_limits<uint32_t>::max(); 36 37// These could perhaps be optimized to use __builtin_bswap16 and friends. 38static uint32_t readU16(const uint8_t* data, size_t offset) { 39 return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]); 40} 41 42static uint32_t readU24(const uint8_t* data, size_t offset) { 43 return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 | 44 ((uint32_t)data[offset + 2]); 45} 46 47static uint32_t readU32(const uint8_t* data, size_t offset) { 48 return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 | 49 ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]); 50} 51 52static void addRange(vector<uint32_t> &coverage, uint32_t start, uint32_t end) { 53#ifdef VERBOSE_DEBUG 54 ALOGD("adding range %d-%d\n", start, end); 55#endif 56 if (coverage.empty() || coverage.back() < start) { 57 coverage.push_back(start); 58 coverage.push_back(end); 59 } else { 60 coverage.back() = end; 61 } 62} 63 64struct Range { 65 uint32_t start; // inclusive 66 uint32_t end; // exclusive 67 68 static Range InvalidRange() { 69 return Range({ U32MAX, U32MAX }); 70 } 71 72 inline bool isValid() const { 73 return start != U32MAX && end != U32MAX; 74 } 75 76 // Returns true if left and right intersect. 77 inline static bool intersects(const Range& left, const Range& right) { 78 return left.isValid() && right.isValid() && 79 left.start < right.end && right.start < left.end; 80 } 81 82 // Returns merged range. This method assumes left and right are not invalid ranges and they have 83 // an intersection. 84 static Range merge(const Range& left, const Range& right) { 85 return Range({ std::min(left.start, right.start), std::max(left.end, right.end) }); 86 } 87}; 88 89// Returns Range from given ranges vector. Returns InvalidRange if i is out of range. 90static inline Range getRange(const std::vector<uint32_t>& r, size_t i) { 91 return i + 1 < r.size() ? Range({ r[i], r[i + 1] }) : Range::InvalidRange(); 92} 93 94// Merge two sorted lists of ranges into one sorted list. 95static std::vector<uint32_t> mergeRanges( 96 const std::vector<uint32_t>& lRanges, const std::vector<uint32_t>& rRanges) { 97 std::vector<uint32_t> out; 98 99 const size_t lsize = lRanges.size(); 100 const size_t rsize = rRanges.size(); 101 out.reserve(lsize + rsize); 102 size_t ri = 0; 103 size_t li = 0; 104 while (li < lsize || ri < rsize) { 105 Range left = getRange(lRanges, li); 106 Range right = getRange(rRanges, ri); 107 108 if (!right.isValid()) { 109 // No ranges left in rRanges. Just put all remaining ranges in lRanges. 110 do { 111 Range r = getRange(lRanges, li); 112 addRange(out, r.start, r.end); 113 li += 2; 114 } while (li < lsize); 115 break; 116 } else if (!left.isValid()) { 117 // No ranges left in lRanges. Just put all remaining ranges in rRanges. 118 do { 119 Range r = getRange(rRanges, ri); 120 addRange(out, r.start, r.end); 121 ri += 2; 122 } while (ri < rsize); 123 break; 124 } else if (!Range::intersects(left, right)) { 125 // No intersection. Add smaller range. 126 if (left.start < right.start) { 127 addRange(out, left.start, left.end); 128 li += 2; 129 } else { 130 addRange(out, right.start, right.end); 131 ri += 2; 132 } 133 } else { 134 Range merged = Range::merge(left, right); 135 li += 2; 136 ri += 2; 137 left = getRange(lRanges, li); 138 right = getRange(rRanges, ri); 139 while (Range::intersects(merged, left) || Range::intersects(merged, right)) { 140 if (Range::intersects(merged, left)) { 141 merged = Range::merge(merged, left); 142 li += 2; 143 left = getRange(lRanges, li); 144 } else { 145 merged = Range::merge(merged, right); 146 ri += 2; 147 right = getRange(rRanges, ri); 148 } 149 } 150 addRange(out, merged.start, merged.end); 151 } 152 } 153 154 return out; 155} 156 157// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector 158static bool getCoverageFormat4(vector<uint32_t>& coverage, const uint8_t* data, size_t size) { 159 const size_t kSegCountOffset = 6; 160 const size_t kEndCountOffset = 14; 161 const size_t kHeaderSize = 16; 162 const size_t kSegmentSize = 8; // total size of array elements for one segment 163 if (kEndCountOffset > size) { 164 return false; 165 } 166 size_t segCount = readU16(data, kSegCountOffset) >> 1; 167 if (kHeaderSize + segCount * kSegmentSize > size) { 168 return false; 169 } 170 for (size_t i = 0; i < segCount; i++) { 171 uint32_t end = readU16(data, kEndCountOffset + 2 * i); 172 uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i)); 173 if (end < start) { 174 // invalid segment range: size must be positive 175 android_errorWriteLog(0x534e4554, "26413177"); 176 return false; 177 } 178 uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i)); 179 if (rangeOffset == 0) { 180 uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i)); 181 if (((end + delta) & 0xffff) > end - start) { 182 addRange(coverage, start, end + 1); 183 } else { 184 for (uint32_t j = start; j < end + 1; j++) { 185 if (((j + delta) & 0xffff) != 0) { 186 addRange(coverage, j, j + 1); 187 } 188 } 189 } 190 } else { 191 for (uint32_t j = start; j < end + 1; j++) { 192 uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset + 193 (i + j - start) * 2; 194 if (actualRangeOffset + 2 > size) { 195 // invalid rangeOffset is considered a "warning" by OpenType Sanitizer 196 continue; 197 } 198 uint32_t glyphId = readU16(data, actualRangeOffset); 199 if (glyphId != 0) { 200 addRange(coverage, j, j + 1); 201 } 202 } 203 } 204 } 205 return true; 206} 207 208// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector 209static bool getCoverageFormat12(vector<uint32_t>& coverage, const uint8_t* data, size_t size) { 210 const size_t kNGroupsOffset = 12; 211 const size_t kFirstGroupOffset = 16; 212 const size_t kGroupSize = 12; 213 const size_t kStartCharCodeOffset = 0; 214 const size_t kEndCharCodeOffset = 4; 215 const size_t kMaxNGroups = 0xfffffff0 / kGroupSize; // protection against overflow 216 // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits. 217 if (kFirstGroupOffset > size) { 218 return false; 219 } 220 uint32_t nGroups = readU32(data, kNGroupsOffset); 221 if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) { 222 android_errorWriteLog(0x534e4554, "25645298"); 223 return false; 224 } 225 for (uint32_t i = 0; i < nGroups; i++) { 226 uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize; 227 uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset); 228 uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset); 229 if (end < start) { 230 // invalid group range: size must be positive 231 android_errorWriteLog(0x534e4554, "26413177"); 232 return false; 233 } 234 235 // No need to read outside of Unicode code point range. 236 if (start > MAX_UNICODE_CODE_POINT) { 237 return true; 238 } 239 if (end > MAX_UNICODE_CODE_POINT) { 240 // file is inclusive, vector is exclusive 241 addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1); 242 return true; 243 } 244 addRange(coverage, start, end + 1); // file is inclusive, vector is exclusive 245 } 246 return true; 247} 248 249// Lower value has higher priority. 0 for the highest priority table. 250// kLowestPriority for unsupported tables. 251// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it. 252constexpr uint8_t kLowestPriority = 255; 253uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) { 254 if (platformId == 3 && encodingId == 10) { 255 return 0; 256 } 257 if (platformId == 0 && encodingId == 6) { 258 return 1; 259 } 260 if (platformId == 0 && encodingId == 4) { 261 return 2; 262 } 263 if (platformId == 3 && encodingId == 1) { 264 return 3; 265 } 266 if (platformId == 0 && encodingId == 3) { 267 return 4; 268 } 269 if (platformId == 0 && encodingId == 2) { 270 return 5; 271 } 272 if (platformId == 0 && encodingId == 1) { 273 return 6; 274 } 275 if (platformId == 0 && encodingId == 0) { 276 return 7; 277 } 278 // Tables other than above are not supported. 279 return kLowestPriority; 280} 281 282// Get merged coverage information from default UVS Table and non-default UVS Table. Note that this 283// function assumes code points in both default UVS Table and non-default UVS table are stored in 284// ascending order. This is required by the standard. 285static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size, 286 uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset, 287 const SparseBitSet& baseCoverage) { 288 // Need to merge supported ranges from default UVS Table and non-default UVS Table. 289 // First, collect all supported code points from non default UVS table. 290 std::vector<uint32_t> rangesFromNonDefaultUVSTable; 291 if (nonDefaultUVSTableOffset != 0) { 292 constexpr size_t kHeaderSize = 4; 293 constexpr size_t kUVSMappingRecordSize = 5; 294 295 const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset; 296 // This subtraction doesn't underflow since the caller already checked 297 // size > nonDefaultUVSTableOffset. 298 const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset; 299 if (nonDefaultUVSTableRemaining < kHeaderSize) { 300 return false; 301 } 302 const uint32_t numRecords = readU32(nonDefaultUVSTable, 0); 303 if (numRecords * kUVSMappingRecordSize + kHeaderSize > nonDefaultUVSTableRemaining) { 304 return false; 305 } 306 for (uint32_t i = 0; i < numRecords; ++i) { 307 const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i; 308 const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset); 309 addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1); 310 } 311 } 312 313 // Then, construct range from default UVS Table with merging code points from non default UVS 314 // table. 315 std::vector<uint32_t> rangesFromDefaultUVSTable; 316 if (defaultUVSTableOffset != 0) { 317 constexpr size_t kHeaderSize = 4; 318 constexpr size_t kUnicodeRangeRecordSize = 4; 319 320 const uint8_t* defaultUVSTable = data + defaultUVSTableOffset; 321 // This subtraction doesn't underflow since the caller already checked 322 // size > defaultUVSTableOffset. 323 const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset; 324 325 if (defaultUVSTableRemaining < kHeaderSize) { 326 return false; 327 } 328 const uint32_t numRecords = readU32(defaultUVSTable, 0); 329 if (numRecords * kUnicodeRangeRecordSize + kHeaderSize > defaultUVSTableRemaining) { 330 return false; 331 } 332 333 for (uint32_t i = 0; i < numRecords; ++i) { 334 const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i; 335 const uint32_t startCp = readU24(defaultUVSTable, recordOffset); 336 const uint8_t rangeLength = defaultUVSTable[recordOffset + 3]; 337 338 // Then insert range from default UVS Table, but exclude if the base codepoint is not 339 // supported. 340 for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) { 341 // All codepoints in default UVS table should go to the glyphs of the codepoints 342 // without variation selectors. We need to check the default glyph availability and 343 // exclude the codepoint if it is not supported by defualt cmap table. 344 if (baseCoverage.get(cp)) { 345 addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */); 346 } 347 } 348 } 349 } 350 *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable); 351 return true; 352} 353 354static void getCoverageFormat14(std::vector<std::unique_ptr<SparseBitSet>>* out, 355 const uint8_t* data, size_t size, const SparseBitSet& baseCoverage) { 356 constexpr size_t kHeaderSize = 10; 357 constexpr size_t kRecordSize = 11; 358 constexpr size_t kLengthOffset = 2; 359 constexpr size_t kNumRecordOffset = 6; 360 361 out->clear(); 362 if (size < kHeaderSize) { 363 return; 364 } 365 366 const uint32_t length = readU32(data, kLengthOffset); 367 if (size < length) { 368 return; 369 } 370 371 uint32_t numRecords = readU32(data, kNumRecordOffset); 372 if (numRecords == 0 || kHeaderSize + kRecordSize * numRecords > length) { 373 return; 374 } 375 376 for (uint32_t i = 0; i < numRecords; ++i) { 377 // Insert from the largest code points since it determines the size of the output vector. 378 const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1); 379 const uint32_t vsCodePoint = readU24(data, recordHeadOffset); 380 const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3); 381 const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7); 382 if (defaultUVSOffset > length || nonDefaultUVSOffset > length) { 383 continue; 384 } 385 386 const uint16_t vsIndex = getVsIndex(vsCodePoint); 387 if (vsIndex == INVALID_VS_INDEX) { 388 continue; 389 } 390 std::vector<uint32_t> ranges; 391 if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset, 392 baseCoverage)) { 393 continue; 394 } 395 if (out->size() < vsIndex + 1) { 396 out->resize(vsIndex + 1); 397 } 398 (*out)[vsIndex].reset(new SparseBitSet(ranges.data(), ranges.size() >> 1)); 399 } 400 401 out->shrink_to_fit(); 402} 403 404SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size, 405 std::vector<std::unique_ptr<SparseBitSet>>* out) { 406 constexpr size_t kHeaderSize = 4; 407 constexpr size_t kNumTablesOffset = 2; 408 constexpr size_t kTableSize = 8; 409 constexpr size_t kPlatformIdOffset = 0; 410 constexpr size_t kEncodingIdOffset = 2; 411 constexpr size_t kOffsetOffset = 4; 412 constexpr size_t kFormatOffset = 0; 413 constexpr uint32_t kNoTable = UINT32_MAX; 414 415 if (kHeaderSize > cmap_size) { 416 return SparseBitSet(); 417 } 418 uint32_t numTables = readU16(cmap_data, kNumTablesOffset); 419 if (kHeaderSize + numTables * kTableSize > cmap_size) { 420 return SparseBitSet(); 421 } 422 423 uint32_t bestTableOffset = kNoTable; 424 uint16_t bestTableFormat = 0; 425 uint8_t bestTablePriority = kLowestPriority; 426 uint32_t vsTableOffset = kNoTable; 427 for (uint32_t i = 0; i < numTables; ++i) { 428 const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize; 429 const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset); 430 const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset); 431 const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset); 432 433 if (offset > cmap_size - 2) { 434 continue; // Invalid table: not enough space to read. 435 } 436 const uint16_t format = readU16(cmap_data, offset + kFormatOffset); 437 438 if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) { 439 if (vsTableOffset == kNoTable && format == 14) { 440 vsTableOffset = offset; 441 } else { 442 // Ignore the (0, 5) table if we have already seen another valid one or it's in a 443 // format we don't understand. 444 } 445 } else { 446 uint32_t length; 447 uint32_t language; 448 449 if (format == 4) { 450 constexpr size_t lengthOffset = 2; 451 constexpr size_t languageOffset = 4; 452 constexpr size_t minTableSize = languageOffset + 2; 453 if (offset > cmap_size - minTableSize) { 454 continue; // Invalid table: not enough space to read. 455 } 456 length = readU16(cmap_data, offset + lengthOffset); 457 language = readU16(cmap_data, offset + languageOffset); 458 } else if (format == 12) { 459 constexpr size_t lengthOffset = 4; 460 constexpr size_t languageOffset = 8; 461 constexpr size_t minTableSize = languageOffset + 4; 462 if (offset > cmap_size - minTableSize) { 463 continue; // Invalid table: not enough space to read. 464 } 465 length = readU32(cmap_data, offset + lengthOffset); 466 language = readU32(cmap_data, offset + languageOffset); 467 } else { 468 continue; 469 } 470 471 if (length > cmap_size - offset) { 472 continue; // Invalid table: table length is larger than whole cmap data size. 473 } 474 if (language != 0) { 475 // Unsupported or invalid table: this is either a subtable for the Macintosh 476 // platform (which we don't support), or an invalid subtable since language field 477 // should be zero for non-Macintosh subtables. 478 continue; 479 } 480 const uint8_t priority = getTablePriority(platformId, encodingId); 481 if (priority < bestTablePriority) { 482 bestTableOffset = offset; 483 bestTablePriority = priority; 484 bestTableFormat = format; 485 } 486 } 487 if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) { 488 // Already found the highest priority table and variation sequences table. No need to 489 // look at remaining tables. 490 break; 491 } 492 } 493 494 SparseBitSet coverage; 495 496 if (bestTableOffset != kNoTable) { 497 const uint8_t* tableData = cmap_data + bestTableOffset; 498 const size_t tableSize = cmap_size - bestTableOffset; 499 bool success; 500 vector<uint32_t> coverageVec; 501 if (bestTableFormat == 4) { 502 success = getCoverageFormat4(coverageVec, tableData, tableSize); 503 } else { 504 success = getCoverageFormat12(coverageVec, tableData, tableSize); 505 } 506 507 if (success) { 508 coverage = SparseBitSet(&coverageVec.front(), coverageVec.size() >> 1); 509 } 510 } 511 512 if (vsTableOffset != kNoTable) { 513 const uint8_t* tableData = cmap_data + vsTableOffset; 514 const size_t tableSize = cmap_size - vsTableOffset; 515 getCoverageFormat14(out, tableData, tableSize, coverage); 516 } 517 return coverage; 518} 519 520} // namespace minikin 521