// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationinfo.cpp
*
* created on: 2013aug05
* created by: Markus W. Scherer
*/
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include <stdio.h>
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include <string.h>
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#include "collationdata.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatareader.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationinfo.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#include "uvectr32.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationInfo::printSizes(int32_t sizeWithHeader, const int32_t indexes[]) {
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t totalSize = indexes[CollationDataReader::IX_TOTAL_SIZE];
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(sizeWithHeader > totalSize) {
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  header size:                  %6ld\n", (long)(sizeWithHeader - totalSize));
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length = indexes[CollationDataReader::IX_INDEXES_LENGTH];
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf("  indexes:          %6ld *4 = %6ld\n", (long)length, (long)length * 4);
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET);
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  reorder codes:    %6ld *4 = %6ld\n", (long)length / 4, (long)length);
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET);
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(length >= 256);
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  reorder table:                %6ld\n", (long)length);
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_TRIE_OFFSET);
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  trie size:                    %6ld\n", (long)length);
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_RESERVED8_OFFSET);
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  reserved (offset 8):          %6ld\n", (long)length);
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_CES_OFFSET);
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  CEs:              %6ld *8 = %6ld\n", (long)length / 8, (long)length);
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_RESERVED10_OFFSET);
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  reserved (offset 10):         %6ld\n", (long)length);
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_CE32S_OFFSET);
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  CE32s:            %6ld *4 = %6ld\n", (long)length / 4, (long)length);
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET);
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  rootElements:     %6ld *4 = %6ld\n", (long)length / 4, (long)length);
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_CONTEXTS_OFFSET);
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  contexts:         %6ld *2 = %6ld\n", (long)length / 2, (long)length);
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_UNSAFE_BWD_OFFSET);
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  unsafeBwdSet:     %6ld *2 = %6ld\n", (long)length / 2, (long)length);
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET);
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  fastLatin table:  %6ld *2 = %6ld\n", (long)length / 2, (long)length);
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_SCRIPTS_OFFSET);
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  scripts data:     %6ld *2 = %6ld\n", (long)length / 2, (long)length);
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET);
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(length >= 256);
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  compressibleBytes:            %6ld\n", (long)length);
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getDataLength(indexes, CollationDataReader::IX_RESERVED18_OFFSET);
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length != 0) {
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("  reserved (offset 18):         %6ld\n", (long)length);
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf(" collator binary total size:    %6ld\n", (long)sizeWithHeader);
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationInfo::getDataLength(const int32_t indexes[], int32_t startIndex) {
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return indexes[startIndex + 1] - indexes[startIndex];
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertvoid
1201b7d32f919554dda9c193b32188251337bc756f1Fredrik RoubertCollationInfo::printReorderRanges(const CollationData &data, const int32_t *codes, int32_t length) {
1211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UErrorCode errorCode = U_ZERO_ERROR;
1221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UVector32 ranges(errorCode);
1231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    data.makeReorderRanges(codes, length, ranges, errorCode);
1241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if(U_FAILURE(errorCode)) {
1251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        printf("  error building reorder ranges: %s\n", u_errorName(errorCode));
1261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return;
1271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
1281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
1291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t start = 0;
1301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    for(int32_t i = 0; i < ranges.size(); ++i) {
1311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int32_t pair = ranges.elementAti(i);
1321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int32_t limit = (pair >> 16) & 0xffff;
1331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int16_t offset = (int16_t)pair;
1341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(offset == 0) {
1351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            // [inclusive-start, exclusive-limit[
1361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            printf("          [%04x, %04x[\n", start, limit);
1371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        } else if(offset > 0) {
1381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            printf("  reorder [%04x, %04x[ by offset  %02x to [%04x, %04x[\n",
1391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    start, limit, offset,
1401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    start + (offset << 8), limit + (offset << 8));
1411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        } else /* offset < 0 */ {
1421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            printf("  reorder [%04x, %04x[ by offset -%02x to [%04x, %04x[\n",
1431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    start, limit, -offset,
1441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    start + (offset << 8), limit + (offset << 8));
1451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
1461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        start = limit;
1471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
1481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert}
1491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
153