// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  denseranges.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*
* Helper code for finding a small number of dense ranges.
*/
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "denseranges.h"
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
// Definitions in the anonymous namespace are invisible outside this file.
namespace {

/**
 * Collects the largest range gaps offered via add(), keeping at most
 * kCapacity (15) of them, ordered by descending gap length:
 * index 0 always holds the largest gap retained so far.
 * Gaps of equal length keep their insertion order.
 */
class LargestGaps {
public:
    /**
     * @param max Maximum number of gaps to retain.
     *            Clamped into the range 0..kCapacity.
     */
    explicit LargestGaps(int32_t max) : maxLength(clampToCapacity(max)), length(0) {}

    /**
     * Offers one gap. It is stored only if it is among the maxLength
     * largest gaps seen so far; when the collection is already full,
     * the smallest retained gap is dropped to make room.
     * @param gapStart First value inside the gap.
     * @param gapLength Number of values the gap spans.
     */
    void add(int32_t gapStart, int64_t gapLength) {
        // Walk back from the end to the slot where the new gap belongs.
        // The strict '>' keeps earlier gaps ahead of later ones of equal length.
        int32_t i=length;
        while(i>0 && gapLength>gapLengths[i-1]) {
            --i;
        }
        if(i>=maxLength) {
            // Not larger than any retained gap, and there is no free slot.
            return;
        }
        // Either grow by one slot, or overwrite the last (smallest) entry.
        int32_t j;
        if(length<maxLength) {
            j=length++;
        } else {
            j=maxLength-1;
        }
        // Shift the smaller gaps up by one position to open slot i.
        for(; j>i; --j) {
            gapStarts[j]=gapStarts[j-1];
            gapLengths[j]=gapLengths[j-1];
        }
        gapStarts[i]=gapStart;
        gapLengths[i]=gapLength;
    }

    /**
     * Keeps only the newLength largest gaps. Never grows the collection;
     * a negative newLength is treated as 0.
     */
    void truncate(int32_t newLength) {
        if(newLength<0) {
            newLength=0;
        }
        if(newLength<length) {
            length=newLength;
        }
    }

    /** @return Number of gaps currently retained. */
    int32_t count() const { return length; }
    /** @return Start of the i-th largest gap; i must be in 0..count()-1 (not checked). */
    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    /** @return Length of the i-th largest gap; i must be in 0..count()-1 (not checked). */
    int64_t gapLength(int32_t i) const { return gapLengths[i]; }

    /**
     * Finds the retained gap with the smallest start that is strictly
     * greater than value.
     * @return Index of that gap, or -1 if no retained gap starts after value.
     */
    int32_t firstAfter(int32_t value) const {
        int32_t bestIndex=-1;
        int32_t bestStart=0;  // Only meaningful once bestIndex>=0.
        for(int32_t i=0; i<length; ++i) {
            int32_t start=gapStarts[i];
            if(start<=value) {
                continue;
            }
            if(bestIndex<0 || start<bestStart) {
                bestStart=start;
                bestIndex=i;
            }
        }
        return bestIndex;
    }

private:
    static const int32_t kCapacity=15;

    /** Limits a requested maximum to what the fixed-size arrays can hold. */
    static int32_t clampToCapacity(int32_t max) {
        if(max<0) {
            return 0;
        }
        if(max>kCapacity) {
            return kCapacity;
        }
        return max;
    }

    int32_t maxLength;  // Maximum number of gaps to retain, 0..kCapacity.
    int32_t length;     // Number of valid entries in the two arrays below.
    int32_t gapStarts[kCapacity];
    int64_t gapLengths[kCapacity];
};

}  // namespace
87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Does it make sense to write 1..capacity ranges?
90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Returns 0 if not, otherwise the number of ranges.
91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param values Sorted array of signed-integer values.
92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param length Number of values.
93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *                Should be 0x80..0x100, must be 1..0x100.
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param ranges Output ranges array.
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param capacity Maximum number of ranges.
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return Minimum number of ranges (at most capacity) that have the desired density,
98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *         or 0 if that density cannot be achieved.
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CAPI int32_t U_EXPORT2
101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehouprv_makeDenseRanges(const int32_t values[], int32_t length,
102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                     int32_t density,
103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                     int32_t ranges[][2], int32_t capacity) {
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length<=2) {
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t minValue=values[0];
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Use int64_t variables for intermediate-value precision and to avoid
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // signed-int32_t overflow of maxValue-minValue.
111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length>=(density*maxLength)/0x100) {
113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Use one range.
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ranges[0][0]=minValue;
115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ranges[0][1]=maxValue;
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length<=4) {
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // See if we can split [minValue, maxValue] into 2..capacity ranges,
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // divided by the 1..(capacity-1) largest gaps.
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    LargestGaps gaps(capacity-1);
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t i;
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t expectedValue=minValue;
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(i=1; i<length; ++i) {
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++expectedValue;
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t actualValue=values[i];
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(expectedValue!=actualValue) {
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            expectedValue=actualValue;
132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // We know gaps.count()>=1 because we have fewer values (length) than
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // the length of the [minValue..maxValue] range (maxLength).
136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // (Otherwise we would have returned with the one range above.)
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t num;
138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(i=0, num=2;; ++i, ++num) {
139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(i>=gaps.count()) {
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // The values are too sparse for capacity or fewer ranges
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // of the requested density.
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return 0;
143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        maxLength-=gaps.gapLength(i);
145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(length>num*2 && length>=(density*maxLength)/0x100) {
146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Use the num ranges with the num-1 largest gaps.
150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    gaps.truncate(num-1);
151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ranges[0][0]=minValue;
152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(i=0; i<=num-2; ++i) {
153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t gapIndex=gaps.firstAfter(minValue);
154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t gapStart=gaps.gapStart(gapIndex);
155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ranges[i][1]=gapStart-1;
156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ranges[num-1][1]=maxValue;
159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return num;
160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
161