1/*
2*******************************************************************************
3*   Copyright (C) 2010, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  denseranges.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010sep25
12*   created by: Markus W. Scherer
13*
14* Helper code for finding a small number of dense ranges.
15*/
16
17#include "unicode/utypes.h"
18#include "denseranges.h"
19
20// Definitions in the anonymous namespace are invisible outside this file.
21namespace {
22
/**
 * Collects the largest range gaps seen so far (at most kCapacity=15 of them)
 * and keeps them sorted by descending gap length:
 * index 0 holds the largest gap, index count()-1 the smallest retained one.
 */
class LargestGaps {
public:
    /**
     * @param max Maximum number of gaps to retain.
     *            Clamped to 0..kCapacity; with 0, add() never stores anything.
     */
    explicit LargestGaps(int32_t max)
            : maxLength(max<0 ? 0 : (max<=kCapacity ? max : kCapacity)), length(0) {}

    /**
     * Offers a gap to the collection.
     * The gap is stored only if it is among the maxLength largest seen so far;
     * when the collection is full, the smallest retained gap is dropped.
     * A gap with the same length as a stored one is placed after it.
     * @param gapStart First value inside the gap.
     * @param gapLength Number of values in the gap.
     */
    void add(int32_t gapStart, int64_t gapLength) {
        // Walk back from the end past every stored gap that is smaller
        // than the new one; i becomes the insertion index.
        int32_t i=length;
        while(i>0 && gapLength>gapLengths[i-1]) {
            --i;
        }
        if(i<maxLength) {
            // The new gap is now one of the maxLength largest.
            // If there is still room, grow by one; otherwise the last
            // (smallest) entry is overwritten by the shift below.
            int32_t j= length<maxLength ? length++ : maxLength-1;
            // Shift the smaller gaps up by one slot to open index i.
            while(j>i) {
                gapStarts[j]=gapStarts[j-1];
                gapLengths[j]=gapLengths[j-1];
                --j;
            }
            gapStarts[i]=gapStart;
            gapLengths[i]=gapLength;
        }
    }

    /**
     * Keeps only the newLength largest gaps.
     * Does nothing if newLength is not smaller than the current count.
     */
    void truncate(int32_t newLength) {
        if(newLength<length) {
            length=newLength;
        }
    }

    /** @return Number of gaps currently stored. */
    int32_t count() const { return length; }
    /** @return Start of the i-th largest gap. i must be in 0..count()-1 (not checked). */
    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    /** @return Length of the i-th largest gap. i must be in 0..count()-1 (not checked). */
    int64_t gapLength(int32_t i) const { return gapLengths[i]; }

    /**
     * Finds the stored gap with the smallest start that is greater than value.
     * @return The index of that gap, or -1 if no stored gap starts after value
     *         (including when the collection is empty).
     */
    int32_t firstAfter(int32_t value) const {
        int32_t minValue=0;
        int32_t minIndex=-1;
        // The gaps are ordered by length, not by start, so scan all of them.
        for(int32_t i=0; i<length; ++i) {
            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
                minValue=gapStarts[i];
                minIndex=i;
            }
        }
        return minIndex;
    }

private:
    static const int32_t kCapacity=15;

    int32_t maxLength;              // Retention limit, 0..kCapacity.
    int32_t length;                 // Number of valid entries in the arrays below.
    int32_t gapStarts[kCapacity];   // Parallel to gapLengths.
    int64_t gapLengths[kCapacity];  // Descending order.
};
83
84}  // namespace
85
86/**
87 * Does it make sense to write 1..capacity ranges?
88 * Returns 0 if not, otherwise the number of ranges.
89 * @param values Sorted array of signed-integer values.
90 * @param length Number of values.
91 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
92 *                Should be 0x80..0x100, must be 1..0x100.
93 * @param ranges Output ranges array.
94 * @param capacity Maximum number of ranges.
95 * @return Minimum number of ranges (at most capacity) that have the desired density,
96 *         or 0 if that density cannot be achieved.
97 */
98U_CAPI int32_t U_EXPORT2
99uprv_makeDenseRanges(const int32_t values[], int32_t length,
100                     int32_t density,
101                     int32_t ranges[][2], int32_t capacity) {
102    if(length<=2) {
103        return 0;
104    }
105    int32_t minValue=values[0];
106    int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
107    // Use int64_t variables for intermediate-value precision and to avoid
108    // signed-int32_t overflow of maxValue-minValue.
109    int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
110    if(length>=(density*maxLength)/0x100) {
111        // Use one range.
112        ranges[0][0]=minValue;
113        ranges[0][1]=maxValue;
114        return 1;
115    }
116    if(length<=4) {
117        return 0;
118    }
119    // See if we can split [minValue, maxValue] into 2..capacity ranges,
120    // divided by the 1..(capacity-1) largest gaps.
121    LargestGaps gaps(capacity-1);
122    int32_t i;
123    int32_t expectedValue=minValue;
124    for(i=1; i<length; ++i) {
125        ++expectedValue;
126        int32_t actualValue=values[i];
127        if(expectedValue!=actualValue) {
128            gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
129            expectedValue=actualValue;
130        }
131    }
132    // We know gaps.count()>=1 because we have fewer values (length) than
133    // the length of the [minValue..maxValue] range (maxLength).
134    // (Otherwise we would have returned with the one range above.)
135    int32_t num;
136    for(i=0, num=2;; ++i, ++num) {
137        if(i>=gaps.count()) {
138            // The values are too sparse for capacity or fewer ranges
139            // of the requested density.
140            return 0;
141        }
142        maxLength-=gaps.gapLength(i);
143        if(length>num*2 && length>=(density*maxLength)/0x100) {
144            break;
145        }
146    }
147    // Use the num ranges with the num-1 largest gaps.
148    gaps.truncate(num-1);
149    ranges[0][0]=minValue;
150    for(i=0; i<=num-2; ++i) {
151        int32_t gapIndex=gaps.firstAfter(minValue);
152        int32_t gapStart=gaps.gapStart(gapIndex);
153        ranges[i][1]=gapStart-1;
154        ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
155    }
156    ranges[num-1][1]=maxValue;
157    return num;
158}
159