16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*******************************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 2010, International Business Machines
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*******************************************************************************
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   file name:  denseranges.cpp
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   encoding:   US-ASCII
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   tab size:   8 (not used)
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   indentation:4
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created on: 2010sep25
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created by: Markus W. Scherer
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Helper code for finding a small number of dense ranges.
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "denseranges.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Definitions in the anonymous namespace are invisible outside this file.
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgnamespace {
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Collect up to 15 range gaps and sort them by ascending gap size.
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass LargestGaps {
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic:
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    void add(int32_t gapStart, int64_t gapLength) {
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t i=length;
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(i>0 && gapLength>gapLengths[i-1]) {
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            --i;
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(i<maxLength) {
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The new gap is now one of the maxLength largest.
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Insert the new gap, moving up smaller ones of the previous
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // length largest.
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t j= length<maxLength ? length++ : maxLength-1;
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            while(j>i) {
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                gapStarts[j]=gapStarts[j-1];
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                gapLengths[j]=gapLengths[j-1];
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                --j;
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            gapStarts[i]=gapStart;
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            gapLengths[i]=gapLength;
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    void truncate(int32_t newLength) {
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(newLength<length) {
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            length=newLength;
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t count() const { return length; }
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t gapLength(int32_t i) const { return gapLengths[i]; }
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t firstAfter(int32_t value) const {
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length==0) {
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return -1;
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t minValue=0;
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t minIndex=-1;
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(int32_t i=0; i<length; ++i) {
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                minValue=gapStarts[i];
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                minIndex=i;
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return minIndex;
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgprivate:
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const int32_t kCapacity=15;
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t maxLength;
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t length;
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t gapStarts[kCapacity];
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t gapLengths[kCapacity];
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}  // namespace
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does it make sense to write 1..capacity ranges?
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Returns 0 if not, otherwise the number of ranges.
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param values Sorted array of signed-integer values.
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param length Number of values.
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                Should be 0x80..0x100, must be 1..0x100.
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ranges Output ranges array.
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param capacity Maximum number of ranges.
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return Minimum number of ranges (at most capacity) that have the desired density,
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *         or 0 if that density cannot be achieved.
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguprv_makeDenseRanges(const int32_t values[], int32_t length,
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     int32_t density,
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     int32_t ranges[][2], int32_t capacity) {
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length<=2) {
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t minValue=values[0];
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Use int64_t variables for intermediate-value precision and to avoid
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // signed-int32_t overflow of maxValue-minValue.
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length>=(density*maxLength)/0x100) {
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Use one range.
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ranges[0][0]=minValue;
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ranges[0][1]=maxValue;
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 1;
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length<=4) {
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // See if we can split [minValue, maxValue] into 2..capacity ranges,
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // divided by the 1..(capacity-1) largest gaps.
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    LargestGaps gaps(capacity-1);
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t expectedValue=minValue;
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=1; i<length; ++i) {
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++expectedValue;
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t actualValue=values[i];
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(expectedValue!=actualValue) {
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            expectedValue=actualValue;
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We know gaps.count()>=1 because we have fewer values (length) than
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // the length of the [minValue..maxValue] range (maxLength).
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // (Otherwise we would have returned with the one range above.)
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t num;
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=0, num=2;; ++i, ++num) {
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(i>=gaps.count()) {
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The values are too sparse for capacity or fewer ranges
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // of the requested density.
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        maxLength-=gaps.gapLength(i);
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length>num*2 && length>=(density*maxLength)/0x100) {
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Use the num ranges with the num-1 largest gaps.
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    gaps.truncate(num-1);
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ranges[0][0]=minValue;
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=0; i<=num-2; ++i) {
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t gapIndex=gaps.firstAfter(minValue);
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t gapStart=gaps.gapStart(gapIndex);
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ranges[i][1]=gapStart-1;
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ranges[num-1][1]=maxValue;
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return num;
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
159