16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2010, International Business Machines 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: denseranges.cpp 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2010sep25 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Helper code for finding a small number of dense ranges. 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "denseranges.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Definitions in the anonymous namespace are invisible outside this file. 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgnamespace { 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Collect up to 15 range gaps and sort them by ascending gap size. 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass LargestGaps { 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic: 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {} 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void add(int32_t gapStart, int64_t gapLength) { 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i=length; 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(i>0 && gapLength>gapLengths[i-1]) { 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --i; 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<maxLength) { 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The new gap is now one of the maxLength largest. 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Insert the new gap, moving up smaller ones of the previous 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // length largest. 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t j= length<maxLength ? length++ : maxLength-1; 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(j>i) { 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gapStarts[j]=gapStarts[j-1]; 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gapLengths[j]=gapLengths[j-1]; 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --j; 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gapStarts[i]=gapStart; 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gapLengths[i]=gapLength; 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void truncate(int32_t newLength) { 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newLength<length) { 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=newLength; 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t count() const { return length; } 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t gapStart(int32_t i) const { return gapStarts[i]; } 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t gapLength(int32_t i) const { return gapLengths[i]; } 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t firstAfter(int32_t value) const { 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length==0) { 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minValue=0; 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minIndex=-1; 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(int32_t i=0; i<length; ++i) { 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) { 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minValue=gapStarts[i]; 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minIndex=i; 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return minIndex; 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgprivate: 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const int32_t kCapacity=15; 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxLength; 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length; 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t gapStarts[kCapacity]; 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t gapLengths[kCapacity]; 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} // namespace 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does it make sense to write 1..capacity ranges? 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Returns 0 if not, otherwise the number of ranges. 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param values Sorted array of signed-integer values. 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param length Number of values. 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Should be 0x80..0x100, must be 1..0x100. 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param ranges Output ranges array. 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param capacity Maximum number of ranges. 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return Minimum number of ranges (at most capacity) that have the desired density, 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or 0 if that density cannot be achieved. 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguprv_makeDenseRanges(const int32_t values[], int32_t length, 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t density, 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t ranges[][2], int32_t capacity) { 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<=2) { 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minValue=values[0]; 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxValue=values[length-1]; // Assume minValue<=maxValue. 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Use int64_t variables for intermediate-value precision and to avoid 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // signed-int32_t overflow of maxValue-minValue. 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1; 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>=(density*maxLength)/0x100) { 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Use one range. 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ranges[0][0]=minValue; 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ranges[0][1]=maxValue; 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 1; 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<=4) { 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // See if we can split [minValue, maxValue] into 2..capacity ranges, 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // divided by the 1..(capacity-1) largest gaps. 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org LargestGaps gaps(capacity-1); 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t expectedValue=minValue; 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=1; i<length; ++i) { 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++expectedValue; 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t actualValue=values[i]; 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(expectedValue!=actualValue) { 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue); 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org expectedValue=actualValue; 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We know gaps.count()>=1 because we have fewer values (length) than 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the length of the [minValue..maxValue] range (maxLength). 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Otherwise we would have returned with the one range above.) 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t num; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0, num=2;; ++i, ++num) { 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i>=gaps.count()) { 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The values are too sparse for capacity or fewer ranges 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // of the requested density. 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org maxLength-=gaps.gapLength(i); 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>num*2 && length>=(density*maxLength)/0x100) { 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Use the num ranges with the num-1 largest gaps. 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gaps.truncate(num-1); 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ranges[0][0]=minValue; 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<=num-2; ++i) { 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t gapIndex=gaps.firstAfter(minValue); 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t gapStart=gaps.gapStart(gapIndex); 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ranges[i][1]=gapStart-1; 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex)); 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ranges[num-1][1]=maxValue; 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return num; 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 159