// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  denseranges.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*
* Helper code for finding a small number of dense ranges.
*/

#include "unicode/utypes.h"
#include "denseranges.h"

// Definitions in the anonymous namespace are invisible outside this file.
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehonamespace { 24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Collect up to 15 range gaps and sort them by ascending gap size. 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass LargestGaps { 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic: 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {} 31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void add(int32_t gapStart, int64_t gapLength) { 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i=length; 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(i>0 && gapLength>gapLengths[i-1]) { 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --i; 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<maxLength) { 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The new gap is now one of the maxLength largest. 39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Insert the new gap, moving up smaller ones of the previous 40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // length largest. 41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t j= length<maxLength ? 
length++ : maxLength-1; 42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(j>i) { 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho gapStarts[j]=gapStarts[j-1]; 44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho gapLengths[j]=gapLengths[j-1]; 45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --j; 46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho gapStarts[i]=gapStart; 48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho gapLengths[i]=gapLength; 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void truncate(int32_t newLength) { 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(newLength<length) { 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=newLength; 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t count() const { return length; } 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t gapStart(int32_t i) const { return gapStarts[i]; } 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int64_t gapLength(int32_t i) const { return gapLengths[i]; } 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t firstAfter(int32_t value) const { 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length==0) { 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return -1; 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t minValue=0; 67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t minIndex=-1; 68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(int32_t i=0; i<length; ++i) { 69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(value<gapStarts[i] && 
(minIndex<0 || gapStarts[i]<minValue)) { 70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho minValue=gapStarts[i]; 71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho minIndex=i; 72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return minIndex; 75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate: 78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const int32_t kCapacity=15; 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t maxLength; 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t gapStarts[kCapacity]; 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int64_t gapLengths[kCapacity]; 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} // namespace 87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Does it make sense to write 1..capacity ranges? 90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Returns 0 if not, otherwise the number of ranges. 91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param values Sorted array of signed-integer values. 92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param length Number of values. 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) 94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Should be 0x80..0x100, must be 1..0x100. 95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param ranges Output ranges array. 
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param capacity Maximum number of ranges. 97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return Minimum number of ranges (at most capacity) that have the desired density, 98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * or 0 if that density cannot be achieved. 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CAPI int32_t U_EXPORT2 101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehouprv_makeDenseRanges(const int32_t values[], int32_t length, 102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t density, 103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t ranges[][2], int32_t capacity) { 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length<=2) { 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t minValue=values[0]; 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t maxValue=values[length-1]; // Assume minValue<=maxValue. 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Use int64_t variables for intermediate-value precision and to avoid 110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // signed-int32_t overflow of maxValue-minValue. 111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1; 112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>=(density*maxLength)/0x100) { 113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Use one range. 
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ranges[0][0]=minValue; 115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ranges[0][1]=maxValue; 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 1; 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length<=4) { 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // See if we can split [minValue, maxValue] into 2..capacity ranges, 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // divided by the 1..(capacity-1) largest gaps. 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho LargestGaps gaps(capacity-1); 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i; 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t expectedValue=minValue; 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(i=1; i<length; ++i) { 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++expectedValue; 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t actualValue=values[i]; 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(expectedValue!=actualValue) { 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue); 131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho expectedValue=actualValue; 132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We know gaps.count()>=1 because we have fewer values (length) than 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the length of the [minValue..maxValue] range (maxLength). 136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (Otherwise we would have returned with the one range above.) 
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t num; 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(i=0, num=2;; ++i, ++num) { 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i>=gaps.count()) { 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The values are too sparse for capacity or fewer ranges 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // of the requested density. 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho maxLength-=gaps.gapLength(i); 145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>num*2 && length>=(density*maxLength)/0x100) { 146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Use the num ranges with the num-1 largest gaps. 150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho gaps.truncate(num-1); 151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ranges[0][0]=minValue; 152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(i=0; i<=num-2; ++i) { 153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t gapIndex=gaps.firstAfter(minValue); 154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t gapStart=gaps.gapStart(gapIndex); 155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ranges[i][1]=gapStart-1; 156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex)); 157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ranges[num-1][1]=maxValue; 159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return num; 160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 161