denseranges.cpp (5228B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: denseranges.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010sep25 14 * created by: Markus W. Scherer 15 * 16 * Helper code for finding a small number of dense ranges. 17 */ 18 19 #include "unicode/utypes.h" 20 #include "denseranges.h" 21 22 // Definitions in the anonymous namespace are invisible outside this file. 23 namespace { 24 25 /** 26 * Collect up to 15 range gaps and sort them by ascending gap size. 27 */ 28 class LargestGaps { 29 public: 30 LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {} 31 32 void add(int32_t gapStart, int64_t gapLength) { 33 int32_t i=length; 34 while(i>0 && gapLength>gapLengths[i-1]) { 35 --i; 36 } 37 if(i<maxLength) { 38 // The new gap is now one of the maxLength largest. 39 // Insert the new gap, moving up smaller ones of the previous 40 // length largest. 41 int32_t j= length<maxLength ? length++ : maxLength-1; 42 while(j>i) { 43 gapStarts[j]=gapStarts[j-1]; 44 gapLengths[j]=gapLengths[j-1]; 45 --j; 46 } 47 gapStarts[i]=gapStart; 48 gapLengths[i]=gapLength; 49 } 50 } 51 52 void truncate(int32_t newLength) { 53 if(newLength<length) { 54 length=newLength; 55 } 56 } 57 58 int32_t count() const { return length; } 59 int32_t gapStart(int32_t i) const { return gapStarts[i]; } 60 int64_t gapLength(int32_t i) const { return gapLengths[i]; } 61 62 int32_t firstAfter(int32_t value) const { 63 if(length==0) { 64 return -1; 65 } 66 int32_t minValue=0; 67 int32_t minIndex=-1; 68 for(int32_t i=0; i<length; ++i) { 69 if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) { 70 minValue=gapStarts[i]; 71 minIndex=i; 72 } 73 } 74 return minIndex; 75 } 76 77 private: 78 static const int32_t kCapacity=15; 79 80 int32_t maxLength; 81 int32_t length; 82 int32_t gapStarts[kCapacity]; 83 int64_t gapLengths[kCapacity]; 84 }; 85 86 } // namespace 87 88 /** 89 * Does it make sense to write 1..capacity ranges? 90 * Returns 0 if not, otherwise the number of ranges. 91 * @param values Sorted array of signed-integer values. 92 * @param length Number of values. 93 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) 94 * Should be 0x80..0x100, must be 1..0x100. 95 * @param ranges Output ranges array. 96 * @param capacity Maximum number of ranges. 97 * @return Minimum number of ranges (at most capacity) that have the desired density, 98 * or 0 if that density cannot be achieved. 99 */ 100 U_CAPI int32_t U_EXPORT2 101 uprv_makeDenseRanges(const int32_t values[], int32_t length, 102 int32_t density, 103 int32_t ranges[][2], int32_t capacity) { 104 if(length<=2) { 105 return 0; 106 } 107 int32_t minValue=values[0]; 108 int32_t maxValue=values[length-1]; // Assume minValue<=maxValue. 109 // Use int64_t variables for intermediate-value precision and to avoid 110 // signed-int32_t overflow of maxValue-minValue. 111 int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1; 112 if(length>=(density*maxLength)/0x100) { 113 // Use one range. 114 ranges[0][0]=minValue; 115 ranges[0][1]=maxValue; 116 return 1; 117 } 118 if(length<=4) { 119 return 0; 120 } 121 // See if we can split [minValue, maxValue] into 2..capacity ranges, 122 // divided by the 1..(capacity-1) largest gaps. 123 LargestGaps gaps(capacity-1); 124 int32_t i; 125 int32_t expectedValue=minValue; 126 for(i=1; i<length; ++i) { 127 ++expectedValue; 128 int32_t actualValue=values[i]; 129 if(expectedValue!=actualValue) { 130 gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue); 131 expectedValue=actualValue; 132 } 133 } 134 // We know gaps.count()>=1 because we have fewer values (length) than 135 // the length of the [minValue..maxValue] range (maxLength). 136 // (Otherwise we would have returned with the one range above.) 137 int32_t num; 138 for(i=0, num=2;; ++i, ++num) { 139 if(i>=gaps.count()) { 140 // The values are too sparse for capacity or fewer ranges 141 // of the requested density. 142 return 0; 143 } 144 maxLength-=gaps.gapLength(i); 145 if(length>num*2 && length>=(density*maxLength)/0x100) { 146 break; 147 } 148 } 149 // Use the num ranges with the num-1 largest gaps. 150 gaps.truncate(num-1); 151 ranges[0][0]=minValue; 152 for(i=0; i<=num-2; ++i) { 153 int32_t gapIndex=gaps.firstAfter(minValue); 154 int32_t gapStart=gaps.gapStart(gapIndex); 155 ranges[i][1]=gapStart-1; 156 ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex)); 157 } 158 ranges[num-1][1]=maxValue; 159 return num; 160 }