collation.cpp (5524B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * collation.cpp 9 * 10 * created on: 2010oct27 11 * created by: Markus W. Scherer 12 */ 13 14 #include "unicode/utypes.h" 15 16 #if !UCONFIG_NO_COLLATION 17 18 #include "collation.h" 19 #include "uassert.h" 20 21 U_NAMESPACE_BEGIN 22 23 uint32_t 24 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 25 // Extract the second byte, minus the minimum byte value, 26 // plus the offset, modulo the number of usable byte values, plus the minimum. 27 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 28 uint32_t primary; 29 if(isCompressible) { 30 offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 4; 31 primary = static_cast<uint32_t>((offset % 251) + 4) << 16; 32 offset /= 251; 33 } else { 34 offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 2; 35 primary = static_cast<uint32_t>((offset % 254) + 2) << 16; 36 offset /= 254; 37 } 38 // First byte, assume no further overflow. 39 return primary | ((basePrimary & 0xff000000) + static_cast<uint32_t>(offset << 24)); 40 } 41 42 uint32_t 43 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 44 // Extract the third byte, minus the minimum byte value, 45 // plus the offset, modulo the number of usable byte values, plus the minimum. 46 offset += (static_cast<int32_t>(basePrimary >> 8) & 0xff) - 2; 47 uint32_t primary = static_cast<uint32_t>((offset % 254) + 2) << 8; 48 offset /= 254; 49 // Same with the second byte, 50 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 51 if(isCompressible) { 52 offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 4; 53 primary |= static_cast<uint32_t>((offset % 251) + 4) << 16; 54 offset /= 251; 55 } else { 56 offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 2; 57 primary |= static_cast<uint32_t>((offset % 254) + 2) << 16; 58 offset /= 254; 59 } 60 // First byte, assume no further overflow. 61 return primary | ((basePrimary & 0xff000000) + static_cast<uint32_t>(offset << 24)); 62 } 63 64 uint32_t 65 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 66 // Extract the second byte, minus the minimum byte value, 67 // minus the step, modulo the number of usable byte values, plus the minimum. 68 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 69 // Assume no further underflow for the first byte. 70 U_ASSERT(0 < step && step <= 0x7f); 71 int32_t byte2 = (static_cast<int32_t>(basePrimary >> 16) & 0xff) - step; 72 if(isCompressible) { 73 if(byte2 < 4) { 74 byte2 += 251; 75 basePrimary -= 0x1000000; 76 } 77 } else { 78 if(byte2 < 2) { 79 byte2 += 254; 80 basePrimary -= 0x1000000; 81 } 82 } 83 return (basePrimary & 0xff000000) | (static_cast<uint32_t>(byte2) << 16); 84 } 85 86 uint32_t 87 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 88 // Extract the third byte, minus the minimum byte value, 89 // minus the step, modulo the number of usable byte values, plus the minimum. 90 U_ASSERT(0 < step && step <= 0x7f); 91 int32_t byte3 = (static_cast<int32_t>(basePrimary >> 8) & 0xff) - step; 92 if(byte3 >= 2) { 93 return (basePrimary & 0xffff0000) | (static_cast<uint32_t>(byte3) << 8); 94 } 95 byte3 += 254; 96 // Same with the second byte, 97 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 98 int32_t byte2 = (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 1; 99 if(isCompressible) { 100 if(byte2 < 4) { 101 byte2 = 0xfe; 102 basePrimary -= 0x1000000; 103 } 104 } else { 105 if(byte2 < 2) { 106 byte2 = 0xff; 107 basePrimary -= 0x1000000; 108 } 109 } 110 // First byte, assume no further underflow. 111 return (basePrimary & 0xff000000) | (static_cast<uint32_t>(byte2) << 16) | (static_cast<uint32_t>(byte3) << 8); 112 } 113 114 uint32_t 115 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) { 116 uint32_t p = static_cast<uint32_t>(dataCE >> 32); // three-byte primary pppppp00 117 int32_t lower32 = static_cast<int32_t>(dataCE); // base code point b & step s: bbbbbbss (bit 7: isCompressible) 118 int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f); // delta * increment 119 UBool isCompressible = (lower32 & 0x80) != 0; 120 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset); 121 } 122 123 uint32_t 124 Collation::unassignedPrimaryFromCodePoint(UChar32 c) { 125 // Create a gap before U+0000. Use c=-1 for [first unassigned]. 126 ++c; 127 // Fourth byte: 18 values, every 14th byte value (gap of 13). 128 uint32_t primary = 2 + (c % 18) * 14; 129 c /= 18; 130 // Third byte: 254 values. 131 primary |= (2 + (c % 254)) << 8; 132 c /= 254; 133 // Second byte: 251 values 04..FE excluding the primary compression bytes. 134 primary |= (4 + (c % 251)) << 16; 135 // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18). 136 return primary | (UNASSIGNED_IMPLICIT_BYTE << 24); 137 } 138 139 U_NAMESPACE_END 140 141 #endif // !UCONFIG_NO_COLLATION