utrie_swap.cpp (11236B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 // utrie_swap.cpp 5 // created: 2018aug08 Markus W. Scherer 6 7 #include "unicode/utypes.h" 8 #include "cmemory.h" 9 #include "ucptrie_impl.h" 10 #include "udataswp.h" 11 #include "utrie.h" 12 #include "utrie2_impl.h" 13 14 // These functions for swapping different generations of ICU code point tries are here 15 // so that their implementation files need not depend on swapper code, 16 // need not depend on each other, and so that other swapper code 17 // need not depend on other trie code. 18 19 namespace { 20 21 constexpr int32_t ASCII_LIMIT = 0x80; 22 23 } // namespace 24 25 U_CAPI int32_t U_EXPORT2 26 utrie_swap(const UDataSwapper *ds, 27 const void *inData, int32_t length, void *outData, 28 UErrorCode *pErrorCode) { 29 const UTrieHeader *inTrie; 30 UTrieHeader trie; 31 int32_t size; 32 UBool dataIs32; 33 34 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 35 return 0; 36 } 37 if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { 38 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 39 return 0; 40 } 41 42 /* setup and swapping */ 43 if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { 44 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 45 return 0; 46 } 47 48 inTrie=(const UTrieHeader *)inData; 49 trie.signature=ds->readUInt32(inTrie->signature); 50 trie.options=ds->readUInt32(inTrie->options); 51 trie.indexLength=udata_readInt32(ds, inTrie->indexLength); 52 trie.dataLength=udata_readInt32(ds, inTrie->dataLength); 53 54 if( trie.signature!=0x54726965 || 55 (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || 56 ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || 57 trie.indexLength<UTRIE_BMP_INDEX_LENGTH || 58 (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || 59 trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || 60 (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || 61 ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) 62 ) { 63 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 64 return 0; 65 } 66 67 dataIs32 = (trie.options & UTRIE_OPTIONS_DATA_IS_32_BIT) != 0; 68 size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); 69 70 if(length>=0) { 71 UTrieHeader *outTrie; 72 73 if(length<size) { 74 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 75 return 0; 76 } 77 78 outTrie=(UTrieHeader *)outData; 79 80 /* swap the header */ 81 ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); 82 83 /* swap the index and the data */ 84 if(dataIs32) { 85 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 86 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, 87 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 88 } else { 89 ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); 90 } 91 } 92 93 return size; 94 } 95 96 U_CAPI int32_t U_EXPORT2 97 utrie2_swap(const UDataSwapper *ds, 98 const void *inData, int32_t length, void *outData, 99 UErrorCode *pErrorCode) { 100 const UTrie2Header *inTrie; 101 UTrie2Header trie; 102 int32_t dataLength, size; 103 UTrie2ValueBits valueBits; 104 105 if(U_FAILURE(*pErrorCode)) { 106 return 0; 107 } 108 if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { 109 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 110 return 0; 111 } 112 113 /* setup and swapping */ 114 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { 115 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 116 return 0; 117 } 118 119 inTrie=(const UTrie2Header *)inData; 120 trie.signature=ds->readUInt32(inTrie->signature); 121 trie.options=ds->readUInt16(inTrie->options); 122 trie.indexLength=ds->readUInt16(inTrie->indexLength); 123 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); 124 125 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); 126 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; 127 128 if( trie.signature!=UTRIE2_SIG || 129 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || 130 trie.indexLength<UTRIE2_INDEX_1_OFFSET || 131 dataLength<UTRIE2_DATA_START_OFFSET 132 ) { 133 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 134 return 0; 135 } 136 137 size=sizeof(UTrie2Header)+trie.indexLength*2; 138 switch(valueBits) { 139 case UTRIE2_16_VALUE_BITS: 140 size+=dataLength*2; 141 break; 142 case UTRIE2_32_VALUE_BITS: 143 size+=dataLength*4; 144 break; 145 default: 146 *pErrorCode=U_INVALID_FORMAT_ERROR; 147 return 0; 148 } 149 150 if(length>=0) { 151 UTrie2Header *outTrie; 152 153 if(length<size) { 154 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 155 return 0; 156 } 157 158 outTrie=(UTrie2Header *)outData; 159 160 /* swap the header */ 161 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); 162 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); 163 164 /* swap the index and the data */ 165 switch(valueBits) { 166 case UTRIE2_16_VALUE_BITS: 167 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); 168 break; 169 case UTRIE2_32_VALUE_BITS: 170 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 171 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, 172 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 173 break; 174 default: 175 *pErrorCode=U_INVALID_FORMAT_ERROR; 176 return 0; 177 } 178 } 179 180 return size; 181 } 182 183 U_CAPI int32_t U_EXPORT2 184 ucptrie_swap(const UDataSwapper *ds, 185 const void *inData, int32_t length, void *outData, 186 UErrorCode *pErrorCode) { 187 const UCPTrieHeader *inTrie; 188 UCPTrieHeader trie; 189 int32_t dataLength, size; 190 UCPTrieValueWidth valueWidth; 191 192 if(U_FAILURE(*pErrorCode)) { 193 return 0; 194 } 195 if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { 196 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 197 return 0; 198 } 199 200 /* setup and swapping */ 201 if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { 202 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 203 return 0; 204 } 205 206 inTrie=(const UCPTrieHeader *)inData; 207 trie.signature=ds->readUInt32(inTrie->signature); 208 trie.options=ds->readUInt16(inTrie->options); 209 trie.indexLength=ds->readUInt16(inTrie->indexLength); 210 trie.dataLength = ds->readUInt16(inTrie->dataLength); 211 212 UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); 213 valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); 214 dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; 215 216 int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? 217 UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; 218 if( trie.signature!=UCPTRIE_SIG || 219 type > UCPTRIE_TYPE_SMALL || 220 (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || 221 valueWidth > UCPTRIE_VALUE_BITS_8 || 222 trie.indexLength < minIndexLength || 223 dataLength < ASCII_LIMIT 224 ) { 225 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ 226 return 0; 227 } 228 229 size=sizeof(UCPTrieHeader)+trie.indexLength*2; 230 switch(valueWidth) { 231 case UCPTRIE_VALUE_BITS_16: 232 size+=dataLength*2; 233 break; 234 case UCPTRIE_VALUE_BITS_32: 235 size+=dataLength*4; 236 break; 237 case UCPTRIE_VALUE_BITS_8: 238 size+=dataLength; 239 break; 240 default: 241 *pErrorCode=U_INVALID_FORMAT_ERROR; 242 return 0; 243 } 244 245 if(length>=0) { 246 UCPTrieHeader *outTrie; 247 248 if(length<size) { 249 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 250 return 0; 251 } 252 253 outTrie=(UCPTrieHeader *)outData; 254 255 /* swap the header */ 256 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); 257 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); 258 259 /* swap the index */ 260 const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1); 261 uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1); 262 ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode); 263 264 /* swap the data */ 265 const uint16_t *inData=inIndex+trie.indexLength; 266 uint16_t *outData=outIndex+trie.indexLength; 267 switch(valueWidth) { 268 case UCPTRIE_VALUE_BITS_16: 269 ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode); 270 break; 271 case UCPTRIE_VALUE_BITS_32: 272 ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode); 273 break; 274 case UCPTRIE_VALUE_BITS_8: 275 if(inTrie!=outTrie) { 276 uprv_memmove(outData, inData, dataLength); 277 } 278 break; 279 default: 280 *pErrorCode=U_INVALID_FORMAT_ERROR; 281 return 0; 282 } 283 } 284 285 return size; 286 } 287 288 namespace { 289 290 /** 291 * Gets the trie version from 32-bit-aligned memory containing the serialized form 292 * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). 293 * 294 * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie 295 * @param length the number of bytes available at data; 296 * can be more than necessary (see return value) 297 * @param anyEndianOk If false, only platform-endian serialized forms are recognized. 298 * If true, opposite-endian serialized forms are recognized as well. 299 * @return the trie version of the serialized form, or 0 if it is not 300 * recognized as a serialized trie 301 */ 302 int32_t 303 getVersion(const void *data, int32_t length, UBool anyEndianOk) { 304 uint32_t signature; 305 if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { 306 return 0; 307 } 308 signature = *static_cast<const uint32_t*>(data); 309 if(signature==UCPTRIE_SIG) { 310 return 3; 311 } 312 if(anyEndianOk && signature==UCPTRIE_OE_SIG) { 313 return 3; 314 } 315 if(signature==UTRIE2_SIG) { 316 return 2; 317 } 318 if(anyEndianOk && signature==UTRIE2_OE_SIG) { 319 return 2; 320 } 321 if(signature==UTRIE_SIG) { 322 return 1; 323 } 324 if(anyEndianOk && signature==UTRIE_OE_SIG) { 325 return 1; 326 } 327 return 0; 328 } 329 330 } // namespace 331 332 U_CAPI int32_t U_EXPORT2 333 utrie_swapAnyVersion(const UDataSwapper *ds, 334 const void *inData, int32_t length, void *outData, 335 UErrorCode *pErrorCode) { 336 if(U_FAILURE(*pErrorCode)) { return 0; } 337 switch(getVersion(inData, length, true)) { 338 case 1: 339 return utrie_swap(ds, inData, length, outData, pErrorCode); 340 case 2: 341 return utrie2_swap(ds, inData, length, outData, pErrorCode); 342 case 3: 343 return ucptrie_swap(ds, inData, length, outData, pErrorCode); 344 default: 345 *pErrorCode=U_INVALID_FORMAT_ERROR; 346 return 0; 347 } 348 }