extract_unsafe_backwards.cpp (5930B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 * Copyright (c) 1999-2016, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 * 7 * Generator for source/i18n/collunsafe.h 8 * see Makefile 9 */ 10 11 #include <stdio.h> 12 #include "unicode/uversion.h" 13 #include "unicode/uniset.h" 14 #include "collationroot.h" 15 #include "collationtailoring.h" 16 17 using icu::CollationCacheEntry; 18 using icu::CollationRoot; 19 using icu::UnicodeSet; 20 21 /** 22 * Define the type of generator to use. Choose one. 23 */ 24 #define SERIALIZE 1 //< Default: use UnicodeSet.serialize() and a new internal c'tor 25 #define RANGES 0 //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp) 26 #define PATTERN 0 //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp) 27 28 int main(int argc, const char *argv[]) { 29 UErrorCode errorCode = U_ZERO_ERROR; 30 31 // Get the unsafeBackwardsSet 32 const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode); 33 if(U_FAILURE(errorCode)) { 34 fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode)); 35 return 1; 36 } 37 const UVersionInfo &version = rootEntry->tailoring->version; 38 const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet; 39 char verString[20]; 40 u_versionToString(version, verString); 41 fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString); 42 int32_t rangeCount = unsafeBackwardSet->getRangeCount(); 43 44 #if SERIALIZE 45 fprintf(stderr, ".. serializing\n"); 46 // UnicodeSet serialization 47 48 UErrorCode preflightCode = U_ZERO_ERROR; 49 // preflight 50 int32_t serializedCount = unsafeBackwardSet->serialize(nullptr,0,preflightCode); 51 if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) { 52 fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode)); 53 return 1; 54 } 55 uint16_t *serializedData = new uint16_t[serializedCount]; 56 // serialize 57 unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode); 58 if(U_FAILURE(errorCode)) { 59 delete [] serializedData; 60 fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode)); 61 return 1; 62 } 63 #endif 64 65 #if PATTERN 66 fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n"); 67 // attempt to use pattern 68 69 UnicodeString pattern; 70 UnicodeSet set(*unsafeBackwardSet); 71 set.compact(); 72 set.toPattern(pattern, false); 73 74 if(U_SUCCESS(errorCode)) { 75 // This fails (bug# ?) - which is why this method was abandoned. 76 77 // UnicodeSet usA(pattern, errorCode); 78 // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode)); 79 // return 1; 80 } 81 82 83 const char16_t *buf = pattern.getBuffer(); 84 int32_t needed = pattern.length(); 85 86 // print 87 { 88 char buf2[2048]; 89 int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8"); 90 buf2[len2]=0; 91 fprintf(stderr,"===\n%s\n===\n", buf2); 92 } 93 94 const UnicodeString unsafeBackwardPattern(false, buf, needed); 95 if(U_SUCCESS(errorCode)) { 96 //UnicodeSet us(unsafeBackwardPattern, errorCode); 97 // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode)); 98 } else { 99 fprintf(stderr, "Uset OK - \n"); 100 } 101 #endif 102 103 104 // Generate the output file. 105 106 printf("// © 2016 and later: Unicode, Inc. and others.\n"); 107 printf("// License & terms of use: http://www.unicode.org/copyright.html\n"); 108 printf("// collunsafe.h\n"); 109 printf("// Copyright (C) 2015-2016, International Business Machines Corporation and others.\n"); 110 printf("// All Rights Reserved.\n"); 111 printf("\n"); 112 printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n"); 113 printf("// Machine generated, do not edit.\n"); 114 printf("\n"); 115 printf("#ifndef COLLUNSAFE_H\n" 116 "#define COLLUNSAFE_H\n" 117 "\n" 118 "#include \"unicode/utypes.h\"\n" 119 "\n" 120 "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n"); 121 printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString); 122 123 124 125 #if PATTERN 126 printf("#define COLLUNSAFE_PATTERN 1\n"); 127 printf("static const int32_t collunsafe_len = %d;\n", needed); 128 printf("static const char16_t collunsafe_pattern[collunsafe_len] = {\n"); 129 for(int i=0;i<needed;i++) { 130 if( (i>0) && (i%8 == 0) ) { 131 printf(" // %d\n", i); 132 } 133 printf("0x%04X", buf[i]); // TODO check 134 if(i != (needed-1)) { 135 printf(", "); 136 } 137 } 138 printf(" //%d\n};\n", (needed-1)); 139 #endif 140 141 #if RANGE 142 fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n"); 143 printf("#define COLLUNSAFE_RANGE 1\n"); 144 printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount); 145 printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2); 146 for(int32_t i=0;i<rangeCount;i++) { 147 printf(" 0x%04X, 0x%04X, // %d\n", 148 unsafeBackwardSet->getRangeStart(i), 149 unsafeBackwardSet->getRangeEnd(i), 150 i); 151 } 152 printf("};\n"); 153 #endif 154 155 #if SERIALIZE 156 printf("#define COLLUNSAFE_SERIALIZE 1\n"); 157 printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount); 158 printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount); 159 for(int32_t i=0;i<serializedCount;i++) { 160 if( (i>0) && (i%8 == 0) ) { 161 printf(" // %d\n", i); 162 } 163 printf("0x%04X", serializedData[i]); // TODO check 164 if(i != (serializedCount-1)) { 165 printf(", "); 166 } 167 } 168 printf("};\n"); 169 #endif 170 171 printf("#endif\n"); 172 fflush(stderr); 173 fflush(stdout); 174 return(U_SUCCESS(errorCode)?0:1); 175 }