tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

extract_unsafe_backwards.cpp (5930B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /**
      4 * Copyright (c) 1999-2016, International Business Machines Corporation and
      5 * others. All Rights Reserved.
      6 *
      7 * Generator for source/i18n/collunsafe.h
      8 * see Makefile
      9 */
     10 
     11 #include <stdio.h>
     12 #include "unicode/uversion.h"
     13 #include "unicode/uniset.h"
     14 #include "collationroot.h"
     15 #include "collationtailoring.h"
     16 
     17 using icu::CollationCacheEntry;
     18 using icu::CollationRoot;
     19 using icu::UnicodeSet;
     20 
     21 /**
     22 * Define the type of generator to use. Choose one.
     23 */
     24 #define SERIALIZE 1   //< Default: use UnicodeSet.serialize() and a new internal c'tor
     25 #define RANGES 0      //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp)
     26 #define PATTERN 0     //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)
     27 
     28 int main(int argc, const char *argv[]) {
     29    UErrorCode errorCode = U_ZERO_ERROR;
     30 
     31    // Get the unsafeBackwardsSet
     32    const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
     33    if(U_FAILURE(errorCode)) {
     34      fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
     35      return 1;
     36    }
     37    const UVersionInfo &version = rootEntry->tailoring->version;
     38    const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
     39    char verString[20];
     40    u_versionToString(version, verString);
     41    fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
     42    int32_t rangeCount = unsafeBackwardSet->getRangeCount();
     43    
     44 #if SERIALIZE
     45    fprintf(stderr, ".. serializing\n");
     46    // UnicodeSet serialization
     47    
     48    UErrorCode preflightCode = U_ZERO_ERROR;
     49    // preflight
     50    int32_t serializedCount = unsafeBackwardSet->serialize(nullptr,0,preflightCode);
     51    if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
     52      fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
     53      return 1;
     54    }
     55    uint16_t *serializedData = new uint16_t[serializedCount];
     56    // serialize
     57    unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
     58    if(U_FAILURE(errorCode)) {
     59      delete [] serializedData;
     60      fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
     61      return 1;
     62    }
     63 #endif
     64    
     65 #if PATTERN
     66    fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
     67    // attempt to use pattern
     68    
     69    UnicodeString pattern;
     70    UnicodeSet set(*unsafeBackwardSet);
     71    set.compact();
     72    set.toPattern(pattern, false);
     73 
     74    if(U_SUCCESS(errorCode)) {
     75      // This fails (bug# ?) - which is why this method was abandoned.
     76      
     77      // UnicodeSet usA(pattern, errorCode);
     78      // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
     79      // return 1;
     80    }
     81 
     82 
     83    const char16_t *buf = pattern.getBuffer();
     84    int32_t needed = pattern.length();
     85 
     86    // print
     87    {
     88      char buf2[2048];
     89      int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
     90      buf2[len2]=0;
     91      fprintf(stderr,"===\n%s\n===\n", buf2);
     92    }
     93 
     94    const UnicodeString unsafeBackwardPattern(false, buf, needed);
     95  if(U_SUCCESS(errorCode)) {
     96    //UnicodeSet us(unsafeBackwardPattern, errorCode);
     97    //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
     98  } else {
     99    fprintf(stderr, "Uset OK - \n");
    100  }
    101 #endif
    102 
    103 
    104  // Generate the output file.
    105 
    106  printf("// © 2016 and later: Unicode, Inc. and others.\n");
    107  printf("// License & terms of use: http://www.unicode.org/copyright.html\n");
    108  printf("// collunsafe.h\n");
    109  printf("//  Copyright (C) 2015-2016, International Business Machines Corporation and others.\n");
    110  printf("//  All Rights Reserved.\n");
    111  printf("\n");
    112  printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
    113  printf("// Machine generated, do not edit.\n");
    114  printf("\n");
    115  printf("#ifndef COLLUNSAFE_H\n"
    116         "#define COLLUNSAFE_H\n"
    117         "\n"
    118         "#include \"unicode/utypes.h\"\n"
    119         "\n"
    120         "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
    121  printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
    122 
    123  
    124  
    125 #if PATTERN
    126  printf("#define COLLUNSAFE_PATTERN 1\n");
    127  printf("static const int32_t collunsafe_len = %d;\n", needed);
    128  printf("static const char16_t collunsafe_pattern[collunsafe_len] = {\n");
    129  for(int i=0;i<needed;i++) {
    130    if( (i>0) && (i%8 == 0) ) {
    131      printf(" // %d\n", i);
    132    }
    133    printf("0x%04X", buf[i]); // TODO check
    134    if(i != (needed-1)) {
    135      printf(", ");
    136    }
    137    }
    138  printf(" //%d\n};\n", (needed-1));
    139 #endif
    140 
    141 #if RANGE
    142    fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
    143    printf("#define COLLUNSAFE_RANGE 1\n");
    144    printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
    145    printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
    146    for(int32_t i=0;i<rangeCount;i++) {
    147      printf(" 0x%04X, 0x%04X, // %d\n",
    148             unsafeBackwardSet->getRangeStart(i),
    149             unsafeBackwardSet->getRangeEnd(i),
    150             i);
    151    }
    152    printf("};\n");
    153 #endif
    154 
    155 #if SERIALIZE
    156    printf("#define COLLUNSAFE_SERIALIZE 1\n");    
    157    printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
    158    printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
    159    for(int32_t i=0;i<serializedCount;i++) {
    160      if( (i>0) && (i%8 == 0) ) {
    161        printf(" // %d\n", i);
    162      }
    163      printf("0x%04X", serializedData[i]); // TODO check
    164      if(i != (serializedCount-1)) {
    165        printf(", ");
    166      }
    167    }  
    168    printf("};\n");
    169 #endif
    170    
    171    printf("#endif\n");
    172    fflush(stderr);
    173    fflush(stdout);
    174    return(U_SUCCESS(errorCode)?0:1);
    175 }