tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

gencjkcisvs.py (2740B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
      3 # You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import os.path
      6 import re
      7 import sys
      8 
      9 f = open(sys.argv[1] if len(sys.argv) > 1 else "StandardizedVariants.txt")
     10 
     11 line = f.readline()
     12 m = re.compile(r"^# (StandardizedVariants(-\d+(\.\d+)*)?\.txt)").search(line)
     13 fileversion = m.group(1)
     14 vsdict = {}
     15 r = re.compile(
     16    "^([0-9A-F]{4,6}) (FE0[0-9A-F]); CJK COMPATIBILITY IDEOGRAPH-([0-9A-F]{4,6});"
     17 )
     18 while True:
     19    line = f.readline()
     20    if not line:
     21        break
     22    if "CJK COMPATIBILITY IDEOGRAPH-" not in line:
     23        continue
     24 
     25    m = r.search(line)
     26    unified = int(m.group(1), 16)
     27    vs = int(m.group(2), 16)
     28    compat = int(m.group(3), 16)
     29 
     30    if vs not in vsdict:
     31        vsdict[vs] = {}
     32    vsdict[vs][unified] = compat
     33 
     34 f.close
     35 
     36 offsets = []
     37 length = 10 + 11 * len(vsdict)
     38 for k, mappings in sorted(vsdict.items()):
     39    offsets.append(length)
     40    length += 4 + 5 * len(mappings)
     41 
     42 f = open(sys.argv[2] if len(sys.argv) > 2 else "CJKCompatSVS.cpp", "wb")
     43 f.write(
     44    """// Generated by %s. Do not edit.
     45 
     46 #include <stdint.h>
     47 
     48 #define U16(v) (((v) >> 8) & 0xFF), ((v) & 0xFF)
     49 #define U24(v) (((v) >> 16) & 0xFF), (((v) >> 8) & 0xFF), ((v) & 0xFF)
     50 #define U32(v) (((v) >> 24) & 0xFF), (((v) >> 16) & 0xFF), (((v) >> 8) & 0xFF), ((v) & 0xFF)
     51 #define GLYPH(v) U16(v >= 0x2F800 ? (v) - (0x2F800 - 0xFB00) : (v))
     52 
     53 // Fallback mappings for CJK Compatibility Ideographs Standardized Variants
     54 // taken from %s.
     55 // Using OpenType format 14 cmap subtable structure to reuse the lookup code
     56 // for fonts. The glyphID field is used to store the corresponding codepoints
     57 // CJK Compatibility Ideographs. To fit codepoints into the 16-bit glyphID
     58 // field, CJK Compatibility Ideographs Supplement (U+2F800..U+2FA1F) will be
     59 // mapped to 0xFB00..0xFD1F.
     60 extern const uint8_t sCJKCompatSVSTable[] = {
     61 """
     62    % (os.path.basename(sys.argv[0]), fileversion)
     63 )
     64 f.write("  U16(14), // format\n")
     65 f.write("  U32(%d), // length\n" % length)
     66 f.write("  U32(%d), // numVarSelectorRecords\n" % len(vsdict))
     67 for i, k in enumerate(sorted(vsdict.keys())):
     68    f.write(
     69        "    U24(0x%04X), U32(0), U32(%d), // varSelectorRecord[%d]\n"
     70        % (k, offsets[i], i)
     71    )
     72 for k, mappings in sorted(vsdict.items()):
     73    f.write("  // 0x%04X\n" % k)
     74    f.write("  U32(%d), // numUVSMappings\n" % len(mappings))
     75    for unified, compat in sorted(mappings.items()):
     76        f.write("    U24(0x%04X), GLYPH(0x%04X),\n" % (unified, compat))
     77 f.write(
     78    """};
     79 
     80 #undef U16
     81 #undef U24
     82 #undef U32
     83 #undef GLYPH
     84 
     85 static_assert(sizeof sCJKCompatSVSTable == %d, "Table generator has a bug.");
     86 """
     87    % length
     88 )