# is_combining_diacritic.py (3289B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from unicodedata import combining

UNICODE_LIMIT = 0x110000

UNICODE_COMBINING_CLASS_NOT_REORDERED = 0
UNICODE_COMBINING_CLASS_KANA_VOICING = 8
UNICODE_COMBINING_CLASS_VIRAMA = 9


# Keep this function in sync with IsCombiningDiacritic in nsUnicodeProperties.h.
def is_combining_diacritic(char):
    """Return True if *char* counts as a combining diacritic.

    A character qualifies when its canonical combining class (as reported by
    ``unicodedata.combining``) is none of: not-reordered (0), kana voicing (8),
    virama (9), or the explicitly excluded classes 91, 129, 130 and 132.

    *char* must be a single-character string.
    """
    return combining(char) not in (
        UNICODE_COMBINING_CLASS_NOT_REORDERED,
        UNICODE_COMBINING_CLASS_KANA_VOICING,
        UNICODE_COMBINING_CLASS_VIRAMA,
        91,
        129,
        130,
        132,
    )


# See gfxFontUtils.h for the SharedBitSet that we're creating a const instance of here.
BLOCK_SIZE = 32
BLOCK_SIZE_BITS = BLOCK_SIZE * 8


def main(header):
    """Write the combining-diacritics bitset to *header* as a C++ header.

    The code points below UNICODE_LIMIT are split into consecutive blocks of
    BLOCK_SIZE_BITS code points. Each block is BLOCK_SIZE bytes with one bit
    per code point (least-significant bit first), set when
    is_combining_diacritic() is true. Identical blocks are stored once and
    referenced through a per-block index table.

    *header* is any object with a ``write(str)`` method. A trailing run of
    code points that does not fill a whole block is not emitted.
    """
    block_index = []  # for each block of code points: its position in `blocks`
    blocks = []  # distinct blocks, in first-seen order
    positions = {}  # tuple(block contents) -> position in `blocks`

    # Figure out the contents of each 256-char block, and see if it is unique
    # or can share an already-allocated block.
    for block_number in range(UNICODE_LIMIT // BLOCK_SIZE_BITS):
        first_char = block_number * BLOCK_SIZE_BITS
        block = [0] * BLOCK_SIZE
        for offset in range(BLOCK_SIZE_BITS):
            if is_combining_diacritic(chr(first_char + offset)):
                block[offset // 8] |= 1 << (offset % 8)

        key = tuple(block)
        position = positions.get(key)
        if position is None:
            position = len(blocks)
            positions[key] = position
            blocks.append(block)
        block_index.append(position)

    # Strip trailing empty blocks from the index. Test the referenced block's
    # contents rather than assuming the empty block sits at position 0, and
    # stop cleanly if every entry turns out to be empty.
    while block_index and not any(blocks[block_index[-1]]):
        block_index.pop()

    index_count = str(len(block_index))
    block_count = str(len(blocks))
    data_size = str(len(blocks) * BLOCK_SIZE)

    # Write the SharedBitSet as data in a C++ header file.
    out = [
        "/* !GENERATED DATA -- DO NOT EDIT! */\n",
        "/* (see is_combining_diacritic.py) */\n",
        "\n",
        '#include "gfxFontUtils.h"\n',
        "\n",
        "typedef struct {\n",
        " uint16_t mBlockIndexCount;\n",
        " uint16_t mBlockCount;\n",
        " uint16_t mBlockIndex[" + index_count + "];\n",
        " uint8_t mBlockData[" + data_size + "];\n",
        "} CombiningDiacriticsBitset_t;\n",
        "\n",
        "static const CombiningDiacriticsBitset_t COMBINING_DIACRITICS_BITSET_DATA = {\n",
        " " + index_count + ",\n",
        " " + block_count + ",\n",
        " {\n",
    ]
    out.extend(" " + str(position) + ",\n" for position in block_index)
    out.append(" },\n")
    out.append(" {\n")
    # One row of comma-terminated byte values per distinct block.
    out.extend(
        " " + "".join(str(value) + "," for value in block) + "\n"
        for block in blocks
    )
    out.extend(
        [
            " },\n",
            "};\n",
            "\n",
            "static const SharedBitSet* sCombiningDiacriticsSet =\n",
            " reinterpret_cast<const SharedBitSet*>(&COMBINING_DIACRITICS_BITSET_DATA);\n",
            "\n",
        ]
    )
    header.write("".join(out))