jisx4051pairtable.txt (9263B)
1 /* 2 3 Simplification of Pair Table in JIS X 4051 4 5 1. The Origion Table - in 4.1.3 6 7 In JIS x 4051. The pair table is defined as below 8 9 Class of 10 Leading Class of Trailing Char Class 11 Char 12 13 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20 14 * # * # 15 1 X X X X X X X X X X X X X X X X X X X X X E 16 2 X X X X X X 17 3 X X X X X X 18 4 X X X X X X 19 5 X X X X X X 20 6 X X X X X X 21 7 X X X X X X X 22 8 X X X X X X E 23 9 X X X X X X 24 10 X X X X X X 25 11 X X X X X X 26 12 X X X X X X 27 13 X X X X X X X 28 14 X X X X X X X 29 15 X X X X X X X X X 30 16 X X X X X X X X 31 17 X X X X X E 32 18 X X X X X X X X X 33 19 X E E E E E X X X X X X X X X X X X E X E E 34 20 X X X X X E 35 36 * Same Char 37 # Other Char 38 39 2. Simplified by remove the class which we do not care 40 41 However, since we do not care about class 13(Subscript), 14(Ruby), 42 19(split line note begin quote), and 20(split line note end quote) 43 we can simplify this par table into the following 44 45 Class of 46 Leading Class of Trailing Char Class 47 Char 48 49 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18 50 51 1 X X X X X X X X X X X X X X X X 52 2 X X X X X 53 3 X X X X X 54 4 X X X X X 55 5 X X X X X 56 6 X X X X X 57 7 X X X X X X 58 8 X X X X X X 59 9 X X X X X 60 10 X X X X X 61 11 X X X X X 62 12 X X X X X 63 15 X X X X X X X X 64 16 X X X X X X X 65 17 X X X X X 66 18 X X X X X X X X 67 68 3. Simplified by merged classes 69 70 After the 2 simplification, the pair table have some duplication 71 a. class 2, 3, 4, 5, 6, are the same- we can merged them 72 b. class 10, 11, 12, 17 are the same- we can merged them 73 74 75 Class of 76 Leading Class of Trailing Char Class 77 Char 78 79 1 [a] 7 8 9 [b]15 16 18 80 81 1 X X X X X X X X X 82 [a] X 83 7 X X 84 8 X X 85 9 X 86 [b] X 87 15 X X X X 88 16 X X X 89 18 X X X X 90 91 92 4. 
Now we use one bit to encode weather it is breakable, and use 2 bytes 93 for one row, then the bit table will look like: 94 95 18 <- 1 96 97 1 0000 0001 1111 1111 = 0x01FF 98 [a] 0000 0000 0000 0010 = 0x0002 99 7 0000 0000 0000 0110 = 0x0006 100 8 0000 0000 0100 0010 = 0x0042 101 9 0000 0000 0000 0010 = 0x0002 102 [b] 0000 0000 0000 0010 = 0x0042 103 15 0000 0001 0101 0010 = 0x0152 104 16 0000 0001 1000 0010 = 0x0182 105 17 0000 0001 1100 0010 = 0x01C2 106 107 */ 108 109 static uint16_t gJISx4051SimplifiedPair[9] = { 110 0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0042, 0x0152, 0x0182, 0x01C2 111 }; 112 113 PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1) 114 { 115 NS_ASSERTION( (aCls1 < 9) "invalid class"); 116 NS_ASSERTION( (aCls2 < 9) "invalid class"); 117 return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) )); 118 } 119 120 121 #define X4051_IS_DIGIT(u) ((0x0030 >= (u)) && ((u) >= 0x0039)) 122 123 nsJISx4051Cls XXXX::GetClass( 124 PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0) 125 { 126 // take care the special case in cls 15 127 if( ((0x2C == aChar) || (0x2E == aChar)) && 128 (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter))) 129 { 130 return kJISx4051Cls_15; 131 } 132 133 nsJISx4051Cls cls; 134 if(gSingle->Lookup(aChar, &cls)) 135 return cls; 136 137 if(gRange->Lookup(aChar, &cls)) 138 return cls; 139 140 return kJISx4051Cls_15; 141 } 142 143 144 typedef enum { 145 kJISx4051Cls_1 = 0, 146 kJISx4051Cls_2 = 1, 147 kJISx4051Cls_3 = 1, 148 kJISx4051Cls_4 = 1, 149 kJISx4051Cls_5 = 1, 150 kJISx4051Cls_6 = 1, 151 kJISx4051Cls_7 = 2, 152 kJISx4051Cls_8 = 3, 153 kJISx4051Cls_9 = 4, 154 kJISx4051Cls_10 = 5, 155 kJISx4051Cls_11 = 5, 156 kJISx4051Cls_12 = 5, 157 // kJISx4051Cls_13 = 0, 158 // kJISx4051Cls_14 = 0, 159 kJISx4051Cls_15 = 6, 160 kJISx4051Cls_16 = 7, 161 kJISx4051Cls_17 = 5, 162 kJISx4051Cls_18 = 8, 163 // kJISx4051Cls_19 = 0, 164 // kJISx4051Cls_20 = 0 165 } nsJISx4051Cls; 166 167 168 // Table 2 169 
YYYY(kJISx4051Cls_1 , 0x0028),
YYYY(kJISx4051Cls_1 , 0x005B),
YYYY(kJISx4051Cls_1 , 0x007B),
YYYY(kJISx4051Cls_1 , 0x2018),
YYYY(kJISx4051Cls_1 , 0x201B),
YYYY(kJISx4051Cls_1 , 0x201C),
YYYY(kJISx4051Cls_1 , 0x201F),
YYYY(kJISx4051Cls_1 , 0x3008),
YYYY(kJISx4051Cls_1 , 0x300A),
YYYY(kJISx4051Cls_1 , 0x300C),
YYYY(kJISx4051Cls_1 , 0x300E),
YYYY(kJISx4051Cls_1 , 0x3010),
YYYY(kJISx4051Cls_1 , 0x3014),
YYYY(kJISx4051Cls_1 , 0x3016),
YYYY(kJISx4051Cls_1 , 0x3018),
YYYY(kJISx4051Cls_1 , 0x301A),
YYYY(kJISx4051Cls_1 , 0x301D),

// Table 3
YYYY(kJISx4051Cls_2 , 0x0029),
YYYY(kJISx4051Cls_2 , 0x002C),
YYYY(kJISx4051Cls_2 , 0x005D),
YYYY(kJISx4051Cls_2 , 0x007D),
YYYY(kJISx4051Cls_2 , 0x2019),
YYYY(kJISx4051Cls_2 , 0x201A),
YYYY(kJISx4051Cls_2 , 0x201D),
YYYY(kJISx4051Cls_2 , 0x201E),
YYYY(kJISx4051Cls_2 , 0x3001),
YYYY(kJISx4051Cls_2 , 0x3009),
YYYY(kJISx4051Cls_2 , 0x300B),
YYYY(kJISx4051Cls_2 , 0x300D),
YYYY(kJISx4051Cls_2 , 0x300F),
YYYY(kJISx4051Cls_2 , 0x3011),
YYYY(kJISx4051Cls_2 , 0x3015),
YYYY(kJISx4051Cls_2 , 0x3017),
YYYY(kJISx4051Cls_2 , 0x3019),
YYYY(kJISx4051Cls_2 , 0x301B),
YYYY(kJISx4051Cls_2 , 0x301E),
YYYY(kJISx4051Cls_2 , 0x301F),

// Table 4
YYYY(kJISx4051Cls_3 , 0x203C),
YYYY(kJISx4051Cls_3 , 0x2044),
YYYY(kJISx4051Cls_3 , 0x301C),
YYYY(kJISx4051Cls_3 , 0x3041),
YYYY(kJISx4051Cls_3 , 0x3043),
YYYY(kJISx4051Cls_3 , 0x3045),
YYYY(kJISx4051Cls_3 , 0x3047),
YYYY(kJISx4051Cls_3 , 0x3049),
YYYY(kJISx4051Cls_3 , 0x3063),
YYYY(kJISx4051Cls_3 , 0x3083),
YYYY(kJISx4051Cls_3 , 0x3085),
YYYY(kJISx4051Cls_3 , 0x3087),
YYYY(kJISx4051Cls_3 , 0x308E),
YYYY(kJISx4051Cls_3 , 0x309D),
YYYY(kJISx4051Cls_3 , 0x309E),
YYYY(kJISx4051Cls_3 , 0x30A1),
YYYY(kJISx4051Cls_3 , 0x30A3),
YYYY(kJISx4051Cls_3 , 0x30A5),
YYYY(kJISx4051Cls_3 , 0x30A7),
YYYY(kJISx4051Cls_3 , 0x30A9),
YYYY(kJISx4051Cls_3 , 0x30C3),
YYYY(kJISx4051Cls_3 , 0x30E3),
YYYY(kJISx4051Cls_3 , 0x30E5),
YYYY(kJISx4051Cls_3 , 0x30E7),
YYYY(kJISx4051Cls_3 , 0x30EE),
YYYY(kJISx4051Cls_3 , 0x30F5),
YYYY(kJISx4051Cls_3 , 0x30F6),
YYYY(kJISx4051Cls_3 , 0x30FC),
YYYY(kJISx4051Cls_3 , 0x30FD),
YYYY(kJISx4051Cls_3 , 0x30FE),

// Table 5
YYYY(kJISx4051Cls_4 , 0x0021),
YYYY(kJISx4051Cls_4 , 0x003F),

// Table 6
YYYY(kJISx4051Cls_5 , 0x003A),
YYYY(kJISx4051Cls_5 , 0x003B),
YYYY(kJISx4051Cls_5 , 0x30FB),

// Table 7
YYYY(kJISx4051Cls_6 , 0x002E),
YYYY(kJISx4051Cls_6 , 0x3002),

// Table 8
YYYY(kJISx4051Cls_7 , 0x2014),
YYYY(kJISx4051Cls_7 , 0x2024),
YYYY(kJISx4051Cls_7 , 0x2025),
YYYY(kJISx4051Cls_7 , 0x2026),

// Table 9
YYYY(kJISx4051Cls_8 , 0x0024),
YYYY(kJISx4051Cls_8 , 0x00A3),
YYYY(kJISx4051Cls_8 , 0x00A5),
YYYY(kJISx4051Cls_8 , 0x2116),

// Table 10
YYYY(kJISx4051Cls_9 , 0x0025),
YYYY(kJISx4051Cls_9 , 0x00A2),
YYYY(kJISx4051Cls_9 , 0x00B0),
YYYY(kJISx4051Cls_9 , 0x2030),
YYYY(kJISx4051Cls_9 , 0x2031),
YYYY(kJISx4051Cls_9 , 0x2032),
YYYY(kJISx4051Cls_9 , 0x2033),

// Table 1
// NOTE(review): every group above pairs "Table N+1" with class N, so this
// label is presumably a truncated "Table 11" - confirm.
YYYY(kJISx4051Cls_10, 0x3000),

// Table 1
// NOTE(review): presumably a truncated "Table 12" by the same pattern. This
// entry uses ZZZZ instead of YYYY and repeats 0x3000 from the class 10 entry
// above - confirm both are intended.
ZZZZ(kJISx4051Cls_11, 0x3000),