tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jisx4051pairtable.txt (9263B)


      1 /* 
      2 
      3   Simplification of Pair Table in JIS X 4051
      4 
      5   1. The Original Table - in 4.1.3
      6 
      7   In JIS X 4051, the pair table is defined as below:
      8 
      9   Class of
     10   Leading    Class of Trailing Char Class
     11   Char        
     12 
     13              1  2  3  4  5  6  7  8  9 10 11 12 13 13 14 14 15 16 17 18 19 20
     14                                                 *  #  *  #
     15        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  E
     16        2        X  X  X  X  X                                               X
     17        3        X  X  X  X  X                                               X
     18        4        X  X  X  X  X                                               X
     19        5        X  X  X  X  X                                               X
     20        6        X  X  X  X  X                                               X
     21        7        X  X  X  X  X  X                                            X 
     22        8        X  X  X  X  X                                X              E 
     23        9        X  X  X  X  X                                               X
     24       10        X  X  X  X  X                                               X
     25       11        X  X  X  X  X                                               X
     26       12        X  X  X  X  X                                               X  
     27       13        X  X  X  X  X                    X                          X
     28       14        X  X  X  X  X                          X                    X
     29       15        X  X  X  X  X        X                       X        X     X 
     30       16        X  X  X  X  X                                   X     X     X
     31       17        X  X  X  X  X                                               E 
     32       18        X  X  X  X  X                                X  X     X     X 
     33       19     X  E  E  E  E  E  X  X  X  X  X  X  X  X  X  X  X  X  E  X  E  E
     34       20        X  X  X  X  X                                               E
     35 
     36   * Same Char
     37   # Other Char
     38 
     39   2. Simplify by removing the classes we do not care about
     40 
     41   However, since we do not care about class 13 (Subscript), 14 (Ruby),
     42   19 (split line note begin quote), and 20 (split line note end quote),
     43   we can simplify this pair table into the following:
     44 
     45   Class of
     46   Leading    Class of Trailing Char Class
     47   Char        
     48 
     49              1  2  3  4  5  6  7  8  9 10 11 12 15 16 17 18 
     50                                                 
     51        1     X  X  X  X  X  X  X  X  X  X  X  X  X  X  X  X
     52        2        X  X  X  X  X                             
     53        3        X  X  X  X  X                            
     54        4        X  X  X  X  X                           
     55        5        X  X  X  X  X                          
     56        6        X  X  X  X  X                         
     57        7        X  X  X  X  X  X                      
     58        8        X  X  X  X  X                    X    
     59        9        X  X  X  X  X                                   
     60       10        X  X  X  X  X                                  
     61       11        X  X  X  X  X                                 
     62       12        X  X  X  X  X                                
     63       15        X  X  X  X  X        X           X        X    
     64       16        X  X  X  X  X                       X     X    
     65       17        X  X  X  X  X                                  
     66       18        X  X  X  X  X                    X  X     X    
     67 
     68   3. Simplify by merging classes
     69 
     70   After simplification 2, the pair table has some duplication:
     71   a. classes 2, 3, 4, 5, 6 are the same - we can merge them into [a]
     72   b. classes 10, 11, 12, 17 are the same - we can merge them into [b]
     73 
     74 
     75   Class of
     76   Leading    Class of Trailing Char Class
     77   Char        
     78 
     79              1 [a] 7  8  9 [b]15 16 18 
     80                                     
     81        1     X  X  X  X  X  X  X  X  X
     82      [a]        X                             
     83        7        X  X                      
     84        8        X              X    
     85        9        X                                   
     86      [b]        X                                  
     87       15        X        X     X     X    
     88       16        X                 X  X    
     89       18        X              X  X  X    
     90 
     91 
     92   4. Now we use one bit to encode whether it is breakable, and use 2 bytes
     93      for one row, then the bit table will look like:
     94 
     95                 18    <-   1
     96            
     97       1  0000 0001 1111 1111  = 0x01FF
     98      [a] 0000 0000 0000 0010  = 0x0002
     99       7  0000 0000 0000 0110  = 0x0006
    100       8  0000 0000 0100 0010  = 0x0042
    101       9  0000 0000 0000 0010  = 0x0002
    102      [b] 0000 0000 0000 0010  = 0x0002
    103      15  0000 0001 0101 0010  = 0x0152
    104      16  0000 0001 1000 0010  = 0x0182
    105      18  0000 0001 1100 0010  = 0x01C2
    106 
    107 */
    108 
    109 static uint16_t gJISx4051SimplifiedPair[9] = {
    110  0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0042, 0x0152, 0x0182, 0x01C2
    111 };
    112 
    113 PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1)
    114 {
    115  NS_ASSERTION( (aCls1 < 9) "invalid class");
    116  NS_ASSERTION( (aCls2 < 9) "invalid class");
    117  return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) ));
    118 }
    119 
    120 
    121 #define X4051_IS_DIGIT(u) ((0x0030 >= (u)) && ((u) >= 0x0039))
    122 
    123 nsJISx4051Cls XXXX::GetClass(
    124   PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0)
    125 {
    126   // take care the special case in cls 15
    127   if( ((0x2C == aChar) || (0x2E == aChar)) &&
    128       (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter)))
    129   {
    130     return kJISx4051Cls_15;
    131   }
    132   
    133   nsJISx4051Cls cls;
    134   if(gSingle->Lookup(aChar, &cls))
    135     return cls;
    136 
    137   if(gRange->Lookup(aChar, &cls))
    138     return cls;
    139 
    140   return kJISx4051Cls_15;
    141 }
    142 
    143 
    144 typedef enum {
    145  kJISx4051Cls_1 = 0,
    146  kJISx4051Cls_2 = 1,
    147  kJISx4051Cls_3 = 1,
    148  kJISx4051Cls_4 = 1,
    149  kJISx4051Cls_5 = 1,
    150  kJISx4051Cls_6 = 1,
    151  kJISx4051Cls_7 = 2,
    152  kJISx4051Cls_8 = 3,
    153  kJISx4051Cls_9 = 4,
    154  kJISx4051Cls_10 = 5,
    155  kJISx4051Cls_11 = 5,
    156  kJISx4051Cls_12 = 5,
    157  // kJISx4051Cls_13 = 0,
    158  // kJISx4051Cls_14 = 0,
    159  kJISx4051Cls_15 = 6,
    160  kJISx4051Cls_16 = 7,
    161  kJISx4051Cls_17 = 5,
    162  kJISx4051Cls_18 = 8,
    163  // kJISx4051Cls_19 = 0,
    164  // kJISx4051Cls_20 = 0
    165 } nsJISx4051Cls;
    166 
    167 
    168  // Table 2
    169  YYYY(kJISx4051Cls_1 , 0x0028),
    170  YYYY(kJISx4051Cls_1 , 0x005B),
    171  YYYY(kJISx4051Cls_1 , 0x007B),
    172  YYYY(kJISx4051Cls_1 , 0x2018),
    173  YYYY(kJISx4051Cls_1 , 0x201B),
    174  YYYY(kJISx4051Cls_1 , 0x201C),
    175  YYYY(kJISx4051Cls_1 , 0x201F),
    176  YYYY(kJISx4051Cls_1 , 0x3008),
    177  YYYY(kJISx4051Cls_1 , 0x300A),
    178  YYYY(kJISx4051Cls_1 , 0x300C),
    179  YYYY(kJISx4051Cls_1 , 0x300E),
    180  YYYY(kJISx4051Cls_1 , 0x3010),
    181  YYYY(kJISx4051Cls_1 , 0x3014),
    182  YYYY(kJISx4051Cls_1 , 0x3016),
    183  YYYY(kJISx4051Cls_1 , 0x3018),
    184  YYYY(kJISx4051Cls_1 , 0x301A),
    185  YYYY(kJISx4051Cls_1 , 0x301D),
    186 
    187  // Table 3
    188  YYYY(kJISx4051Cls_2 , 0x0029),
    189  YYYY(kJISx4051Cls_2 , 0x002C),
    190  YYYY(kJISx4051Cls_2 , 0x005D),
    191  YYYY(kJISx4051Cls_2 , 0x007D),
    192  YYYY(kJISx4051Cls_2 , 0x2019),
    193  YYYY(kJISx4051Cls_2 , 0x201A),
    194  YYYY(kJISx4051Cls_2 , 0x201D),
    195  YYYY(kJISx4051Cls_2 , 0x201E),
    196  YYYY(kJISx4051Cls_2 , 0x3001),
    197  YYYY(kJISx4051Cls_2 , 0x3009),
    198  YYYY(kJISx4051Cls_2 , 0x300B),
    199  YYYY(kJISx4051Cls_2 , 0x300D),
    200  YYYY(kJISx4051Cls_2 , 0x300F),
    201  YYYY(kJISx4051Cls_2 , 0x3011),
    202  YYYY(kJISx4051Cls_2 , 0x3015),
    203  YYYY(kJISx4051Cls_2 , 0x3017),
    204  YYYY(kJISx4051Cls_2 , 0x3019),
    205  YYYY(kJISx4051Cls_2 , 0x301B),
    206  YYYY(kJISx4051Cls_2 , 0x301E),
    207  YYYY(kJISx4051Cls_2 , 0x301F),
    208 
    209  // Table 4
    210  YYYY(kJISx4051Cls_3 , 0x203C),
    211  YYYY(kJISx4051Cls_3 , 0x2044),
    212  YYYY(kJISx4051Cls_3 , 0x301C),
    213  YYYY(kJISx4051Cls_3 , 0x3041),
    214  YYYY(kJISx4051Cls_3 , 0x3043),
    215  YYYY(kJISx4051Cls_3 , 0x3045),
    216  YYYY(kJISx4051Cls_3 , 0x3047),
    217  YYYY(kJISx4051Cls_3 , 0x3049),
    218  YYYY(kJISx4051Cls_3 , 0x3063),
    219  YYYY(kJISx4051Cls_3 , 0x3083),
    220  YYYY(kJISx4051Cls_3 , 0x3085),
    221  YYYY(kJISx4051Cls_3 , 0x3087),
    222  YYYY(kJISx4051Cls_3 , 0x308E),
    223  YYYY(kJISx4051Cls_3 , 0x309D),
    224  YYYY(kJISx4051Cls_3 , 0x309E),
    225  YYYY(kJISx4051Cls_3 , 0x30A1),
    226  YYYY(kJISx4051Cls_3 , 0x30A3),
    227  YYYY(kJISx4051Cls_3 , 0x30A5),
    228  YYYY(kJISx4051Cls_3 , 0x30A7),
    229  YYYY(kJISx4051Cls_3 , 0x30A9),
    230  YYYY(kJISx4051Cls_3 , 0x30C3),
    231  YYYY(kJISx4051Cls_3 , 0x30E3),
    232  YYYY(kJISx4051Cls_3 , 0x30E5),
    233  YYYY(kJISx4051Cls_3 , 0x30E7),
    234  YYYY(kJISx4051Cls_3 , 0x30EE),
    235  YYYY(kJISx4051Cls_3 , 0x30F5),
    236  YYYY(kJISx4051Cls_3 , 0x30F6),
    237  YYYY(kJISx4051Cls_3 , 0x30FC),
    238  YYYY(kJISx4051Cls_3 , 0x30FD),
    239  YYYY(kJISx4051Cls_3 , 0x30FE),
    240 
    241  // Table 5
    242  YYYY(kJISx4051Cls_4 , 0x0021),
    243  YYYY(kJISx4051Cls_4 , 0x003F),
    244   
    245  // Table 6
    246  YYYY(kJISx4051Cls_5 , 0x003A),
    247  YYYY(kJISx4051Cls_5 , 0x003B),
    248  YYYY(kJISx4051Cls_5 , 0x30FB),
    249 
    250  // Table 7
    251  YYYY(kJISx4051Cls_6 , 0x002E),
    252  YYYY(kJISx4051Cls_6 , 0x3002),
    253 
    254  // Table 8
    255  YYYY(kJISx4051Cls_7 , 0x2014),
    256  YYYY(kJISx4051Cls_7 , 0x2024),
    257  YYYY(kJISx4051Cls_7 , 0x2025),
    258  YYYY(kJISx4051Cls_7 , 0x2026),
    259 
    260  // Table 9
    261  YYYY(kJISx4051Cls_8 , 0x0024),
    262  YYYY(kJISx4051Cls_8 , 0x00A3),
    263  YYYY(kJISx4051Cls_8 , 0x00A5),
    264  YYYY(kJISx4051Cls_8 , 0x2116),
    265 
    266  // Table 10
    267  YYYY(kJISx4051Cls_9 , 0x0025),
    268  YYYY(kJISx4051Cls_9 , 0x00A2),
    269  YYYY(kJISx4051Cls_9 , 0x00B0),
    270  YYYY(kJISx4051Cls_9 , 0x2030),
    271  YYYY(kJISx4051Cls_9 , 0x2031),
    272  YYYY(kJISx4051Cls_9 , 0x2032),
    273  YYYY(kJISx4051Cls_9 , 0x2033),
    274 
    275  // Table 1
    276  YYYY(kJISx4051Cls_10, 0x3000),
    277 
    278  // Table 1
    279  ZZZZ(kJISx4051Cls_11, 0x3000),