tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

anzx4051.pl (9024B)


      1 #!/usr/bin/perl 
      2 #
      3 # This Source Code Form is subject to the terms of the Mozilla Public
      4 # License, v. 2.0. If a copy of the MPL was not distributed with this
      5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      6 
      ######################################################################
      #
      # Script-level state and file handles
      #
      # All paths are relative, so the script has to be started from the
      # directory that holds jisx4051class.txt and jisx4051simp.txt.
      # Both output files are opened with '>' and therefore truncated.
      #
      ######################################################################

      # Initialised here; not referenced by any other code in the visible script.
      %utot = ();
      $ui   = 0;
      $li   = 0;

      # The parentheses around open's arguments make `||` test the result of
      # open() itself.  The three-argument form keeps the mode separate from
      # the path, and $! reports why the open actually failed.

      # Input: the Unicode character database.
      open(UNICODATA, '<', '../../unicharutil/tools/UnicodeData-Latest.txt')
        || die "cannot find UnicodeData-Latest.txt: $!";

      # Input: the JIS X 4051 class file.
      open(CLASS, '<', 'jisx4051class.txt')
        || die "cannot find jisx4051class.txt: $!";

      # Input: the JIS X 4051 class simplified mapping.
      open(SIMP, '<', 'jisx4051simp.txt')
        || die "cannot find jisx4051simp.txt: $!";

      # Output: the HTML analysis report.
      open(OUT, '>', 'anzx4051.html')
        || die "cannot open output anzx4051.html file: $!";

      # Output: the generated C header.
      open(HEADER, '>', '../jisx4051class.h')
        || die "cannot open output ../jisx4051class.h file: $!";
     55 
     ######################################################################
     #
     # Write the fixed opening of the HTML report (licence comment, title
     # and top-level heading) to OUT.  The heredoc is non-interpolating;
     # its text contains nothing to interpolate.
     #
     ######################################################################
     print OUT <<'END_OF_HTML';
<!-- This Source Code Form is subject to the terms of the Mozilla Public
  - License, v. 2.0. If a copy of the MPL was not distributed with this
  - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->

<HTML>
<HEAD>
<TITLE>
Analysis of JIS X 4051 to Unicode General Category Mapping
</TITLE>
</HEAD>
<BODY>
<H1>
Analysis of JIS X 4051 to Unicode General Category Mapping
</H1>
END_OF_HTML

     ######################################################################
     #
     # Write the editor modeline, licence comment and "generated file"
     # notice to the C header on HEADER.
     #
     ######################################################################
     print HEADER <<'END_OF_NPL';
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
   DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
   mozilla/intl/lwbrk/tools/anzx4051.pl
*/
END_OF_NPL
     95 
     # Tables shared by the rest of the script (package globals on purpose:
     # the subs further down read them by name).
     #   %occ         numeric code point -> "<simplified class> | <class>"
     #   %gcat        field 0 of UnicodeData -> field 2 (General Category)
     #   %dcat        field 0 of UnicodeData -> first character of field 2
     #   %simp        field 0 of the simplified mapping -> field 1
     #   %gcount      number of UnicodeData lines per General Category
     #   %dcount      number of UnicodeData lines per first category letter
     #   %sccount     number of mapping lines per simplified class
     #   %rangecount  " HH:<simplified class>" -> number of code points
     %occ = ();
     %gcat = ();
     %dcat = ();
     %simp = ();
     %gcount = ();
     %dcount = ();
     %sccount = ();
     %rangecount = ();

     ######################################################################
     #
     # Read the Unicode database: one ';'-separated record per line
     #
     ######################################################################
     while (my $line = <UNICODATA>) {
       # chomp removes only a trailing newline; chop would also eat the last
       # real character of a final line that lacks one.
       chomp $line;

       my @field    = split(/;/, $line);
       my $code     = $field[0];                 # the unicode value
       my $category = $field[2];
       my $major    = substr($category, 0, 1);

       $gcat{$code} = $category;
       $dcat{$code} = $major;
       $gcount{$category}++;
       $dcount{$major}++;
     }
     # The handle opened above is UNICODATA; closing any other name would
     # leave it open for the rest of the run.
     close(UNICODATA);

     ######################################################################
     #
     # Read the simplified mapping: "<class>;<simplified class>" per line
     #
     ######################################################################
     while (my $line = <SIMP>) {
       chomp $line;

       my @field = split(/;/, $line);

       $simp{$field[0]} = $field[1];
       $sccount{$field[1]}++;
     }
     close(SIMP);
    142 
    # GetClass($u)
    #
    # Return the General Category recorded in %gcat for the numeric code
    # point $u.  When %gcat has no (non-empty) entry, fall back to fixed
    # ranges:
    #   0x3400-0x9fa5 -> "Han"
    #   0xac00-0xd7a3 -> "Lo"
    #   0xd800-0xdfff -> "Cs"
    #   0xe000-0xf8ff -> "Co"
    # Anything else prints a warning on STDOUT and yields the empty string.
    # Without the explicit return the sub would hand back printf's own
    # result (1), which callers would then use as a category name.
    sub GetClass{
      my ($u) = @_;
      my $hex = DecToHex($u);

      # Lexical on purpose: the script also uses a global $g elsewhere.
      my $category = $gcat{$hex};
      return $category if defined($category) && $category ne "";

      return "Han" if 0x3400 <= $u && $u <= 0x9fa5;
      return "Lo"  if 0xac00 <= $u && $u <= 0xd7a3;
      return "Cs"  if 0xd800 <= $u && $u <= 0xdfff;
      return "Co"  if 0xe000 <= $u && $u <= 0xf8ff;

      printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
      return "";
    }
    # GetDClass($u)
    #
    # Return the one-letter category recorded in %dcat for the numeric code
    # point $u.  When %dcat has no (non-empty) entry, fall back to fixed
    # ranges:
    #   0x3400-0x9fa5 -> "Han"
    #   0xac00-0xd7a3 -> "L"
    #   0xd800-0xf8ff -> "C"
    # Anything else prints a warning on STDOUT and yields the empty string.
    # Without the explicit return the sub would hand back printf's own
    # result (1), which callers would then use as a category name.
    sub GetDClass{
      my ($u) = @_;
      my $hex = DecToHex($u);

      # Lexical on purpose: the script also uses a global $g elsewhere.
      my $category = $dcat{$hex};
      return $category if defined($category) && $category ne "";

      return "Han" if 0x3400 <= $u && $u <= 0x9fa5;
      return "L"   if 0xac00 <= $u && $u <= 0xd7a3;
      return "C"   if 0xd800 <= $u && $u <= 0xf8ff;

      printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
      return "";
    }
    # DecToHex($d)
    #
    # Format the number $d as upper-case hexadecimal, zero-padded on the
    # left to at least four digits.
    sub DecToHex{
        my $value = shift;
        my $text  = sprintf("%04X", $value);
        return $text;
    }
    # Counters keyed by "<simplified class><category>"; read by printreport.
    %gtotal = ();
    %dtotal = ();

    ######################################################################
    #
    # Read the class file.  Each line is
    #   <first code point, hex>;<last code point, hex or empty>;<class>
    # An empty second field means the range is the single first code point.
    #
    ######################################################################
    while (my $line = <CLASS>) {
      # chomp removes only a trailing newline; chop would truncate the class
      # name on a final line that lacks one.
      chomp $line;

      my @field = split(/;/ , $line);

      # Classes whose name starts with "a" are not processed.
      next if substr($field[2], 0, 1) eq "a";

      my $simple = $simp{$field[2]};
      my $first  = hex($field[0]);
      my $last   = ($field[1] eq "") ? $first : hex($field[1]);

      for my $cp ($first .. $last) {
        # The first definition of a code point wins; later lines that cover
        # it again are ignored without a message.
        next if exists($occ{$cp});

        $occ{$cp} = $simple . " | " . $field[2];
        $gtotal{$simple . GetClass($cp)}++;
        $dtotal{$simple . GetDClass($cp)}++;

        # Bucket by the first two hex digits of the code point.
        $rangecount{" " . substr(DecToHex($cp), 0, 2) . ":" . $simple}++;
      }
    }
    233 
    234 #print %gtotal;
    235 #print %dtotal;
    236 
    # printreport()
    #
    # Write two HTML tables to OUT.
    #
    # Table 1: one row per simplified class (keys of %sccount).  Columns are
    # the one-letter categories (keys of %dcount, counts from %dtotal), a
    # "Total" column summing those, then the full categories (keys of
    # %gcount, counts from %gtotal).
    #
    # Table 2: one row per two-digit hex prefix 00..4E, one column per
    # simplified class, counts from %rangecount.  Rows whose counts sum to
    # zero are omitted.
    # NOTE(review): the loop bound means prefixes 4F and above never get a
    # row, even if %rangecount has entries for them -- confirm intended.
    #
    # Missing counts are printed as empty cells and contribute 0 to sums.
    # All temporaries are lexical so the sub does not overwrite script-level
    # variables of the same name.
    sub printreport 
    {
       # Sort each key set once instead of on every loop pass.
       my @letters    = sort(keys %dcount);
       my @categories = sort(keys %gcount);
       my @classes    = sort(keys %sccount);

       print OUT "<TABLE BORDER=3>\n";
       print OUT "<TR BGCOLOR=blue><TH><TH>\n";
       print OUT "<TD BGCOLOR=red>$_</TD>\n" for @letters;
       print OUT "<TD BGCOLOR=white>Total</TD>\n";
       print OUT "<TD BGCOLOR=yellow>$_</TD>\n" for @categories;
       print OUT "</TR>\n";

       foreach my $class (@classes) {
          print OUT "<TR><TH>$class<TH>\n";

          my $total = 0;
          foreach my $letter (@letters) {
             my $count = $dtotal{$class . $letter};
             $total += $count;
             print OUT "<TD>$count</TD>\n";
          }

          print OUT "<TD BGCOLOR=white>$total</TD>\n";

          foreach my $category (@categories) {
             my $count = $gtotal{$class . $category};
             print OUT "<TD>$count</TD>\n";
          }

          print OUT "</TR>\n";
       }
       print OUT "</TABLE>\n";


       print OUT "<TABLE BORDER=3>\n";
       print OUT "<TR BGCOLOR=blue><TH><TH>\n";
       print OUT "<TD BGCOLOR=red>$_</TD>\n" for @classes;
       print OUT "</TR>\n";

       for my $high (0 .. 0x4e) {
          my $prefix = sprintf("%02X" , $high);
          my $row    = "<TR><TH>" . $prefix . "<TH>\n";
          my $sum    = 0;

          foreach my $class (@classes) {
             my $count = $rangecount{ " " . $prefix . ":" . $class};
             $row .= sprintf("<TD>%s</TD>\n", $count);
             $sum += $count;
          }

          $row .= "</TR>\n";

          # Numeric test: $sum is a count, not a string.
          print OUT $row if $sum != 0;
       }
       print OUT "</TABLE>\n";
    }
    printreport();
    309 
    # printarray($r, $def)
    #
    # Write one C array, gLBClass<$r>, to HEADER for the 256 code points
    # whose high byte is hex($r).
    #
    #   $r    two-digit hex string naming the high byte; also used verbatim
    #         in the array name
    #   $def  character emitted for code points that have no entry in %occ
    #
    # Each of the 32 array elements is "0x" followed by eight characters,
    # one per code point, highest code point of the group first.  For a code
    # point present in %occ the character is the second character of its
    # %occ value.  A progress line "[<r> || <def>]" goes to STDOUT.
    #
    # All temporaries are lexical so the sub does not overwrite script-level
    # $k, $i, $j, $v or $p.
    sub printarray
    {
      my($r, $def) = @_;
      printf "[%s || %s]\n", $r, $def;

      my $base = hex($r) * 256;
      printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;

      for (my $row = 0; $row < 256; $row += 8)
      {
         my $word = "0x";
         for my $offset (reverse 0 .. 7)
         {
            my $cp = $base + $row + $offset;
            $word .= exists($occ{$cp}) ? substr($occ{$cp}, 1, 1) : $def;
         }
         printf HEADER "%s, // U+%04X - U+%04X\n", $word, $base + $row, $base + $row + 7;
      }
      print HEADER "};\n\n";
    }
    # Emit one table per selected high byte.  The second argument is the
    # character printarray uses for code points that have no entry in %occ.
    printarray("00", "7");
    printarray("20", "7");
    printarray("21", "7");
    printarray("30", "5");
    printarray("0E", "8");
    printarray("17", "7");

    #print %rangecount;

    ######################################################################
    #
    # Close files
    #
    # Output is buffered, so a failed write (full disk, I/O error) may only
    # be reported by close.  Check it on both write handles instead of
    # leaving a truncated file behind silently.
    #
    ######################################################################
    close(HEADER) || die "cannot close output ../jisx4051class.h file: $!";
    close(CLASS);
    close(OUT)    || die "cannot close output anzx4051.html file: $!";