tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Han_Spacedhan.txt (1455B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: Han_Spacedhan.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # Only intended for internal use
     10 # Make sure Han characters are normalized, including characters that contain them.
     11 # The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:Ideographic:]-[:sc=Han:]
     12 # where XXX is the resolved set [[:Ideographic:][:sc=Han:]]. That first set must be recomputed for each Unicode release!
     13 :: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:Ideographic:][:sc=Han:]] nfkc;  # filtered step: nfkc is applied only to characters in this set
     14 :: fullwidth-halfwidth;  # unfiltered step: runs the fullwidth-halfwidth transform on all text
     15 。 → '.';  # from here to the tilde rule: CJK punctuation is rewritten to a Western counterpart
     16 。→ '.';  # NOTE(review): source character renders identical to the previous rule - presumably a halfwidth variant upstream, confirm
     17 、→ ',';
     18 、→ ',';  # NOTE(review): renders identical to the previous rule - confirm against upstream
     19 《→ '«';
     20 》→ '»';
     21 〈 → '‹';
     22 〉→ '›';
     23 「→ '‘';
     24 」→ '’';
     25 「→ '‘';  # NOTE(review): this pair renders identical to the corner-bracket pair above - confirm against upstream
     26 」→ '’';
     27 『→ '“';
     28 』→ '”';
     29 ・→ '‧';
     30 ・ → '‧';  # NOTE(review): renders identical to the previous rule - confirm against upstream
     31 々→ '⓶';  # NOTE(review): presumably a placeholder for the iteration mark - confirm intent
     32 〜→ '~';
     33 $terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];  # sentence punctuation plus every close (Pe) and final-quote (Pf) character
     34 $initialPunct = [[:Ps:][:Pi:]];  # every open (Ps) and initial-quote (Pi) character
     35 # Forward direction: insert a space between an ideograph or terminal
     36 # punctuation and a following letter, and between a letter (with any
     37 [[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;  # empty {} key: the space is inserted between the two contexts
     38 [:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;  # [:Mark:]* puts the space after any marks that follow the letter
     39 # Reverse direction: remove the space between ideographs and other letters
     40 ← [:Ideographic:] { ' ' } [:Letter:] ;  # reverse-only rule: the space in braces is replaced by nothing
     41 ← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;