tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

char.txt (2249B)


      1 #
      2 #   Copyright (C) 2016 and later: Unicode, Inc. and others.
      3 #   License & terms of use: http://www.unicode.org/copyright.html
      4 #   Copyright (C) 2002-2016, International Business Machines Corporation and others.
      5 #       All Rights Reserved.
      6 #
      7 #   file:  char.txt
      8 #
      9 #   ICU Character Break Rules
     10 #      These rules are based on the Extended Grapheme Cluster rules from
     11 #      Unicode UAX #29 Revision 34 for Unicode Version 12.0, plus rule GB 9c (InCB), which UAX #29 added later (Unicode 15.1 - TODO confirm).
     12 
     13 !!quoted_literals_only;
     14 
     15 #
     16 #  Character Class Definitions.
     17 #  One set per Grapheme_Cluster_Break value. NOTE(review): the doubled [[ ]] on $Control and $Extend is redundant nesting.
     18 $CR          = [\p{Grapheme_Cluster_Break = CR}];
     19 $LF          = [\p{Grapheme_Cluster_Break = LF}];
     20 $Control     = [[\p{Grapheme_Cluster_Break = Control}]];
     21 $Extend      = [[\p{Grapheme_Cluster_Break = Extend}]];
     22 $ZWJ         = [\p{Grapheme_Cluster_Break = ZWJ}];
     23 $Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
     24 $Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
     25 $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
     26 # InCB (Indic_Conjunct_Break) classes; referenced by the GB 9c rule.
     27 $InCBConsonant = [\p{InCB=Consonant}];
     28 $InCBExtend = [\p{InCB=Extend}];
     29 $InCBLinker = [\p{InCB=Linker}];
     30 
     31 # Korean Syllable Definitions
     32 #   (the Hangul syllable-type values of Grapheme_Cluster_Break: L, V, T, LV, LVT)
     33 $L           = [\p{Grapheme_Cluster_Break = L}];
     34 $V           = [\p{Grapheme_Cluster_Break = V}];
     35 $T           = [\p{Grapheme_Cluster_Break = T}];
     36 
     37 $LV          = [\p{Grapheme_Cluster_Break = LV}];
     38 $LVT         = [\p{Grapheme_Cluster_Break = LVT}];
     39 
     40 # Emoji definitions
     41 # ExtPict is the short alias of the Extended_Pictographic property; referenced by the GB 11 rule.
     42 $Extended_Pict = [:ExtPict:];
     43 
     44 ## -------------------------------------------------
     45 !!chain;
     46 !!lookAheadHardBreak;
     47 # GB 3 Do not break between a CR and LF.
     48 $CR $LF;
     49 # GB 6-8 Do not break Hangul syllable sequences.
     50 $L ($L | $V | $LV | $LVT);
     51 ($LV | $V) ($V | $T);
     52 ($LVT | $T) $T;
     53 
     54 # GB 9 Do not break before extending characters or ZWJ (except after Control, CR or LF).
     55 [^$Control $CR $LF] ($Extend | $ZWJ);
     56 
     57 # GB 9a Do not break before SpacingMarks (except after Control, CR or LF).
     58 [^$Control $CR $LF] $SpacingMark;
     59 
     60 # GB 9b Do not break after Prepend characters (except before Control, CR or LF).
     61 $Prepend [^$Control $CR $LF];
     62 
     63 # GB 9c Do not break inside consonant ... linker ... consonant sequences (InCB Extend/Linker may intervene).
     64 $InCBConsonant [ $InCBExtend $InCBLinker ]* $InCBLinker [ $InCBExtend $InCBLinker ]* $InCBConsonant;
     65 
     66 # GB 11 Do not break within emoji modifier sequences or emoji zwj sequences.
     67 $Extended_Pict $Extend* $ZWJ $Extended_Pict;
     68 
     69 # GB 12-13. Keep pairs of regional indicators together
     70 #           Note that the hard break '/' rule triggers only if there are three or more initial RIs;
     71 #           with exactly two, the plain pair rule below keeps them together.
     72 ^$Prepend* $Regional_Indicator $Regional_Indicator / $Regional_Indicator;
     73 ^$Prepend* $Regional_Indicator $Regional_Indicator;
     74 
     75 # GB 999 Match a single code point if no other rule applies.
     76 .;