tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

grapheme.txt (2258B)


      1 #
      2 # Copyright (C) 2016 and later: Unicode, Inc. and others.
      3 # License & terms of use: http://www.unicode.org/copyright.html
      4 # Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
      5 
      6 # file: grapheme.txt
      7 #
      8 # Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
      9 #
     10 #
     11 # Note: Rule syntax and the monkey test itself are still a work in progress.
     12 #       They are expected to change with review and the addition of support for rule tailoring.
     13 
     14 type = grapheme;      # one of grapheme | word | line | sentence
     15 locale = en;          # NOTE(review): presumably the locale of the break iterator these rules are checked against - confirm
     16 
     17 CR                 = [\p{Grapheme_Cluster_Break = CR}];                  # Each set in this group selects one Grapheme_Cluster_Break property value.
     18 LF                 = [\p{Grapheme_Cluster_Break = LF}];
     19 
     20 Control            = [[\p{Grapheme_Cluster_Break = Control}]];           # NOTE(review): the doubled brackets here and on Extend_ look redundant - confirm before simplifying.
     21 Extend_            = [[\p{Grapheme_Cluster_Break = Extend}]];            # NOTE(review): trailing underscore presumably keeps the set name distinct from the property value "Extend" - confirm.
     22 ZWJ                = [\p{Grapheme_Cluster_Break = ZWJ}];
     23 Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
     24 Prepend            = [\p{Grapheme_Cluster_Break = Prepend}];
     25 SpacingMark        = [\p{Grapheme_Cluster_Break = SpacingMark}];
     26 
     27 #
     28 # Korean Syllable Definitions
     29 #   One set per Grapheme_Cluster_Break value L, V, T, LV and LVT; rules GB6, GB7 and GB8 combine them.
     30 L                  = [\p{Grapheme_Cluster_Break = L}];
     31 V                  = [\p{Grapheme_Cluster_Break = V}];
     32 T                  = [\p{Grapheme_Cluster_Break = T}];
     33 LV                 = [\p{Grapheme_Cluster_Break = LV}];
     34 LVT                = [\p{Grapheme_Cluster_Break = LVT}];
     35 
     36 # Emoji definitions
     37 
     38 Extended_Pict      = [:ExtPict:];       # ExtPict is the short alias of the Extended_Pictographic property; this set is used by rule GB11.
     39 
     40 # Indic Sequences (InCB is the short alias of the Indic_Conjunct_Break property; the three sets are combined in rule GB9c)
     41 InCBLinker    = [\p{InCB=Linker}];
     42 InCBConsonant = [\p{InCB=Consonant}];
     43 InCBExtend    = [\p{InCB=Extend}];
     44 
     45 GB3:     CR LF;                          # CR followed by LF, no ÷. NOTE(review): ÷ appears to mark a break position (UAX #29 notation); rules without it keep the sequence unbroken - confirm.
     46 GB4:     (Control | CR | LF) ÷;          # ÷ after any Control, CR or LF.
     47 GB5:     . ÷ (Control | CR | LF);        # ÷ before any Control, CR or LF; "." presumably matches any character.
     48 
     49 GB6:     L (L | V | LV | LVT);           # GB6-GB8: sequences of the Korean syllable sets, none containing ÷.
     50 GB7:     (LV | V) (V | T);
     51 GB8:     (LVT | T) T;
     52 
     53 GB11:    Extended_Pict Extend_* ZWJ Extended_Pict;   # Pictographic, any number of Extend_, then ZWJ, then pictographic. NOTE(review): GB11 and GB9c are listed before GB9 - rule order is presumably significant; do not reorder without checking.
     54 GB9c:    InCBConsonant ( InCBExtend | InCBLinker )* InCBLinker ( InCBExtend | InCBLinker )* InCBConsonant;   # Two InCBConsonant joined by a run of InCBExtend/InCBLinker that contains at least one InCBLinker.
     55 GB9:     . (Extend_ | ZWJ);              # No ÷ before Extend_ or ZWJ.
     56 
     57 GB9a:    . SpacingMark;                  # No ÷ before SpacingMark.
     58 GB9b:    Prepend .;                      # No ÷ after Prepend.
     59 
     60 # Regional Indicators, split into pairs.
     61 #      Note that a pair of RIs that is not followed by a third RI will fall into
     62 #      the normal rules for Extend, etc.
     63 #      GB12 puts the ÷ after a pair only when a third RI follows; GB13 matches a bare pair with no ÷.
     64 GB12:  Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
     65 GB13:  Regional_Indicator Regional_Indicator;
     66 
     67 GB999:     . ÷;                          # Catch-all, listed last: ÷ after any character.