grapheme.txt (2258B)
#
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.

# file: grapheme.txt
#
# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
#
#
# Note: Rule syntax and the monkey test itself are still a work in progress.
#       They are expected to change with review and the addition of support for rule tailoring.
#
# Layout of this file:
#   1. Header settings (type, locale).
#   2. Named character sets, each defined from a Unicode property value
#      (Grapheme_Cluster_Break, ExtPict, InCB).
#   3. Break rules labelled GBn, written in terms of the named sets.
#
# NOTE(review): the GBn labels and the "÷" / "." notation look like they follow
# UAX #29 (Grapheme Cluster Boundary Rules), where "÷" marks a break position and
# a rule without "÷" keeps the matched sequence together. The parser that reads
# this file is not visible here - confirm against RBBIMonkeyTest before relying on it.

# Header settings: which kind of break rules this file describes, and the locale.
type = grapheme; # one of grapheme | word | line | sentence
locale = en;

# Line-ending characters.
CR = [\p{Grapheme_Cluster_Break = CR}];
LF = [\p{Grapheme_Cluster_Break = LF}];

# Remaining Grapheme_Cluster_Break classes used by the rules.
# Note that the Extend set is named "Extend_" (trailing underscore); rules GB11 and
# GB9 below refer to it by that exact name.
Control = [[\p{Grapheme_Cluster_Break = Control}]];
Extend_ = [[\p{Grapheme_Cluster_Break = Extend}]];
ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
# Korean Syllable Definitions
#
L = [\p{Grapheme_Cluster_Break = L}];
V = [\p{Grapheme_Cluster_Break = V}];
T = [\p{Grapheme_Cluster_Break = T}];
LV = [\p{Grapheme_Cluster_Break = LV}];
LVT = [\p{Grapheme_Cluster_Break = LVT}];

# Emoji definitions
# NOTE(review): ExtPict is presumably the short alias of Extended_Pictographic - verify.

Extended_Pict = [:ExtPict:];

# Indic Sequences
# Sets selected by the InCB property; used only by rule GB9c.
InCBLinker = [\p{InCB=Linker}];
InCBConsonant = [\p{InCB=Consonant}];
InCBExtend = [\p{InCB=Extend}];

# Rules for CR, LF and Control characters.
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
GB5: . ÷ (Control | CR | LF);

# Rules over the Korean syllable sets defined above.
GB6: L (L | V | LV | LVT);
GB7: (LV | V) (V | T);
GB8: (LVT | T) T;

# GB11 (Extended_Pict ... ZWJ Extended_Pict) and GB9c (consonant / linker sequences)
# are listed ahead of the generic GB9 rule, which also matches the Extend_ and ZWJ
# characters that GB11 spans.
# NOTE(review): rule order looks significant here - confirm before reordering.
GB11: Extended_Pict Extend_* ZWJ Extended_Pict;
GB9c: InCBConsonant ( InCBExtend | InCBLinker )* InCBLinker ( InCBExtend | InCBLinker )* InCBConsonant;
GB9: . (Extend_ | ZWJ);

# Rules for SpacingMark (after any character) and Prepend (before any character).
GB9a: . SpacingMark;
GB9b: Prepend .;

# Regional Indicators, split into pairs.
#      Note that a pair of RIs that is not followed by a third RI will fall into
#      the normal rules for Extend, etc.
#
GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
GB13: Regional_Indicator Regional_Indicator;

# Final rule in the file.
# NOTE(review): presumably the fallback that applies when no earlier rule matches - confirm.
GB999: . ÷;