nsBidiUtils.h (9970B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef nsBidiUtils_h__ 7 #define nsBidiUtils_h__ 8 9 #include "mozilla/intl/BidiClass.h" 10 11 #include "nsString.h" 12 #include "encoding_rs_mem.h" 13 14 /** 15 * definitions of bidirection character types by category 16 */ 17 18 #define BIDICLASS_IS_RTL(val) \ 19 (((val) == mozilla::intl::BidiClass::RightToLeft) || \ 20 ((val) == mozilla::intl::BidiClass::RightToLeftArabic)) 21 22 #define BIDICLASS_IS_WEAK(val) \ 23 (((val) == mozilla::intl::BidiClass::EuropeanNumberSeparator) || \ 24 ((val) == mozilla::intl::BidiClass::EuropeanNumberTerminator) || \ 25 (((val) > mozilla::intl::BidiClass::ArabicNumber) && \ 26 ((val) != mozilla::intl::BidiClass::RightToLeftArabic))) 27 28 /** 29 * Inspects a Unichar, converting numbers to Arabic or Hindi forms and 30 * returning them 31 * @param aChar is the character 32 * @param aPrevCharArabic is true if the previous character in the string is 33 * an Arabic char 34 * @param aNumFlag specifies the conversion to perform: 35 * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion 36 * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms 37 * (Unicode 0660-0669) 38 * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms 39 * (Unicode 0030-0039) 40 * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to 41 * Hindi, otherwise to Arabic 42 * @return the converted Unichar 43 */ 44 char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, 45 uint32_t aNumFlag); 46 47 /** 48 * Scan a Unichar string, converting numbers to Arabic or Hindi forms in 49 * place 50 * @param aBuffer is the string 51 * @param aSize is the size of aBuffer 52 * @param aNumFlag specifies the conversion to perform: 53 * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion 54 * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms 55 * (Unicode 0660-0669) 56 * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms 57 * (Unicode 0030-0039) 58 * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to 59 * Hindi, otherwise to Arabic 60 */ 61 nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag); 62 63 /** 64 * Give a UTF-32 codepoint 65 * return true if the codepoint is a Bidi control character (LRM, RLM, ALM; 66 * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI). 67 * Return false, otherwise 68 */ 69 #define LRM_CHAR 0x200e 70 #define RLM_CHAR 0x200f 71 72 #define LRE_CHAR 0x202a 73 #define RLE_CHAR 0x202b 74 #define PDF_CHAR 0x202c 75 #define LRO_CHAR 0x202d 76 #define RLO_CHAR 0x202e 77 78 #define LRI_CHAR 0x2066 79 #define RLI_CHAR 0x2067 80 #define FSI_CHAR 0x2068 81 #define PDI_CHAR 0x2069 82 83 #define ALM_CHAR 0x061C 84 inline bool IsBidiControl(uint32_t aChar) { 85 return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) || 86 (LRI_CHAR <= aChar && aChar <= PDI_CHAR) || (aChar == ALM_CHAR) || 87 (aChar & 0xfffffe) == LRM_CHAR); 88 } 89 90 /** 91 * Give a UTF-32 codepoint 92 * Return true if the codepoint is a Bidi control character that may result 93 * in RTL directionality and therefore needs to trigger bidi resolution; 94 * return false otherwise. 95 */ 96 inline bool IsBidiControlRTL(uint32_t aChar) { 97 return aChar == RLM_CHAR || aChar == RLE_CHAR || aChar == RLO_CHAR || 98 aChar == RLI_CHAR || aChar == ALM_CHAR; 99 } 100 101 /** 102 * Give a 16-bit (UTF-16) text buffer 103 * @return true if the string contains right-to-left characters 104 */ 105 inline bool HasRTLChars(mozilla::Span<const char16_t> aBuffer) { 106 // Span ensures we never pass a nullptr to Rust--even if the 107 // length of the buffer is zero. 108 return encoding_mem_is_utf16_bidi(aBuffer.Elements(), aBuffer.Length()); 109 } 110 111 // These values are shared with Preferences dialog 112 // ------------------ 113 // If Pref values are to be changed 114 // in the XUL file of Prefs. the values 115 // Must be changed here too.. 116 // ------------------ 117 // 118 #define IBMBIDI_TEXTDIRECTION_STR "bidi.direction" 119 #define IBMBIDI_TEXTTYPE_STR "bidi.texttype" 120 #define IBMBIDI_NUMERAL_STR "bidi.numeral" 121 122 // ------------------ 123 // Text Direction 124 // ------------------ 125 // bidi.direction 126 #define IBMBIDI_TEXTDIRECTION_LTR 1 // 1 = directionLTRBidi * 127 #define IBMBIDI_TEXTDIRECTION_RTL 2 // 2 = directionRTLBidi 128 // ------------------ 129 // Text Type 130 // ------------------ 131 // bidi.texttype 132 #define IBMBIDI_TEXTTYPE_CHARSET 1 // 1 = charsettexttypeBidi * 133 #define IBMBIDI_TEXTTYPE_LOGICAL 2 // 2 = logicaltexttypeBidi 134 #define IBMBIDI_TEXTTYPE_VISUAL 3 // 3 = visualtexttypeBidi 135 // ------------------ 136 // Numeral Style 137 // ------------------ 138 // bidi.numeral 139 #define IBMBIDI_NUMERAL_NOMINAL 0 // 0 = nominalnumeralBidi * 140 #define IBMBIDI_NUMERAL_REGULAR 1 // 1 = regularcontextnumeralBidi 141 #define IBMBIDI_NUMERAL_HINDICONTEXT 2 // 2 = hindicontextnumeralBidi 142 #define IBMBIDI_NUMERAL_ARABIC 3 // 3 = arabicnumeralBidi 143 #define IBMBIDI_NUMERAL_HINDI 4 // 4 = hindinumeralBidi 144 #define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 // 5 = persiancontextnumeralBidi 145 #define IBMBIDI_NUMERAL_PERSIAN 6 // 6 = persiannumeralBidi 146 147 #define IBMBIDI_DEFAULT_BIDI_OPTIONS \ 148 ((IBMBIDI_TEXTDIRECTION_LTR << 0) | (IBMBIDI_TEXTTYPE_CHARSET << 4) | \ 149 (IBMBIDI_NUMERAL_NOMINAL << 8)) 150 151 #define GET_BIDI_OPTION_DIRECTION(bo) \ 152 (((bo) >> 0) & 0x0000000F) /* 4 bits for DIRECTION */ 153 #define GET_BIDI_OPTION_TEXTTYPE(bo) \ 154 (((bo) >> 4) & 0x0000000F) /* 4 bits for TEXTTYPE */ 155 #define GET_BIDI_OPTION_NUMERAL(bo) \ 156 (((bo) >> 8) & 0x0000000F) /* 4 bits for NUMERAL */ 157 158 #define SET_BIDI_OPTION_DIRECTION(bo, dir) \ 159 { \ 160 (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0); \ 161 } 162 #define SET_BIDI_OPTION_TEXTTYPE(bo, tt) \ 163 { \ 164 (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4); \ 165 } 166 #define SET_BIDI_OPTION_NUMERAL(bo, num) \ 167 { \ 168 (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8); \ 169 } 170 171 /* Constants related to the position of numerics in the codepage */ 172 #define START_HINDI_DIGITS 0x0660 173 #define END_HINDI_DIGITS 0x0669 174 #define START_ARABIC_DIGITS 0x0030 175 #define END_ARABIC_DIGITS 0x0039 176 #define START_FARSI_DIGITS 0x06f0 177 #define END_FARSI_DIGITS 0x06f9 178 #define IS_HINDI_DIGIT(u) \ 179 (((u) >= START_HINDI_DIGITS) && ((u) <= END_HINDI_DIGITS)) 180 #define IS_ARABIC_DIGIT(u) \ 181 (((u) >= START_ARABIC_DIGITS) && ((u) <= END_ARABIC_DIGITS)) 182 #define IS_FARSI_DIGIT(u) \ 183 (((u) >= START_FARSI_DIGITS) && ((u) <= END_FARSI_DIGITS)) 184 /** 185 * Arabic numeric separator and numeric formatting characters: 186 * U+0600;ARABIC NUMBER SIGN 187 * U+0601;ARABIC SIGN SANAH 188 * U+0602;ARABIC FOOTNOTE MARKER 189 * U+0603;ARABIC SIGN SAFHA 190 * U+066A;ARABIC PERCENT SIGN 191 * U+066B;ARABIC DECIMAL SEPARATOR 192 * U+066C;ARABIC THOUSANDS SEPARATOR 193 * U+06DD;ARABIC END OF AYAH 194 */ 195 #define IS_ARABIC_SEPARATOR(u) \ 196 ((/*(u) >= 0x0600 &&*/ (u) <= 0x0603) || ((u) >= 0x066A && (u) <= 0x066C) || \ 197 ((u) == 0x06DD)) 198 199 #define IS_BIDI_DIACRITIC(u) \ 200 (((u) >= 0x0591 && (u) <= 0x05A1) || ((u) >= 0x05A3 && (u) <= 0x05B9) || \ 201 ((u) >= 0x05BB && (u) <= 0x05BD) || ((u) == 0x05BF) || ((u) == 0x05C1) || \ 202 ((u) == 0x05C2) || ((u) == 0x05C4) || ((u) >= 0x064B && (u) <= 0x0652) || \ 203 ((u) == 0x0670) || ((u) >= 0x06D7 && (u) <= 0x06E4) || ((u) == 0x06E7) || \ 204 ((u) == 0x06E8) || ((u) >= 0x06EA && (u) <= 0x06ED)) 205 206 #define IS_HEBREW_CHAR(c) \ 207 (((0x0590 <= (c)) && ((c) <= 0x05FF)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f))) 208 #define IS_ARABIC_CHAR(c) \ 209 ((0x0600 <= (c) && (c) <= 0x08FF) && \ 210 ((c) <= 0x06ff || ((c) >= 0x0750 && (c) <= 0x077f) || (c) >= 0x08a0)) 211 #define IS_ARABIC_ALPHABETIC(c) \ 212 (IS_ARABIC_CHAR(c) && \ 213 !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c))) 214 215 /** 216 * The codepoint ranges in the following macros are based on the blocks 217 * allocated, or planned to be allocated, to right-to-left characters in the 218 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane) 219 * according to 220 * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and 221 * http://www.unicode.org/roadmaps/ 222 */ 223 224 #define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff)) 225 #define IS_RTL_PRESENTATION_FORM(c) \ 226 (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || ((0xfe70 <= (c)) && ((c) <= 0xfefe))) 227 #define IS_IN_SMP_RTL_BLOCK(c) \ 228 (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \ 229 ((0x1e800 <= (c)) && ((c) <= 0x1eFFF))) 230 // Due to the supplementary-plane RTL blocks being identifiable from the 231 // high surrogate without examining the low surrogate, it is correct to 232 // use this by-code-unit check on potentially astral text without doing 233 // the math to decode surrogate pairs into code points. However, unpaired 234 // high surrogates that are RTL high surrogates then count as RTL even 235 // though, if replaced by the REPLACEMENT CHARACTER, it would not be 236 // RTL. 237 #define UTF16_CODE_UNIT_IS_BIDI(c) \ 238 ((IS_IN_BMP_RTL_BLOCK(c)) || (IS_RTL_PRESENTATION_FORM(c)) || \ 239 (c) == 0xD802 || (c) == 0xD803 || (c) == 0xD83A || (c) == 0xD83B) 240 #define UTF32_CHAR_IS_BIDI(c) \ 241 ((IS_IN_BMP_RTL_BLOCK(c)) || (IS_RTL_PRESENTATION_FORM(c)) || \ 242 (IS_IN_SMP_RTL_BLOCK(c))) 243 #endif /* nsBidiUtils_h__ */