Bidi.h (6474B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 #ifndef intl_components_Bidi_h_ 5 #define intl_components_Bidi_h_ 6 7 #include "mozilla/intl/BidiEmbeddingLevel.h" 8 #include "mozilla/intl/ICU4CGlue.h" 9 10 // Use the Rust unicode-bidi crate to back the Bidi component. 11 // (Define to 0 to use the legacy ICU4C implementation instead, 12 // until that code is removed altogether.) 13 #define USE_RUST_UNICODE_BIDI 1 14 15 #if USE_RUST_UNICODE_BIDI 16 # include "mozilla/intl/unicode_bidi_ffi_generated.h" 17 #else 18 struct UBiDi; 19 #endif 20 21 namespace mozilla::intl { 22 23 /** 24 * This component is a Mozilla-focused API for working with bidirectional (bidi) 25 * text. Text is commonly displayed left to right (LTR), especially for 26 * Latin-based alphabets. However, languages like Arabic and Hebrew displays 27 * text right to left (RTL). When displaying text, LTR and RTL text can be 28 * combined together in the same paragraph. This class gives tools for working 29 * with unidirectional, and mixed direction paragraphs. 30 * 31 * See the Unicode Bidirectional Algorithm document for implementation details: 32 * https://unicode.org/reports/tr9/ 33 */ 34 class Bidi final { 35 public: 36 Bidi(); 37 ~Bidi(); 38 39 // Not copyable or movable 40 Bidi(const Bidi&) = delete; 41 Bidi& operator=(const Bidi&) = delete; 42 43 /** 44 * This enum indicates the text direction for the set paragraph. Some 45 * paragraphs are unidirectional, where they only have one direction, or a 46 * paragraph could use both LTR and RTL. In this case the paragraph's 47 * direction would be mixed. 48 */ 49 enum class ParagraphDirection { LTR, RTL, Mixed }; 50 51 /** 52 * Set the current paragraph of text to analyze for its bidi properties. This 53 * performs the Unicode bidi algorithm as specified by: 54 * https://unicode.org/reports/tr9/ 55 * 56 * After setting the text, the other getter methods can be used to find out 57 * the directionality of the paragraph text. 58 */ 59 ICUResult SetParagraph(Span<const char16_t> aParagraph, 60 BidiEmbeddingLevel aLevel); 61 62 /** 63 * Get the embedding level for the paragraph that was set by SetParagraph. 64 */ 65 BidiEmbeddingLevel GetParagraphEmbeddingLevel() const; 66 67 /** 68 * Get the directionality of the paragraph text that was set by SetParagraph. 69 */ 70 ParagraphDirection GetParagraphDirection() const; 71 72 /** 73 * Get the number of runs. This function may invoke the actual reordering on 74 * the Bidi object, after SetParagraph may have resolved only the levels of 75 * the text. Therefore, `CountRuns` may have to allocate memory, and may fail 76 * doing so. 77 */ 78 Result<int32_t, ICUError> CountRuns(); 79 80 /** 81 * Get the next logical run. The logical runs are a run of text that has the 82 * same directionality and embedding level. These runs are in memory order, 83 * and not in display order. 84 * 85 * Important! `Bidi::CountRuns` must be called before calling this method. 86 * 87 * @param aLogicalStart is the offset into the paragraph text that marks the 88 * logical start of the text. 89 * @param aLogicalLimitOut is an out param that is the length of the string 90 * that makes up the logical run. 91 * @param aLevelOut is an out parameter that returns the embedding level for 92 * the run 93 */ 94 void GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimitOut, 95 BidiEmbeddingLevel* aLevelOut); 96 97 /** 98 * This is a convenience function that does not use the ICU Bidi object. 99 * It is intended to be used for when an application has determined the 100 * embedding levels of objects (character sequences) and just needs to have 101 * them reordered (L2). 102 * 103 * @param aLevels is an array with `aLength` levels that have been 104 * determined by the application. 105 * 106 * @param aLength is the number of levels in the array, or, semantically, 107 * the number of objects to be reordered. It must be greater than 0. 108 * 109 * @param aIndexMap is a pointer to an array of `aLength` 110 * indexes which will reflect the reordering of the characters. 111 * The array does not need to be initialized. 112 * The index map will result in 113 * `aIndexMap[aVisualIndex]==aLogicalIndex`. 114 */ 115 static void ReorderVisual(const BidiEmbeddingLevel* aLevels, int32_t aLength, 116 int32_t* aIndexMap); 117 118 /** 119 * This enum indicates the bidi character type of the first strong character 120 * for the set paragraph. 121 * LTR: bidi character type 'L'. 122 * RTL: bidi character type 'R' or 'AL'. 123 * Neutral: The rest of bidi character types. 124 */ 125 enum class BaseDirection { LTR, RTL, Neutral }; 126 127 /** 128 * Get the base direction of the text. 129 */ 130 static BaseDirection GetBaseDirection(Span<const char16_t> aText); 131 132 /** 133 * Get one run's logical start, length, and directionality. In an RTL run, the 134 * character at the logical start is visually on the right of the displayed 135 * run. The length is the number of characters in the run. 136 * `Bidi::CountRuns` should be called before the runs are retrieved. 137 * 138 * @param aRunIndex is the number of the run in visual order, in the 139 * range `[0..CountRuns-1]`. 140 * 141 * @param aLogicalStart is the first logical character index in the text. 142 * The pointer may be `nullptr` if this index is not needed. 143 * 144 * @param aLength is the number of characters (at least one) in the run. 145 * The pointer may be `nullptr` if this is not needed. 146 * 147 * Note that in right-to-left runs, the code places modifier letters before 148 * base characters and second surrogates before first ones. 149 */ 150 BidiDirection GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, 151 int32_t* aLength); 152 153 private: 154 #if USE_RUST_UNICODE_BIDI 155 using UnicodeBidi = mozilla::intl::ffi::UnicodeBidi; 156 struct BidiFreePolicy { 157 void operator()(void* aPtr) { 158 bidi_destroy(static_cast<UnicodeBidi*>(aPtr)); 159 } 160 }; 161 mozilla::UniquePtr<UnicodeBidi, BidiFreePolicy> mBidi; 162 #else 163 ICUPointer<UBiDi> mBidi = ICUPointer<UBiDi>(nullptr); 164 165 /** 166 * An array of levels that is the same length as the paragraph from 167 * `Bidi::SetParagraph`. 168 */ 169 const BidiEmbeddingLevel* mLevels = nullptr; 170 171 /** 172 * The length of the paragraph from `Bidi::SetParagraph`. 173 */ 174 int32_t mLength = 0; 175 #endif 176 }; 177 178 } // namespace mozilla::intl 179 #endif