TestBidi.cpp (11331B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 #include "gtest/gtest.h" 5 6 #include "mozilla/intl/Bidi.h" 7 #include "mozilla/Span.h" 8 namespace mozilla::intl { 9 10 struct VisualRun { 11 Span<const char16_t> string; 12 BidiDirection direction; 13 }; 14 15 /** 16 * An iterator for visual runs in a paragraph. See Bug 1736597 for integrating 17 * this into the public API. 18 */ 19 class MOZ_STACK_CLASS VisualRunIter { 20 public: 21 VisualRunIter(Bidi& aBidi, Span<const char16_t> aParagraph, 22 BidiEmbeddingLevel aLevel) 23 : mBidi(aBidi), mParagraph(aParagraph) { 24 // Crash in case of errors by calling unwrap. If this were a real API, this 25 // would be a TryCreate call. 26 mBidi.SetParagraph(aParagraph, aLevel).unwrap(); 27 mRunCount = mBidi.CountRuns().unwrap(); 28 } 29 30 Maybe<VisualRun> Next() { 31 if (mRunIndex >= mRunCount) { 32 return Nothing(); 33 } 34 35 int32_t stringIndex = -1; 36 int32_t stringLength = -1; 37 38 BidiDirection direction = 39 mBidi.GetVisualRun(mRunIndex, &stringIndex, &stringLength); 40 41 Span<const char16_t> string(mParagraph.Elements() + stringIndex, 42 stringLength); 43 mRunIndex++; 44 return Some(VisualRun{string, direction}); 45 } 46 47 private: 48 Bidi& mBidi; 49 Span<const char16_t> mParagraph = Span<const char16_t>(); 50 int32_t mRunIndex = 0; 51 int32_t mRunCount = 0; 52 }; 53 54 struct LogicalRun { 55 Span<const char16_t> string; 56 BidiEmbeddingLevel embeddingLevel; 57 }; 58 59 /** 60 * An iterator for logical runs in a paragraph. See Bug 1736597 for integrating 61 * this into the public API. 62 */ 63 class MOZ_STACK_CLASS LogicalRunIter { 64 public: 65 LogicalRunIter(Bidi& aBidi, Span<const char16_t> aParagraph, 66 BidiEmbeddingLevel aLevel) 67 : mBidi(aBidi), mParagraph(aParagraph) { 68 // Crash in case of errors by calling unwrap. If this were a real API, this 69 // would be a TryCreate call. 70 mBidi.SetParagraph(aParagraph, aLevel).unwrap(); 71 mBidi.CountRuns().unwrap(); 72 } 73 74 Maybe<LogicalRun> Next() { 75 if (mRunIndex >= static_cast<int32_t>(mParagraph.Length())) { 76 return Nothing(); 77 } 78 79 int32_t logicalLimit; 80 81 BidiEmbeddingLevel embeddingLevel; 82 mBidi.GetLogicalRun(mRunIndex, &logicalLimit, &embeddingLevel); 83 84 Span<const char16_t> string(mParagraph.Elements() + mRunIndex, 85 logicalLimit - mRunIndex); 86 87 mRunIndex = logicalLimit; 88 return Some(LogicalRun{string, embeddingLevel}); 89 } 90 91 private: 92 Bidi& mBidi; 93 Span<const char16_t> mParagraph = Span<const char16_t>(); 94 int32_t mRunIndex = 0; 95 }; 96 97 TEST(IntlBidi, SimpleLTR) 98 { 99 Bidi bidi{}; 100 LogicalRunIter logicalRunIter(bidi, MakeStringSpan(u"this is a paragraph"), 101 BidiEmbeddingLevel::DefaultLTR()); 102 ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0); 103 ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::LTR); 104 105 { 106 auto logicalRun = logicalRunIter.Next(); 107 ASSERT_TRUE(logicalRun.isSome()); 108 ASSERT_EQ(logicalRun->string, MakeStringSpan(u"this is a paragraph")); 109 ASSERT_EQ(logicalRun->embeddingLevel, 0); 110 ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR); 111 } 112 113 { 114 auto logicalRun = logicalRunIter.Next(); 115 ASSERT_TRUE(logicalRun.isNothing()); 116 } 117 } 118 119 TEST(IntlBidi, SimpleRTL) 120 { 121 Bidi bidi{}; 122 LogicalRunIter logicalRunIter(bidi, MakeStringSpan(u"فايرفوكس رائع"), 123 BidiEmbeddingLevel::DefaultLTR()); 124 ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 1); 125 ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::RTL); 126 127 { 128 auto logicalRun = logicalRunIter.Next(); 129 ASSERT_TRUE(logicalRun.isSome()); 130 ASSERT_EQ(logicalRun->string, MakeStringSpan(u"فايرفوكس رائع")); 131 ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::RTL); 132 ASSERT_EQ(logicalRun->embeddingLevel, 1); 133 } 134 135 { 136 auto logicalRun = logicalRunIter.Next(); 137 ASSERT_TRUE(logicalRun.isNothing()); 138 } 139 } 140 141 TEST(IntlBidi, MultiLevel) 142 { 143 Bidi bidi{}; 144 LogicalRunIter logicalRunIter( 145 bidi, MakeStringSpan(u"Firefox is awesome: رائع Firefox"), 146 BidiEmbeddingLevel::DefaultLTR()); 147 ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0); 148 ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::Mixed); 149 150 { 151 auto logicalRun = logicalRunIter.Next(); 152 ASSERT_TRUE(logicalRun.isSome()); 153 ASSERT_EQ(logicalRun->string, MakeStringSpan(u"Firefox is awesome: ")); 154 ASSERT_EQ(logicalRun->embeddingLevel, 0); 155 } 156 { 157 auto logicalRun = logicalRunIter.Next(); 158 ASSERT_TRUE(logicalRun.isSome()); 159 ASSERT_EQ(logicalRun->string, MakeStringSpan(u"رائع")); 160 ASSERT_EQ(logicalRun->embeddingLevel, 1); 161 } 162 { 163 auto logicalRun = logicalRunIter.Next(); 164 ASSERT_TRUE(logicalRun.isSome()); 165 ASSERT_EQ(logicalRun->string, MakeStringSpan(u" Firefox")); 166 ASSERT_EQ(logicalRun->embeddingLevel, 0); 167 } 168 { 169 auto logicalRun = logicalRunIter.Next(); 170 ASSERT_TRUE(logicalRun.isNothing()); 171 } 172 } 173 174 TEST(IntlBidi, RtlOverride) 175 { 176 Bidi bidi{}; 177 // Set the paragraph using the RTL embedding mark U+202B, and the LTR 178 // embedding mark U+202A to increase the embedding level. This mark switches 179 // the weakly directional character "_". This demonstrates that embedding 180 // levels can be computed. 181 LogicalRunIter logicalRunIter( 182 bidi, MakeStringSpan(u"ltr\u202b___رائع___\u202a___ltr__"), 183 BidiEmbeddingLevel::DefaultLTR()); 184 ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0); 185 ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::Mixed); 186 187 // Note that the Unicode Bidi Algorithm explicitly does NOT require any 188 // specific placement or levels for the embedding controls (see 189 // rule https://www.unicode.org/reports/tr9/#X9). 190 // Further, the implementation notes at 191 // https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters 192 // advise to "Resolve any LRE, RLE, LRO, RLO, PDF, or BN to the level of the 193 // preceding character if there is one...", which means the embedding marks 194 // here will each become part of the *preceding* run. This is how the Rust 195 // unicode-bidi implementation behaves. 196 // However, ICU4C behavior is such that they take on the level of the *next* 197 // character, and become part of the following run. 198 // For now, we accept either result here. 199 { 200 auto logicalRun = logicalRunIter.Next(); 201 ASSERT_TRUE(logicalRun.isSome()); 202 ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"ltr") || 203 logicalRun->string == MakeStringSpan(u"ltr\u202b")); 204 ASSERT_EQ(logicalRun->embeddingLevel, 0); 205 ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR); 206 } 207 { 208 auto logicalRun = logicalRunIter.Next(); 209 ASSERT_TRUE(logicalRun.isSome()); 210 ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"\u202b___رائع___") || 211 logicalRun->string == MakeStringSpan(u"___رائع___\u202a")); 212 ASSERT_EQ(logicalRun->embeddingLevel, 1); 213 ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::RTL); 214 } 215 { 216 auto logicalRun = logicalRunIter.Next(); 217 ASSERT_TRUE(logicalRun.isSome()); 218 ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"\u202a___ltr__") || 219 logicalRun->string == MakeStringSpan(u"___ltr__")); 220 ASSERT_EQ(logicalRun->embeddingLevel, 2); 221 ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR); 222 } 223 { 224 auto logicalRun = logicalRunIter.Next(); 225 ASSERT_TRUE(logicalRun.isNothing()); 226 } 227 } 228 229 TEST(IntlBidi, VisualRuns) 230 { 231 Bidi bidi{}; 232 233 VisualRunIter visualRunIter( 234 bidi, 235 MakeStringSpan( 236 u"first visual run التشغيل البصري الثاني third visual run"), 237 BidiEmbeddingLevel::DefaultLTR()); 238 { 239 Maybe<VisualRun> run = visualRunIter.Next(); 240 ASSERT_TRUE(run.isSome()); 241 ASSERT_EQ(run->string, MakeStringSpan(u"first visual run ")); 242 ASSERT_EQ(run->direction, BidiDirection::LTR); 243 } 244 { 245 Maybe<VisualRun> run = visualRunIter.Next(); 246 ASSERT_TRUE(run.isSome()); 247 ASSERT_EQ(run->string, MakeStringSpan(u"التشغيل البصري الثاني")); 248 ASSERT_EQ(run->direction, BidiDirection::RTL); 249 } 250 { 251 Maybe<VisualRun> run = visualRunIter.Next(); 252 ASSERT_TRUE(run.isSome()); 253 ASSERT_EQ(run->string, MakeStringSpan(u" third visual run")); 254 ASSERT_EQ(run->direction, BidiDirection::LTR); 255 } 256 { 257 Maybe<VisualRun> run = visualRunIter.Next(); 258 ASSERT_TRUE(run.isNothing()); 259 } 260 } 261 262 TEST(IntlBidi, VisualRunsWithEmbeds) 263 { 264 // Compare this test to the logical order test. 265 Bidi bidi{}; 266 VisualRunIter visualRunIter( 267 bidi, MakeStringSpan(u"ltr\u202b___رائع___\u202a___ltr___"), 268 BidiEmbeddingLevel::DefaultLTR()); 269 { 270 Maybe<VisualRun> run = visualRunIter.Next(); 271 ASSERT_TRUE(run.isSome()); 272 ASSERT_TRUE(run->string == MakeStringSpan(u"ltr") || 273 run->string == MakeStringSpan(u"ltr\u202b")); 274 ASSERT_EQ(run->direction, BidiDirection::LTR); 275 } 276 { 277 Maybe<VisualRun> run = visualRunIter.Next(); 278 ASSERT_TRUE(run.isSome()); 279 ASSERT_TRUE(run->string == MakeStringSpan(u"\u202a___ltr___") || 280 run->string == MakeStringSpan(u"___ltr___")); 281 ASSERT_EQ(run->direction, BidiDirection::LTR); 282 } 283 { 284 Maybe<VisualRun> run = visualRunIter.Next(); 285 ASSERT_TRUE(run.isSome()); 286 ASSERT_TRUE(run->string == MakeStringSpan(u"\u202b___رائع___") || 287 run->string == MakeStringSpan(u"___رائع___\u202a")); 288 ASSERT_EQ(run->direction, BidiDirection::RTL); 289 } 290 { 291 Maybe<VisualRun> run = visualRunIter.Next(); 292 ASSERT_TRUE(run.isNothing()); 293 } 294 } 295 296 // The full Bidi class can be found in [1]. 297 // 298 // [1]: https://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt 299 TEST(IntlBidi, GetBaseDirection) 300 { 301 // Return Neutral as default if empty string is provided. 302 ASSERT_EQ(Bidi::GetBaseDirection(nullptr), Bidi::BaseDirection::Neutral); 303 304 // White space(WS) is classified as Neutral. 305 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" ")), 306 Bidi::BaseDirection::Neutral); 307 308 // 000A and 000D are paragraph separators(BS), which are also classified as 309 // Neutral. 310 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u000A")), 311 Bidi::BaseDirection::Neutral); 312 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u000D")), 313 Bidi::BaseDirection::Neutral); 314 315 // 0620..063f are Arabic letters, which is of type AL. 316 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u0620\u0621\u0622")), 317 Bidi::BaseDirection::RTL); 318 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" \u0620\u0621\u0622")), 319 Bidi::BaseDirection::RTL); 320 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u0620\u0621\u0622ABC")), 321 Bidi::BaseDirection::RTL); 322 323 // First strong character is of English letters. 324 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"ABC")), 325 Bidi::BaseDirection::LTR); 326 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" ABC")), 327 Bidi::BaseDirection::LTR); 328 ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"ABC\u0620")), 329 Bidi::BaseDirection::LTR); 330 } 331 332 } // namespace mozilla::intl