tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

TestBidi.cpp (11331B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 #include "gtest/gtest.h"
      5 
      6 #include "mozilla/intl/Bidi.h"
      7 #include "mozilla/Span.h"
      8 namespace mozilla::intl {
      9 
     10 struct VisualRun {
     11  Span<const char16_t> string;
     12  BidiDirection direction;
     13 };
     14 
     15 /**
     16 * An iterator for visual runs in a paragraph. See Bug 1736597 for integrating
     17 * this into the public API.
     18 */
     19 class MOZ_STACK_CLASS VisualRunIter {
     20 public:
     21  VisualRunIter(Bidi& aBidi, Span<const char16_t> aParagraph,
     22                BidiEmbeddingLevel aLevel)
     23      : mBidi(aBidi), mParagraph(aParagraph) {
     24    // Crash in case of errors by calling unwrap. If this were a real API, this
     25    // would be a TryCreate call.
     26    mBidi.SetParagraph(aParagraph, aLevel).unwrap();
     27    mRunCount = mBidi.CountRuns().unwrap();
     28  }
     29 
     30  Maybe<VisualRun> Next() {
     31    if (mRunIndex >= mRunCount) {
     32      return Nothing();
     33    }
     34 
     35    int32_t stringIndex = -1;
     36    int32_t stringLength = -1;
     37 
     38    BidiDirection direction =
     39        mBidi.GetVisualRun(mRunIndex, &stringIndex, &stringLength);
     40 
     41    Span<const char16_t> string(mParagraph.Elements() + stringIndex,
     42                                stringLength);
     43    mRunIndex++;
     44    return Some(VisualRun{string, direction});
     45  }
     46 
     47 private:
     48  Bidi& mBidi;
     49  Span<const char16_t> mParagraph = Span<const char16_t>();
     50  int32_t mRunIndex = 0;
     51  int32_t mRunCount = 0;
     52 };
     53 
     54 struct LogicalRun {
     55  Span<const char16_t> string;
     56  BidiEmbeddingLevel embeddingLevel;
     57 };
     58 
     59 /**
     60 * An iterator for logical runs in a paragraph. See Bug 1736597 for integrating
     61 * this into the public API.
     62 */
     63 class MOZ_STACK_CLASS LogicalRunIter {
     64 public:
     65  LogicalRunIter(Bidi& aBidi, Span<const char16_t> aParagraph,
     66                 BidiEmbeddingLevel aLevel)
     67      : mBidi(aBidi), mParagraph(aParagraph) {
     68    // Crash in case of errors by calling unwrap. If this were a real API, this
     69    // would be a TryCreate call.
     70    mBidi.SetParagraph(aParagraph, aLevel).unwrap();
     71    mBidi.CountRuns().unwrap();
     72  }
     73 
     74  Maybe<LogicalRun> Next() {
     75    if (mRunIndex >= static_cast<int32_t>(mParagraph.Length())) {
     76      return Nothing();
     77    }
     78 
     79    int32_t logicalLimit;
     80 
     81    BidiEmbeddingLevel embeddingLevel;
     82    mBidi.GetLogicalRun(mRunIndex, &logicalLimit, &embeddingLevel);
     83 
     84    Span<const char16_t> string(mParagraph.Elements() + mRunIndex,
     85                                logicalLimit - mRunIndex);
     86 
     87    mRunIndex = logicalLimit;
     88    return Some(LogicalRun{string, embeddingLevel});
     89  }
     90 
     91 private:
     92  Bidi& mBidi;
     93  Span<const char16_t> mParagraph = Span<const char16_t>();
     94  int32_t mRunIndex = 0;
     95 };
     96 
     97 TEST(IntlBidi, SimpleLTR)
     98 {
     99  Bidi bidi{};
    100  LogicalRunIter logicalRunIter(bidi, MakeStringSpan(u"this is a paragraph"),
    101                                BidiEmbeddingLevel::DefaultLTR());
    102  ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0);
    103  ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::LTR);
    104 
    105  {
    106    auto logicalRun = logicalRunIter.Next();
    107    ASSERT_TRUE(logicalRun.isSome());
    108    ASSERT_EQ(logicalRun->string, MakeStringSpan(u"this is a paragraph"));
    109    ASSERT_EQ(logicalRun->embeddingLevel, 0);
    110    ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR);
    111  }
    112 
    113  {
    114    auto logicalRun = logicalRunIter.Next();
    115    ASSERT_TRUE(logicalRun.isNothing());
    116  }
    117 }
    118 
    119 TEST(IntlBidi, SimpleRTL)
    120 {
    121  Bidi bidi{};
    122  LogicalRunIter logicalRunIter(bidi, MakeStringSpan(u"فايرفوكس رائع"),
    123                                BidiEmbeddingLevel::DefaultLTR());
    124  ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 1);
    125  ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::RTL);
    126 
    127  {
    128    auto logicalRun = logicalRunIter.Next();
    129    ASSERT_TRUE(logicalRun.isSome());
    130    ASSERT_EQ(logicalRun->string, MakeStringSpan(u"فايرفوكس رائع"));
    131    ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::RTL);
    132    ASSERT_EQ(logicalRun->embeddingLevel, 1);
    133  }
    134 
    135  {
    136    auto logicalRun = logicalRunIter.Next();
    137    ASSERT_TRUE(logicalRun.isNothing());
    138  }
    139 }
    140 
    141 TEST(IntlBidi, MultiLevel)
    142 {
    143  Bidi bidi{};
    144  LogicalRunIter logicalRunIter(
    145      bidi, MakeStringSpan(u"Firefox is awesome: رائع Firefox"),
    146      BidiEmbeddingLevel::DefaultLTR());
    147  ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0);
    148  ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::Mixed);
    149 
    150  {
    151    auto logicalRun = logicalRunIter.Next();
    152    ASSERT_TRUE(logicalRun.isSome());
    153    ASSERT_EQ(logicalRun->string, MakeStringSpan(u"Firefox is awesome: "));
    154    ASSERT_EQ(logicalRun->embeddingLevel, 0);
    155  }
    156  {
    157    auto logicalRun = logicalRunIter.Next();
    158    ASSERT_TRUE(logicalRun.isSome());
    159    ASSERT_EQ(logicalRun->string, MakeStringSpan(u"رائع"));
    160    ASSERT_EQ(logicalRun->embeddingLevel, 1);
    161  }
    162  {
    163    auto logicalRun = logicalRunIter.Next();
    164    ASSERT_TRUE(logicalRun.isSome());
    165    ASSERT_EQ(logicalRun->string, MakeStringSpan(u" Firefox"));
    166    ASSERT_EQ(logicalRun->embeddingLevel, 0);
    167  }
    168  {
    169    auto logicalRun = logicalRunIter.Next();
    170    ASSERT_TRUE(logicalRun.isNothing());
    171  }
    172 }
    173 
    174 TEST(IntlBidi, RtlOverride)
    175 {
    176  Bidi bidi{};
    177  // Set the paragraph using the RTL embedding mark U+202B, and the LTR
    178  // embedding mark U+202A to increase the embedding level. This mark switches
    179  // the weakly directional character "_". This demonstrates that embedding
    180  // levels can be computed.
    181  LogicalRunIter logicalRunIter(
    182      bidi, MakeStringSpan(u"ltr\u202b___رائع___\u202a___ltr__"),
    183      BidiEmbeddingLevel::DefaultLTR());
    184  ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0);
    185  ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::Mixed);
    186 
    187  // Note that the Unicode Bidi Algorithm explicitly does NOT require any
    188  // specific placement or levels for the embedding controls (see
    189  // rule https://www.unicode.org/reports/tr9/#X9).
    190  // Further, the implementation notes at
    191  // https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters
    192  // advise to "Resolve any LRE, RLE, LRO, RLO, PDF, or BN to the level of the
    193  // preceding character if there is one...", which means the embedding marks
    194  // here will each become part of the *preceding* run. This is how the Rust
    195  // unicode-bidi implementation behaves.
    196  // However, ICU4C behavior is such that they take on the level of the *next*
    197  // character, and become part of the following run.
    198  // For now, we accept either result here.
    199  {
    200    auto logicalRun = logicalRunIter.Next();
    201    ASSERT_TRUE(logicalRun.isSome());
    202    ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"ltr") ||
    203                logicalRun->string == MakeStringSpan(u"ltr\u202b"));
    204    ASSERT_EQ(logicalRun->embeddingLevel, 0);
    205    ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR);
    206  }
    207  {
    208    auto logicalRun = logicalRunIter.Next();
    209    ASSERT_TRUE(logicalRun.isSome());
    210    ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"\u202b___رائع___") ||
    211                logicalRun->string == MakeStringSpan(u"___رائع___\u202a"));
    212    ASSERT_EQ(logicalRun->embeddingLevel, 1);
    213    ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::RTL);
    214  }
    215  {
    216    auto logicalRun = logicalRunIter.Next();
    217    ASSERT_TRUE(logicalRun.isSome());
    218    ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"\u202a___ltr__") ||
    219                logicalRun->string == MakeStringSpan(u"___ltr__"));
    220    ASSERT_EQ(logicalRun->embeddingLevel, 2);
    221    ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR);
    222  }
    223  {
    224    auto logicalRun = logicalRunIter.Next();
    225    ASSERT_TRUE(logicalRun.isNothing());
    226  }
    227 }
    228 
    229 TEST(IntlBidi, VisualRuns)
    230 {
    231  Bidi bidi{};
    232 
    233  VisualRunIter visualRunIter(
    234      bidi,
    235      MakeStringSpan(
    236          u"first visual run التشغيل البصري الثاني third visual run"),
    237      BidiEmbeddingLevel::DefaultLTR());
    238  {
    239    Maybe<VisualRun> run = visualRunIter.Next();
    240    ASSERT_TRUE(run.isSome());
    241    ASSERT_EQ(run->string, MakeStringSpan(u"first visual run "));
    242    ASSERT_EQ(run->direction, BidiDirection::LTR);
    243  }
    244  {
    245    Maybe<VisualRun> run = visualRunIter.Next();
    246    ASSERT_TRUE(run.isSome());
    247    ASSERT_EQ(run->string, MakeStringSpan(u"التشغيل البصري الثاني"));
    248    ASSERT_EQ(run->direction, BidiDirection::RTL);
    249  }
    250  {
    251    Maybe<VisualRun> run = visualRunIter.Next();
    252    ASSERT_TRUE(run.isSome());
    253    ASSERT_EQ(run->string, MakeStringSpan(u" third visual run"));
    254    ASSERT_EQ(run->direction, BidiDirection::LTR);
    255  }
    256  {
    257    Maybe<VisualRun> run = visualRunIter.Next();
    258    ASSERT_TRUE(run.isNothing());
    259  }
    260 }
    261 
    262 TEST(IntlBidi, VisualRunsWithEmbeds)
    263 {
    264  // Compare this test to the logical order test.
    265  Bidi bidi{};
    266  VisualRunIter visualRunIter(
    267      bidi, MakeStringSpan(u"ltr\u202b___رائع___\u202a___ltr___"),
    268      BidiEmbeddingLevel::DefaultLTR());
    269  {
    270    Maybe<VisualRun> run = visualRunIter.Next();
    271    ASSERT_TRUE(run.isSome());
    272    ASSERT_TRUE(run->string == MakeStringSpan(u"ltr") ||
    273                run->string == MakeStringSpan(u"ltr\u202b"));
    274    ASSERT_EQ(run->direction, BidiDirection::LTR);
    275  }
    276  {
    277    Maybe<VisualRun> run = visualRunIter.Next();
    278    ASSERT_TRUE(run.isSome());
    279    ASSERT_TRUE(run->string == MakeStringSpan(u"\u202a___ltr___") ||
    280                run->string == MakeStringSpan(u"___ltr___"));
    281    ASSERT_EQ(run->direction, BidiDirection::LTR);
    282  }
    283  {
    284    Maybe<VisualRun> run = visualRunIter.Next();
    285    ASSERT_TRUE(run.isSome());
    286    ASSERT_TRUE(run->string == MakeStringSpan(u"\u202b___رائع___") ||
    287                run->string == MakeStringSpan(u"___رائع___\u202a"));
    288    ASSERT_EQ(run->direction, BidiDirection::RTL);
    289  }
    290  {
    291    Maybe<VisualRun> run = visualRunIter.Next();
    292    ASSERT_TRUE(run.isNothing());
    293  }
    294 }
    295 
    296 // The full Bidi class can be found in [1].
    297 //
    298 // [1]: https://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt
    299 TEST(IntlBidi, GetBaseDirection)
    300 {
    301  // Return Neutral as default if empty string is provided.
    302  ASSERT_EQ(Bidi::GetBaseDirection(nullptr), Bidi::BaseDirection::Neutral);
    303 
    304  // White space(WS) is classified as Neutral.
    305  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" ")),
    306            Bidi::BaseDirection::Neutral);
    307 
    308  // 000A and 000D are paragraph separators(BS), which are also classified as
    309  // Neutral.
    310  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u000A")),
    311            Bidi::BaseDirection::Neutral);
    312  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u000D")),
    313            Bidi::BaseDirection::Neutral);
    314 
    315  // 0620..063f are Arabic letters, which is of type AL.
    316  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u0620\u0621\u0622")),
    317            Bidi::BaseDirection::RTL);
    318  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" \u0620\u0621\u0622")),
    319            Bidi::BaseDirection::RTL);
    320  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u0620\u0621\u0622ABC")),
    321            Bidi::BaseDirection::RTL);
    322 
    323  // First strong character is of English letters.
    324  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"ABC")),
    325            Bidi::BaseDirection::LTR);
    326  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" ABC")),
    327            Bidi::BaseDirection::LTR);
    328  ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"ABC\u0620")),
    329            Bidi::BaseDirection::LTR);
    330 }
    331 
    332 }  // namespace mozilla::intl