tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Bidi.h (6474B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 #ifndef intl_components_Bidi_h_
      5 #define intl_components_Bidi_h_
      6 
      7 #include "mozilla/intl/BidiEmbeddingLevel.h"
      8 #include "mozilla/intl/ICU4CGlue.h"
      9 
     10 // Use the Rust unicode-bidi crate to back the Bidi component.
     11 // (Define to 0 to use the legacy ICU4C implementation instead,
     12 // until that code is removed altogether.)
     13 #define USE_RUST_UNICODE_BIDI 1
     14 
     15 #if USE_RUST_UNICODE_BIDI
     16 #  include "mozilla/intl/unicode_bidi_ffi_generated.h"
     17 #else
     18 struct UBiDi;
     19 #endif
     20 
     21 namespace mozilla::intl {
     22 
     23 /**
     24 * This component is a Mozilla-focused API for working with bidirectional (bidi)
     25 * text. Text is commonly displayed left to right (LTR), especially for
     26 * Latin-based alphabets. However, languages like Arabic and Hebrew display
     27 * text right to left (RTL). When displaying text, LTR and RTL text can be
     28 * combined together in the same paragraph. This class gives tools for working
     29 * with unidirectional and mixed-direction paragraphs.
     30 *
     31 * See the Unicode Bidirectional Algorithm document for implementation details:
     32 * https://unicode.org/reports/tr9/
     33 */
     34 class Bidi final {
     35 public:
     36  Bidi();
     37  ~Bidi();
     38 
     39  // Not copyable; deleting the copy operations also suppresses implicit moves.
     40  Bidi(const Bidi&) = delete;
     41  Bidi& operator=(const Bidi&) = delete;
     42 
     43  /**
     44   * This enum indicates the text direction for the set paragraph. A paragraph
     45   * is either unidirectional (entirely LTR or entirely RTL), or it contains
     46   * both LTR and RTL text, in which case its direction is reported as
     47   * `Mixed`.
     48   */
     49  enum class ParagraphDirection { LTR, RTL, Mixed };
     50 
     51  /**
     52   * Set the current paragraph of text to analyze for its bidi properties. This
     53   * performs the Unicode bidi algorithm as specified by:
     54   * https://unicode.org/reports/tr9/
     55   *
     56   * After setting the text, the other getter methods can be used to find out
     57   * the directionality of the paragraph text.
     58   */
     59  ICUResult SetParagraph(Span<const char16_t> aParagraph,
     60                         BidiEmbeddingLevel aLevel);
     61 
     62  /**
     63   * Get the embedding level for the paragraph that was set by SetParagraph.
     64   */
     65  BidiEmbeddingLevel GetParagraphEmbeddingLevel() const;
     66 
     67  /**
     68   * Get the directionality of the paragraph text that was set by SetParagraph.
     69   */
     70  ParagraphDirection GetParagraphDirection() const;
     71 
     72  /**
     73   * Get the number of runs. This function may invoke the actual reordering on
     74   * the Bidi object, after SetParagraph may have resolved only the levels of
     75   * the text. Therefore, `CountRuns` may have to allocate memory, and may fail
     76   * doing so, in which case an `ICUError` is returned instead of the count.
     77   */
     78  Result<int32_t, ICUError> CountRuns();
     79 
     80  /**
     81   * Get the next logical run. A logical run is a stretch of text that has the
     82   * same directionality and embedding level. These runs are in memory order,
     83   * and not in display order.
     84   *
     85   * Important! `Bidi::CountRuns` must be called before calling this method.
     86   *
     87   * @param aLogicalStart is the offset into the paragraph text that marks the
     88   *      logical start of the text.
     89   * @param aLogicalLimitOut is an out param for the extent of the logical run
     90   *      (a length per the original note; the name suggests a limit - confirm).
     91   * @param aLevelOut is an out parameter that returns the embedding level for
     92   *      the run.
     93   */
     94  void GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimitOut,
     95                     BidiEmbeddingLevel* aLevelOut);
     96 
     97  /**
     98   * This is a static convenience function that does not use any Bidi object
     99   * state. It is intended to be used for when an application has determined
    100   * the embedding levels of objects (character sequences) and just needs to
    101   * have them reordered (L2).
    102   *
    103   * @param aLevels is an array with `aLength` levels that have been
    104   *      determined by the application.
    105   *
    106   * @param aLength is the number of levels in the array, or, semantically,
    107   *      the number of objects to be reordered. It must be greater than 0.
    108   *
    109   * @param aIndexMap is a pointer to an array of `aLength`
    110   *      indexes which will reflect the reordering of the characters.
    111   *      The array does not need to be initialized.
    112   *      The index map will result in
    113   *        `aIndexMap[aVisualIndex]==aLogicalIndex`.
    114   */
    115  static void ReorderVisual(const BidiEmbeddingLevel* aLevels, int32_t aLength,
    116                            int32_t* aIndexMap);
    117 
    118  /**
    119   * This enum indicates the bidi character type of the first strong character
    120   * in the text passed to `GetBaseDirection`.
    121   * LTR: bidi character type 'L'.
    122   * RTL: bidi character type 'R' or 'AL'.
    123   * Neutral: The rest of bidi character types.
    124   */
    125  enum class BaseDirection { LTR, RTL, Neutral };
    126 
    127  /**
    128   * Get the base direction of `aText` (static: no set paragraph is involved).
    129   */
    130  static BaseDirection GetBaseDirection(Span<const char16_t> aText);
    131 
    132  /**
    133   * Get one run's logical start and length (via the out params) and return its
    134   * directionality. In an RTL run, the character at the logical start is
    135   * visually on the right of the displayed run. The length is the number of
    136   * characters in the run. `Bidi::CountRuns` should be called beforehand.
    137   *
    138   * @param aRunIndex is the number of the run in visual order, in the
    139   *      range `[0..CountRuns-1]`.
    140   *
    141   * @param aLogicalStart receives the run's first logical character index.
    142   *      The pointer may be `nullptr` if this index is not needed.
    143   *
    144   * @param aLength receives the number of characters (at least one) in the run.
    145   *      The pointer may be `nullptr` if this is not needed.
    146   *
    147   * Note that in right-to-left runs, the code places modifier letters before
    148   * base characters and second surrogates before first ones.
    149   */
    150  BidiDirection GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart,
    151                             int32_t* aLength);
    152 
    153 private:
    154 #if USE_RUST_UNICODE_BIDI
    155  using UnicodeBidi = mozilla::intl::ffi::UnicodeBidi;
    156  struct BidiFreePolicy {  // Deleter type used by mBidi below.
    157    void operator()(void* aPtr) {
    158      bidi_destroy(static_cast<UnicodeBidi*>(aPtr));
    159    }
    160  };
    161  mozilla::UniquePtr<UnicodeBidi, BidiFreePolicy> mBidi;  // Released via bidi_destroy.
    162 #else
    163  ICUPointer<UBiDi> mBidi = ICUPointer<UBiDi>(nullptr);  // Starts out null.
    164 
    165  /**
    166   * An array of levels that is the same length as the paragraph from
    167   * `Bidi::SetParagraph`.
    168   */
    169  const BidiEmbeddingLevel* mLevels = nullptr;
    170 
    171  /**
    172   * The length of the paragraph from `Bidi::SetParagraph`.
    173   */
    174  int32_t mLength = 0;
    175 #endif  // USE_RUST_UNICODE_BIDI
    176 };
    177 
    178 }  // namespace mozilla::intl
    179 #endif