tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lib.rs (5846B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      3 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 use icu_properties::CodePointMapData;
      6 
      7 use unicode_bidi::level::Level;
      8 use unicode_bidi::utf16;
      9 use unicode_bidi::Direction;
     10 
     11 use core::ops::Range;
     12 use core::slice;
     13 
     14 /// LevelRun type to be returned to C++.
     15 /// 32-bit indexes (rather than usize) are sufficient here because Gecko works
     16 /// with 32-bit indexes when collecting the text buffer for a paragraph.
     17 #[repr(C)]
     18 pub struct LevelRun {
     19    start: u32,
     20    length: u32,
     21    level: u8,
     22 }
     23 
     24 /// Bidi object to be exposed to Gecko via FFI.
     25 pub struct UnicodeBidi<'a> {
     26    paragraph_info: utf16::ParagraphBidiInfo<'a>,
     27    resolved: Option<(Vec<Level>, Vec<Range<usize>>)>,
     28 }
     29 
     30 impl UnicodeBidi<'_> {
     31    /// Create a new UnicodeBidi object representing the given text. This creates
     32    /// the unicode-bidi ParagraphBidiInfo struct, and will cache the resolved
     33    /// levels and visual-runs array once created.
     34    /// The caller is responsible to ensure the text buffer remains valid
     35    /// as long as the UnicodeBidi object exists.
     36    fn new<'a>(text: *const u16, length: usize, level: u8) -> Box<Self> {
     37        let text = unsafe { slice::from_raw_parts(text, length) };
     38        let level = if let Ok(level) = Level::new(level) {
     39            Some(level)
     40        } else {
     41            None
     42        };
     43        let adapter = CodePointMapData::<icu_properties::props::BidiClass>::new();
     44        Box::new(UnicodeBidi {
     45            paragraph_info: utf16::ParagraphBidiInfo::new_with_data_source(&adapter, text, level),
     46            resolved: None,
     47        })
     48    }
     49 
     50    #[inline]
     51    fn resolved(&mut self) -> &(Vec<Level>, Vec<Range<usize>>) {
     52        if self.resolved.is_none() {
     53            let len = self.paragraph_info.text.len();
     54            self.resolved = Some(self.paragraph_info.visual_runs(0..len));
     55        }
     56        self.resolved.as_ref().unwrap()
     57    }
     58 }
     59 
     60 /// Create a new UnicodeBidi object for the given text.
     61 /// NOTE that the text buffer must remain valid for the lifetime of this object!
     62 #[no_mangle]
     63 pub extern "C" fn bidi_new<'a>(text: *const u16, length: usize, level: u8) -> *mut UnicodeBidi<'a> {
     64    Box::into_raw(UnicodeBidi::<'a>::new(text, length, level))
     65 }
     66 
     67 /// Destroy the Bidi object.
     68 #[no_mangle]
     69 pub extern "C" fn bidi_destroy(bidi: *mut UnicodeBidi) {
     70    if bidi.is_null() {
     71        return;
     72    }
     73    let _ = unsafe { Box::from_raw(bidi) };
     74 }
     75 
     76 /// Get the length of the text covered by the Bidi object.
     77 #[no_mangle]
     78 pub extern "C" fn bidi_get_length(bidi: &UnicodeBidi) -> i32 {
     79    bidi.paragraph_info.text.len().try_into().unwrap()
     80 }
     81 
     82 /// Get the paragraph direction: LTR=1, RTL=-1, mixed=0.
     83 #[no_mangle]
     84 pub extern "C" fn bidi_get_direction(bidi: &UnicodeBidi) -> i8 {
     85    match bidi.paragraph_info.direction() {
     86        Direction::Mixed => 0,
     87        Direction::Ltr => 1,
     88        Direction::Rtl => -1,
     89    }
     90 }
     91 
     92 /// Get the paragraph level.
     93 #[no_mangle]
     94 pub extern "C" fn bidi_get_paragraph_level(bidi: &UnicodeBidi) -> u8 {
     95    bidi.paragraph_info.paragraph_level.into()
     96 }
     97 
     98 /// Get the number of runs present.
     99 #[no_mangle]
    100 pub extern "C" fn bidi_count_runs(bidi: &mut UnicodeBidi) -> i32 {
    101    if bidi.paragraph_info.text.is_empty() {
    102        return 0;
    103    }
    104    bidi.resolved().1.len().try_into().unwrap()
    105 }
    106 
    107 /// Get a pointer to the Levels array. The resulting pointer is valid only as long as
    108 /// the UnicodeBidi object exists!
    109 #[no_mangle]
    110 pub extern "C" fn bidi_get_levels(bidi: &mut UnicodeBidi) -> *const Level {
    111    bidi.resolved().0.as_ptr()
    112 }
    113 
    114 /// Get the extent of the run at the given index in the visual runs array.
    115 /// This would panic!() if run_index is out of range (see bidi_count_runs),
    116 /// or if the run's start or length exceeds u32::MAX (which cannot happen
    117 /// because Gecko can't create such a huge text buffer).
    118 #[no_mangle]
    119 pub extern "C" fn bidi_get_visual_run(bidi: &mut UnicodeBidi, run_index: u32) -> LevelRun {
    120    let level_runs = &bidi.resolved().1;
    121    let start = level_runs[run_index as usize].start;
    122    let length = level_runs[run_index as usize].end - start;
    123    LevelRun {
    124        start: start.try_into().unwrap(),
    125        length: length.try_into().unwrap(),
    126        level: bidi.resolved().0[start].into(),
    127    }
    128 }
    129 
    130 /// Return index map showing the result of reordering using the given levels array.
    131 /// (This is a generic helper that does not use a UnicodeBidi object, it just takes an
    132 /// arbitrary array of levels.)
    133 #[no_mangle]
    134 pub extern "C" fn bidi_reorder_visual(levels: *const u8, length: usize, index_map: *mut i32) {
    135    let levels = unsafe { slice::from_raw_parts(levels as *const Level, length) };
    136    let result = unsafe { slice::from_raw_parts_mut(index_map, length) };
    137    let reordered = utf16::BidiInfo::reorder_visual(levels);
    138    for i in 0..length {
    139        result[i] = reordered[i].try_into().unwrap();
    140    }
    141 }
    142 
    143 /// Get the base direction for the given text, returning 1 for LTR, -1 for RTL,
    144 /// and 0 for neutral. If first_paragraph is true, only the first paragraph will be considered;
    145 /// if false, subsequent paragraphs may be considered until a non-neutral character is found.
    146 #[no_mangle]
    147 pub extern "C" fn bidi_get_base_direction(
    148    text: *const u16,
    149    length: usize,
    150    first_paragraph: bool,
    151 ) -> i8 {
    152    let text = unsafe { slice::from_raw_parts(text, length) };
    153    let adapter = CodePointMapData::<icu_properties::props::BidiClass>::new();
    154    let direction = if first_paragraph {
    155        unicode_bidi::get_base_direction_with_data_source(&adapter, text)
    156    } else {
    157        unicode_bidi::get_base_direction_full_with_data_source(&adapter, text)
    158    };
    159    match direction {
    160        Direction::Mixed => 0,
    161        Direction::Ltr => 1,
    162        Direction::Rtl => -1,
    163    }
    164 }