tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mod.rs (15084B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 #![allow(unsafe_code)]
      6 // This is needed for the constants in atom_macro.rs, because we have some
      7 // atoms whose names differ only by case, e.g. datetime and dateTime.
      8 #![allow(non_upper_case_globals)]
      9 
     10 //! A drop-in replacement for string_cache, but backed by Gecko `nsAtom`s.
     11 
     12 use crate::gecko_bindings::bindings::Gecko_AddRefAtom;
     13 use crate::gecko_bindings::bindings::Gecko_Atomize;
     14 use crate::gecko_bindings::bindings::Gecko_Atomize16;
     15 use crate::gecko_bindings::bindings::Gecko_ReleaseAtom;
     16 use crate::gecko_bindings::structs::root::mozilla::detail::gGkAtoms;
     17 use crate::gecko_bindings::structs::root::mozilla::detail::GkAtoms_Atoms_AtomsCount;
     18 use crate::gecko_bindings::structs::{nsAtom, nsDynamicAtom, nsStaticAtom};
     19 use nsstring::{nsAString, nsStr};
     20 use precomputed_hash::PrecomputedHash;
     21 use serde::{Deserialize, Serialize};
     22 use std::borrow::{Borrow, Cow};
     23 use std::char::{self, DecodeUtf16};
     24 use std::fmt::{self, Write};
     25 use std::hash::{Hash, Hasher};
     26 use std::iter::Cloned;
     27 use std::mem::{self, ManuallyDrop};
     28 use std::num::NonZeroUsize;
     29 use std::ops::Deref;
     30 use std::{slice, str};
     31 use style_traits::SpecifiedValueInfo;
     32 use to_shmem::{SharedMemoryBuilder, ToShmem};
     33 
     34 #[macro_use]
     35 #[allow(improper_ctypes, non_camel_case_types, missing_docs)]
     36 pub mod atom_macro {
     37    include!(concat!(env!("OUT_DIR"), "/gecko/atom_macro.rs"));
     38 }
     39 
     40 #[macro_use]
     41 pub mod namespace;
     42 
     43 pub use self::namespace::{Namespace, WeakNamespace};
     44 
     45 /// A handle to a Gecko atom. This is a type that can represent either:
     46 ///
     47 ///  * A strong reference to a dynamic atom (an `nsAtom` pointer), in which case
     48 ///    the `usize` just holds the pointer value.
     49 ///
     50 ///  * An index from `gGkAtoms` to the `nsStaticAtom` object (shifted to the left one bit, and with
     51 ///    the lower bit set to `1` to differentiate it from the above), so `(index << 1 | 1)`.
     52 ///
     53 #[derive(Eq, PartialEq)]
     54 #[repr(C)]
     55 pub struct Atom(NonZeroUsize);
     56 
     57 impl Serialize for Atom {
     58    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     59    where
     60        S: serde::Serializer,
     61    {
     62        // TODO(dshin, Bug 1929015): Optimization for static atoms is possible.
     63        self.deref().with_str(|s| serializer.serialize_str(s))
     64    }
     65 }
     66 
     67 struct AtomStrVisitor;
     68 impl<'de> serde::de::Visitor<'de> for AtomStrVisitor {
     69    type Value = Atom;
     70 
     71    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
     72        write!(formatter, "A string to atomize")
     73    }
     74 
     75    fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
     76    where
     77        E: serde::de::Error,
     78    {
     79        Ok(Atom::from(s))
     80    }
     81 }
     82 
     83 impl<'de> Deserialize<'de> for Atom {
     84    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     85    where
     86        D: serde::Deserializer<'de>,
     87    {
     88        deserializer.deserialize_str(AtomStrVisitor)
     89    }
     90 }
     91 
     92 /// An atom *without* a strong reference.
     93 ///
     94 /// Only usable as `&'a WeakAtom`,
     95 /// where `'a` is the lifetime of something that holds a strong reference to that atom.
     96 pub struct WeakAtom(nsAtom);
     97 
     98 /// The number of static atoms we have.
     99 const STATIC_ATOM_COUNT: usize = GkAtoms_Atoms_AtomsCount as usize;
    100 
    101 impl Deref for Atom {
    102    type Target = WeakAtom;
    103 
    104    #[inline]
    105    fn deref(&self) -> &WeakAtom {
    106        unsafe {
    107            let addr = if self.is_static() {
    108                // This is really hot.
    109                &gGkAtoms.mAtoms.get_unchecked(self.0.get() >> 1)._base as *const nsAtom
    110            } else {
    111                self.0.get() as *const nsAtom
    112            };
    113            WeakAtom::new(addr as *const nsAtom)
    114        }
    115    }
    116 }
    117 
    118 impl PrecomputedHash for Atom {
    119    #[inline]
    120    fn precomputed_hash(&self) -> u32 {
    121        self.get_hash()
    122    }
    123 }
    124 
    125 impl Borrow<WeakAtom> for Atom {
    126    #[inline]
    127    fn borrow(&self) -> &WeakAtom {
    128        self
    129    }
    130 }
    131 
    132 impl ToShmem for Atom {
    133    fn to_shmem(&self, _builder: &mut SharedMemoryBuilder) -> to_shmem::Result<Self> {
    134        if !self.is_static() {
    135            return Err(format!(
    136                "ToShmem failed for Atom: must be a static atom: {}",
    137                self
    138            ));
    139        }
    140 
    141        Ok(ManuallyDrop::new(Atom(self.0)))
    142    }
    143 }
    144 
    145 impl Eq for WeakAtom {}
    146 impl PartialEq for WeakAtom {
    147    #[inline]
    148    fn eq(&self, other: &Self) -> bool {
    149        let weak: *const WeakAtom = self;
    150        let other: *const WeakAtom = other;
    151        weak == other
    152    }
    153 }
    154 
    155 impl PartialEq<Atom> for WeakAtom {
    156    #[inline]
    157    fn eq(&self, other: &Atom) -> bool {
    158        self == &**other
    159    }
    160 }
    161 
    162 unsafe impl Send for Atom {}
    163 unsafe impl Sync for Atom {}
    164 unsafe impl Sync for WeakAtom {}
    165 
    166 impl WeakAtom {
    167    /// Construct a `WeakAtom` from a raw `nsAtom`.
    168    #[inline]
    169    pub unsafe fn new<'a>(atom: *const nsAtom) -> &'a mut Self {
    170        &mut *(atom as *mut WeakAtom)
    171    }
    172 
    173    /// Clone this atom, bumping the refcount if the atom is not static.
    174    #[inline]
    175    pub fn clone(&self) -> Atom {
    176        unsafe { Atom::from_raw(self.as_ptr()) }
    177    }
    178 
    179    /// Get the atom hash.
    180    #[inline]
    181    pub fn get_hash(&self) -> u32 {
    182        self.0.mHash
    183    }
    184 
    185    /// Get the atom as a slice of utf-16 chars.
    186    #[inline]
    187    pub fn as_slice(&self) -> &[u16] {
    188        let string = if self.is_static() {
    189            let atom_ptr = self.as_ptr() as *const nsStaticAtom;
    190            let string_offset = unsafe { (*atom_ptr).mStringOffset };
    191            let string_offset = -(string_offset as isize);
    192            let u8_ptr = atom_ptr as *const u8;
    193            // It is safe to use offset() here because both addresses are within
    194            // the same struct, e.g. mozilla::detail::gGkAtoms.
    195            unsafe { u8_ptr.offset(string_offset) as *const u16 }
    196        } else {
    197            let atom_ptr = self.as_ptr() as *const nsDynamicAtom;
    198            let buffer_ptr = unsafe { (*atom_ptr).mStringBuffer.mRawPtr };
    199            // Dynamic atom chars are stored at the end of the string buffer.
    200            unsafe { buffer_ptr.offset(1) as *const u16 }
    201        };
    202        unsafe { slice::from_raw_parts(string, self.len() as usize) }
    203    }
    204 
    205    // NOTE: don't expose this, since it's slow, and easy to be misused.
    206    fn chars(&self) -> DecodeUtf16<Cloned<slice::Iter<'_, u16>>> {
    207        char::decode_utf16(self.as_slice().iter().cloned())
    208    }
    209 
    210    /// Execute `cb` with the string that this atom represents.
    211    ///
    212    /// Find alternatives to this function when possible, please, since it's
    213    /// pretty slow.
    214    pub fn with_str<F, Output>(&self, cb: F) -> Output
    215    where
    216        F: FnOnce(&str) -> Output,
    217    {
    218        let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit();
    219        let buffer = unsafe { &mut *buffer.as_mut_ptr() };
    220 
    221        // The total string length in utf16 is going to be less than or equal
    222        // the slice length (each utf16 character is going to take at least one
    223        // and at most 2 items in the utf16 slice).
    224        //
    225        // Each of those characters will take at most four bytes in the utf8
    226        // one. Thus if the slice is less than 64 / 4 (16) we can guarantee that
    227        // we'll decode it in place.
    228        let owned_string;
    229        let len = self.len();
    230        let utf8_slice = if len <= 16 {
    231            let mut total_len = 0;
    232 
    233            for c in self.chars() {
    234                let c = c.unwrap_or(char::REPLACEMENT_CHARACTER);
    235                let utf8_len = c.encode_utf8(&mut buffer[total_len..]).len();
    236                total_len += utf8_len;
    237            }
    238 
    239            let slice = unsafe { str::from_utf8_unchecked(&buffer[..total_len]) };
    240            debug_assert_eq!(slice, String::from_utf16_lossy(self.as_slice()));
    241            slice
    242        } else {
    243            owned_string = String::from_utf16_lossy(self.as_slice());
    244            &*owned_string
    245        };
    246 
    247        cb(utf8_slice)
    248    }
    249 
    250    /// Returns whether this atom is static.
    251    #[inline]
    252    pub fn is_static(&self) -> bool {
    253        self.0.mIsStatic() != 0
    254    }
    255 
    256    /// Returns whether this atom is ascii lowercase.
    257    #[inline]
    258    fn is_ascii_lowercase(&self) -> bool {
    259        self.0.mIsAsciiLowercase() != 0
    260    }
    261 
    262    /// Returns the length of the atom string.
    263    #[inline]
    264    pub fn len(&self) -> u32 {
    265        self.0.mLength()
    266    }
    267 
    268    /// Returns whether this atom is the empty string.
    269    #[inline]
    270    pub fn is_empty(&self) -> bool {
    271        self.len() == 0
    272    }
    273 
    274    /// Returns the atom as a mutable pointer.
    275    #[inline]
    276    pub fn as_ptr(&self) -> *mut nsAtom {
    277        let const_ptr: *const nsAtom = &self.0;
    278        const_ptr as *mut nsAtom
    279    }
    280 
    281    /// Convert this atom to ASCII lower-case
    282    pub fn to_ascii_lowercase(&self) -> Atom {
    283        if self.is_ascii_lowercase() {
    284            return self.clone();
    285        }
    286 
    287        let slice = self.as_slice();
    288        let mut buffer = mem::MaybeUninit::<[u16; 64]>::uninit();
    289        let buffer = unsafe { &mut *buffer.as_mut_ptr() };
    290        let mut vec;
    291        let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) {
    292            buffer_prefix.copy_from_slice(slice);
    293            buffer_prefix
    294        } else {
    295            vec = slice.to_vec();
    296            &mut vec
    297        };
    298        for char16 in &mut *mutable_slice {
    299            if *char16 <= 0x7F {
    300                *char16 = (*char16 as u8).to_ascii_lowercase() as u16
    301            }
    302        }
    303        Atom::from(&*mutable_slice)
    304    }
    305 
    306    /// Return whether two atoms are ASCII-case-insensitive matches
    307    #[inline]
    308    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
    309        if self == other {
    310            return true;
    311        }
    312 
    313        // If we know both atoms are ascii-lowercase, then we can stick with
    314        // pointer equality.
    315        if self.is_ascii_lowercase() && other.is_ascii_lowercase() {
    316            debug_assert!(!self.eq_ignore_ascii_case_slow(other));
    317            return false;
    318        }
    319 
    320        self.eq_ignore_ascii_case_slow(other)
    321    }
    322 
    323    fn eq_ignore_ascii_case_slow(&self, other: &Self) -> bool {
    324        let a = self.as_slice();
    325        let b = other.as_slice();
    326 
    327        if a.len() != b.len() {
    328            return false;
    329        }
    330 
    331        a.iter().zip(b).all(|(&a16, &b16)| {
    332            if a16 <= 0x7F && b16 <= 0x7F {
    333                (a16 as u8).eq_ignore_ascii_case(&(b16 as u8))
    334            } else {
    335                a16 == b16
    336            }
    337        })
    338    }
    339 }
    340 
    341 impl fmt::Debug for WeakAtom {
    342    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
    343        write!(w, "Gecko WeakAtom({:p}, {})", self, self)
    344    }
    345 }
    346 
    347 impl fmt::Display for WeakAtom {
    348    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
    349        for c in self.chars() {
    350            w.write_char(c.unwrap_or(char::REPLACEMENT_CHARACTER))?
    351        }
    352        Ok(())
    353    }
    354 }
    355 
    356 #[inline]
    357 unsafe fn make_handle(ptr: *const nsAtom) -> NonZeroUsize {
    358    debug_assert!(!ptr.is_null());
    359    if !WeakAtom::new(ptr).is_static() {
    360        NonZeroUsize::new_unchecked(ptr as usize)
    361    } else {
    362        make_static_handle(ptr as *mut nsStaticAtom)
    363    }
    364 }
    365 
    366 #[inline]
    367 unsafe fn make_static_handle(ptr: *const nsStaticAtom) -> NonZeroUsize {
    368    let index = ptr.offset_from(&gGkAtoms.mAtoms[0] as *const _);
    369    debug_assert!(index >= 0, "Should be a non-negative index");
    370    debug_assert!(
    371        (index as usize) < STATIC_ATOM_COUNT,
    372        "Should be a valid static atom index"
    373    );
    374    NonZeroUsize::new_unchecked(((index as usize) << 1) | 1)
    375 }
    376 
    377 impl Atom {
    378    #[inline]
    379    fn is_static(&self) -> bool {
    380        self.0.get() & 1 == 1
    381    }
    382 
    383    /// Execute a callback with the atom represented by `ptr`.
    384    pub unsafe fn with<F, R>(ptr: *const nsAtom, callback: F) -> R
    385    where
    386        F: FnOnce(&Atom) -> R,
    387    {
    388        let atom = Atom(make_handle(ptr as *mut nsAtom));
    389        let ret = callback(&atom);
    390        mem::forget(atom);
    391        ret
    392    }
    393 
    394    /// Creates a static atom from its index in the static atom table, without
    395    /// checking.
    396    #[inline]
    397    pub const unsafe fn from_index_unchecked(index: u16) -> Self {
    398        debug_assert!((index as usize) < STATIC_ATOM_COUNT);
    399        Atom(NonZeroUsize::new_unchecked(((index as usize) << 1) | 1))
    400    }
    401 
    402    /// Creates an atom from an atom pointer.
    403    #[inline(always)]
    404    pub unsafe fn from_raw(ptr: *mut nsAtom) -> Self {
    405        let atom = Atom(make_handle(ptr));
    406        if !atom.is_static() {
    407            Gecko_AddRefAtom(ptr);
    408        }
    409        atom
    410    }
    411 
    412    /// Creates an atom from an atom pointer that has already had AddRef
    413    /// called on it. This may be a static or dynamic atom.
    414    #[inline]
    415    pub unsafe fn from_addrefed(ptr: *mut nsAtom) -> Self {
    416        assert!(!ptr.is_null());
    417        Atom(make_handle(ptr))
    418    }
    419 
    420    /// Convert this atom into an addrefed nsAtom pointer.
    421    #[inline]
    422    pub fn into_addrefed(self) -> *mut nsAtom {
    423        let ptr = self.as_ptr();
    424        mem::forget(self);
    425        ptr
    426    }
    427 }
    428 
    429 impl Hash for Atom {
    430    fn hash<H>(&self, state: &mut H)
    431    where
    432        H: Hasher,
    433    {
    434        state.write_u32(self.get_hash());
    435    }
    436 }
    437 
    438 impl Hash for WeakAtom {
    439    fn hash<H>(&self, state: &mut H)
    440    where
    441        H: Hasher,
    442    {
    443        state.write_u32(self.get_hash());
    444    }
    445 }
    446 
    447 impl Clone for Atom {
    448    #[inline(always)]
    449    fn clone(&self) -> Atom {
    450        unsafe {
    451            let atom = Atom(self.0);
    452            if !atom.is_static() {
    453                Gecko_AddRefAtom(atom.as_ptr());
    454            }
    455            atom
    456        }
    457    }
    458 }
    459 
    460 impl Drop for Atom {
    461    #[inline]
    462    fn drop(&mut self) {
    463        if !self.is_static() {
    464            unsafe {
    465                Gecko_ReleaseAtom(self.as_ptr());
    466            }
    467        }
    468    }
    469 }
    470 
    471 impl Default for Atom {
    472    #[inline]
    473    fn default() -> Self {
    474        atom!("")
    475    }
    476 }
    477 
    478 impl fmt::Debug for Atom {
    479    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
    480        write!(w, "Atom(0x{:08x}, {})", self.0, self)
    481    }
    482 }
    483 
    484 impl fmt::Display for Atom {
    485    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
    486        self.deref().fmt(w)
    487    }
    488 }
    489 
    490 impl<'a> From<&'a str> for Atom {
    491    #[inline]
    492    fn from(string: &str) -> Atom {
    493        debug_assert!(string.len() <= u32::max_value() as usize);
    494        unsafe {
    495            Atom::from_addrefed(Gecko_Atomize(
    496                string.as_ptr() as *const _,
    497                string.len() as u32,
    498            ))
    499        }
    500    }
    501 }
    502 
    503 impl<'a> From<&'a [u16]> for Atom {
    504    #[inline]
    505    fn from(slice: &[u16]) -> Atom {
    506        Atom::from(&*nsStr::from(slice))
    507    }
    508 }
    509 
    510 impl<'a> From<&'a nsAString> for Atom {
    511    #[inline]
    512    fn from(string: &nsAString) -> Atom {
    513        unsafe { Atom::from_addrefed(Gecko_Atomize16(string)) }
    514    }
    515 }
    516 
    517 impl<'a> From<Cow<'a, str>> for Atom {
    518    #[inline]
    519    fn from(string: Cow<'a, str>) -> Atom {
    520        Atom::from(&*string)
    521    }
    522 }
    523 
    524 impl From<String> for Atom {
    525    #[inline]
    526    fn from(string: String) -> Atom {
    527        Atom::from(&*string)
    528    }
    529 }
    530 
    531 malloc_size_of::malloc_size_of_is_0!(Atom);
    532 
    533 impl SpecifiedValueInfo for Atom {}