mod.rs (15084B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

// This module is a thin layer over Gecko's atom table and reaches it through
// the `gecko_bindings` FFI functions imported below, which requires `unsafe`.
#![allow(unsafe_code)]
// This is needed for the constants in atom_macro.rs, because we have some
// atoms whose names differ only by case, e.g. datetime and dateTime.
#![allow(non_upper_case_globals)]

//! A drop-in replacement for string_cache, but backed by Gecko `nsAtom`s.

use crate::gecko_bindings::bindings::Gecko_AddRefAtom;
use crate::gecko_bindings::bindings::Gecko_Atomize;
use crate::gecko_bindings::bindings::Gecko_Atomize16;
use crate::gecko_bindings::bindings::Gecko_ReleaseAtom;
use crate::gecko_bindings::structs::root::mozilla::detail::gGkAtoms;
use crate::gecko_bindings::structs::root::mozilla::detail::GkAtoms_Atoms_AtomsCount;
use crate::gecko_bindings::structs::{nsAtom, nsDynamicAtom, nsStaticAtom};
use nsstring::{nsAString, nsStr};
use precomputed_hash::PrecomputedHash;
use serde::{Deserialize, Serialize};
use std::borrow::{Borrow, Cow};
use std::char::{self, DecodeUtf16};
use std::fmt::{self, Write};
use std::hash::{Hash, Hasher};
use std::iter::Cloned;
use std::mem::{self, ManuallyDrop};
use std::num::NonZeroUsize;
use std::ops::Deref;
use std::{slice, str};
use style_traits::SpecifiedValueInfo;
use to_shmem::{SharedMemoryBuilder, ToShmem};

/// Code pulled in at build time from `$OUT_DIR/gecko/atom_macro.rs`.
///
/// The lints below are relaxed for the included file rather than for
/// hand-written code in this module.
/// NOTE(review): presumably this is also where the `atom!` macro used in this
/// file is defined (hence `#[macro_use]`) — confirm against the build script.
#[macro_use]
#[allow(improper_ctypes, non_camel_case_types, missing_docs)]
pub mod atom_macro {
    include!(concat!(env!("OUT_DIR"), "/gecko/atom_macro.rs"));
}

/// Namespace handles; `Namespace` and `WeakNamespace` are re-exported below.
#[macro_use]
pub mod namespace;

pub use self::namespace::{Namespace, WeakNamespace};

/// A handle to a Gecko atom. This is a type that can represent either:
///
///  * A strong reference to a dynamic atom (an `nsAtom` pointer), in which case
///    the `usize` just holds the pointer value.
49 /// 50 /// * An index from `gGkAtoms` to the `nsStaticAtom` object (shifted to the left one bit, and with 51 /// the lower bit set to `1` to differentiate it from the above), so `(index << 1 | 1)`. 52 /// 53 #[derive(Eq, PartialEq)] 54 #[repr(C)] 55 pub struct Atom(NonZeroUsize); 56 57 impl Serialize for Atom { 58 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> 59 where 60 S: serde::Serializer, 61 { 62 // TODO(dshin, Bug 1929015): Optimization for static atoms is possible. 63 self.deref().with_str(|s| serializer.serialize_str(s)) 64 } 65 } 66 67 struct AtomStrVisitor; 68 impl<'de> serde::de::Visitor<'de> for AtomStrVisitor { 69 type Value = Atom; 70 71 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 72 write!(formatter, "A string to atomize") 73 } 74 75 fn visit_str<E>(self, s: &str) -> Result<Self::Value, E> 76 where 77 E: serde::de::Error, 78 { 79 Ok(Atom::from(s)) 80 } 81 } 82 83 impl<'de> Deserialize<'de> for Atom { 84 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 85 where 86 D: serde::Deserializer<'de>, 87 { 88 deserializer.deserialize_str(AtomStrVisitor) 89 } 90 } 91 92 /// An atom *without* a strong reference. 93 /// 94 /// Only usable as `&'a WeakAtom`, 95 /// where `'a` is the lifetime of something that holds a strong reference to that atom. 96 pub struct WeakAtom(nsAtom); 97 98 /// The number of static atoms we have. 99 const STATIC_ATOM_COUNT: usize = GkAtoms_Atoms_AtomsCount as usize; 100 101 impl Deref for Atom { 102 type Target = WeakAtom; 103 104 #[inline] 105 fn deref(&self) -> &WeakAtom { 106 unsafe { 107 let addr = if self.is_static() { 108 // This is really hot. 
109 &gGkAtoms.mAtoms.get_unchecked(self.0.get() >> 1)._base as *const nsAtom 110 } else { 111 self.0.get() as *const nsAtom 112 }; 113 WeakAtom::new(addr as *const nsAtom) 114 } 115 } 116 } 117 118 impl PrecomputedHash for Atom { 119 #[inline] 120 fn precomputed_hash(&self) -> u32 { 121 self.get_hash() 122 } 123 } 124 125 impl Borrow<WeakAtom> for Atom { 126 #[inline] 127 fn borrow(&self) -> &WeakAtom { 128 self 129 } 130 } 131 132 impl ToShmem for Atom { 133 fn to_shmem(&self, _builder: &mut SharedMemoryBuilder) -> to_shmem::Result<Self> { 134 if !self.is_static() { 135 return Err(format!( 136 "ToShmem failed for Atom: must be a static atom: {}", 137 self 138 )); 139 } 140 141 Ok(ManuallyDrop::new(Atom(self.0))) 142 } 143 } 144 145 impl Eq for WeakAtom {} 146 impl PartialEq for WeakAtom { 147 #[inline] 148 fn eq(&self, other: &Self) -> bool { 149 let weak: *const WeakAtom = self; 150 let other: *const WeakAtom = other; 151 weak == other 152 } 153 } 154 155 impl PartialEq<Atom> for WeakAtom { 156 #[inline] 157 fn eq(&self, other: &Atom) -> bool { 158 self == &**other 159 } 160 } 161 162 unsafe impl Send for Atom {} 163 unsafe impl Sync for Atom {} 164 unsafe impl Sync for WeakAtom {} 165 166 impl WeakAtom { 167 /// Construct a `WeakAtom` from a raw `nsAtom`. 168 #[inline] 169 pub unsafe fn new<'a>(atom: *const nsAtom) -> &'a mut Self { 170 &mut *(atom as *mut WeakAtom) 171 } 172 173 /// Clone this atom, bumping the refcount if the atom is not static. 174 #[inline] 175 pub fn clone(&self) -> Atom { 176 unsafe { Atom::from_raw(self.as_ptr()) } 177 } 178 179 /// Get the atom hash. 180 #[inline] 181 pub fn get_hash(&self) -> u32 { 182 self.0.mHash 183 } 184 185 /// Get the atom as a slice of utf-16 chars. 
186 #[inline] 187 pub fn as_slice(&self) -> &[u16] { 188 let string = if self.is_static() { 189 let atom_ptr = self.as_ptr() as *const nsStaticAtom; 190 let string_offset = unsafe { (*atom_ptr).mStringOffset }; 191 let string_offset = -(string_offset as isize); 192 let u8_ptr = atom_ptr as *const u8; 193 // It is safe to use offset() here because both addresses are within 194 // the same struct, e.g. mozilla::detail::gGkAtoms. 195 unsafe { u8_ptr.offset(string_offset) as *const u16 } 196 } else { 197 let atom_ptr = self.as_ptr() as *const nsDynamicAtom; 198 let buffer_ptr = unsafe { (*atom_ptr).mStringBuffer.mRawPtr }; 199 // Dynamic atom chars are stored at the end of the string buffer. 200 unsafe { buffer_ptr.offset(1) as *const u16 } 201 }; 202 unsafe { slice::from_raw_parts(string, self.len() as usize) } 203 } 204 205 // NOTE: don't expose this, since it's slow, and easy to be misused. 206 fn chars(&self) -> DecodeUtf16<Cloned<slice::Iter<'_, u16>>> { 207 char::decode_utf16(self.as_slice().iter().cloned()) 208 } 209 210 /// Execute `cb` with the string that this atom represents. 211 /// 212 /// Find alternatives to this function when possible, please, since it's 213 /// pretty slow. 214 pub fn with_str<F, Output>(&self, cb: F) -> Output 215 where 216 F: FnOnce(&str) -> Output, 217 { 218 let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit(); 219 let buffer = unsafe { &mut *buffer.as_mut_ptr() }; 220 221 // The total string length in utf16 is going to be less than or equal 222 // the slice length (each utf16 character is going to take at least one 223 // and at most 2 items in the utf16 slice). 224 // 225 // Each of those characters will take at most four bytes in the utf8 226 // one. Thus if the slice is less than 64 / 4 (16) we can guarantee that 227 // we'll decode it in place. 
228 let owned_string; 229 let len = self.len(); 230 let utf8_slice = if len <= 16 { 231 let mut total_len = 0; 232 233 for c in self.chars() { 234 let c = c.unwrap_or(char::REPLACEMENT_CHARACTER); 235 let utf8_len = c.encode_utf8(&mut buffer[total_len..]).len(); 236 total_len += utf8_len; 237 } 238 239 let slice = unsafe { str::from_utf8_unchecked(&buffer[..total_len]) }; 240 debug_assert_eq!(slice, String::from_utf16_lossy(self.as_slice())); 241 slice 242 } else { 243 owned_string = String::from_utf16_lossy(self.as_slice()); 244 &*owned_string 245 }; 246 247 cb(utf8_slice) 248 } 249 250 /// Returns whether this atom is static. 251 #[inline] 252 pub fn is_static(&self) -> bool { 253 self.0.mIsStatic() != 0 254 } 255 256 /// Returns whether this atom is ascii lowercase. 257 #[inline] 258 fn is_ascii_lowercase(&self) -> bool { 259 self.0.mIsAsciiLowercase() != 0 260 } 261 262 /// Returns the length of the atom string. 263 #[inline] 264 pub fn len(&self) -> u32 { 265 self.0.mLength() 266 } 267 268 /// Returns whether this atom is the empty string. 269 #[inline] 270 pub fn is_empty(&self) -> bool { 271 self.len() == 0 272 } 273 274 /// Returns the atom as a mutable pointer. 
275 #[inline] 276 pub fn as_ptr(&self) -> *mut nsAtom { 277 let const_ptr: *const nsAtom = &self.0; 278 const_ptr as *mut nsAtom 279 } 280 281 /// Convert this atom to ASCII lower-case 282 pub fn to_ascii_lowercase(&self) -> Atom { 283 if self.is_ascii_lowercase() { 284 return self.clone(); 285 } 286 287 let slice = self.as_slice(); 288 let mut buffer = mem::MaybeUninit::<[u16; 64]>::uninit(); 289 let buffer = unsafe { &mut *buffer.as_mut_ptr() }; 290 let mut vec; 291 let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) { 292 buffer_prefix.copy_from_slice(slice); 293 buffer_prefix 294 } else { 295 vec = slice.to_vec(); 296 &mut vec 297 }; 298 for char16 in &mut *mutable_slice { 299 if *char16 <= 0x7F { 300 *char16 = (*char16 as u8).to_ascii_lowercase() as u16 301 } 302 } 303 Atom::from(&*mutable_slice) 304 } 305 306 /// Return whether two atoms are ASCII-case-insensitive matches 307 #[inline] 308 pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { 309 if self == other { 310 return true; 311 } 312 313 // If we know both atoms are ascii-lowercase, then we can stick with 314 // pointer equality. 
315 if self.is_ascii_lowercase() && other.is_ascii_lowercase() { 316 debug_assert!(!self.eq_ignore_ascii_case_slow(other)); 317 return false; 318 } 319 320 self.eq_ignore_ascii_case_slow(other) 321 } 322 323 fn eq_ignore_ascii_case_slow(&self, other: &Self) -> bool { 324 let a = self.as_slice(); 325 let b = other.as_slice(); 326 327 if a.len() != b.len() { 328 return false; 329 } 330 331 a.iter().zip(b).all(|(&a16, &b16)| { 332 if a16 <= 0x7F && b16 <= 0x7F { 333 (a16 as u8).eq_ignore_ascii_case(&(b16 as u8)) 334 } else { 335 a16 == b16 336 } 337 }) 338 } 339 } 340 341 impl fmt::Debug for WeakAtom { 342 fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { 343 write!(w, "Gecko WeakAtom({:p}, {})", self, self) 344 } 345 } 346 347 impl fmt::Display for WeakAtom { 348 fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { 349 for c in self.chars() { 350 w.write_char(c.unwrap_or(char::REPLACEMENT_CHARACTER))? 351 } 352 Ok(()) 353 } 354 } 355 356 #[inline] 357 unsafe fn make_handle(ptr: *const nsAtom) -> NonZeroUsize { 358 debug_assert!(!ptr.is_null()); 359 if !WeakAtom::new(ptr).is_static() { 360 NonZeroUsize::new_unchecked(ptr as usize) 361 } else { 362 make_static_handle(ptr as *mut nsStaticAtom) 363 } 364 } 365 366 #[inline] 367 unsafe fn make_static_handle(ptr: *const nsStaticAtom) -> NonZeroUsize { 368 let index = ptr.offset_from(&gGkAtoms.mAtoms[0] as *const _); 369 debug_assert!(index >= 0, "Should be a non-negative index"); 370 debug_assert!( 371 (index as usize) < STATIC_ATOM_COUNT, 372 "Should be a valid static atom index" 373 ); 374 NonZeroUsize::new_unchecked(((index as usize) << 1) | 1) 375 } 376 377 impl Atom { 378 #[inline] 379 fn is_static(&self) -> bool { 380 self.0.get() & 1 == 1 381 } 382 383 /// Execute a callback with the atom represented by `ptr`. 
384 pub unsafe fn with<F, R>(ptr: *const nsAtom, callback: F) -> R 385 where 386 F: FnOnce(&Atom) -> R, 387 { 388 let atom = Atom(make_handle(ptr as *mut nsAtom)); 389 let ret = callback(&atom); 390 mem::forget(atom); 391 ret 392 } 393 394 /// Creates a static atom from its index in the static atom table, without 395 /// checking. 396 #[inline] 397 pub const unsafe fn from_index_unchecked(index: u16) -> Self { 398 debug_assert!((index as usize) < STATIC_ATOM_COUNT); 399 Atom(NonZeroUsize::new_unchecked(((index as usize) << 1) | 1)) 400 } 401 402 /// Creates an atom from an atom pointer. 403 #[inline(always)] 404 pub unsafe fn from_raw(ptr: *mut nsAtom) -> Self { 405 let atom = Atom(make_handle(ptr)); 406 if !atom.is_static() { 407 Gecko_AddRefAtom(ptr); 408 } 409 atom 410 } 411 412 /// Creates an atom from an atom pointer that has already had AddRef 413 /// called on it. This may be a static or dynamic atom. 414 #[inline] 415 pub unsafe fn from_addrefed(ptr: *mut nsAtom) -> Self { 416 assert!(!ptr.is_null()); 417 Atom(make_handle(ptr)) 418 } 419 420 /// Convert this atom into an addrefed nsAtom pointer. 
421 #[inline] 422 pub fn into_addrefed(self) -> *mut nsAtom { 423 let ptr = self.as_ptr(); 424 mem::forget(self); 425 ptr 426 } 427 } 428 429 impl Hash for Atom { 430 fn hash<H>(&self, state: &mut H) 431 where 432 H: Hasher, 433 { 434 state.write_u32(self.get_hash()); 435 } 436 } 437 438 impl Hash for WeakAtom { 439 fn hash<H>(&self, state: &mut H) 440 where 441 H: Hasher, 442 { 443 state.write_u32(self.get_hash()); 444 } 445 } 446 447 impl Clone for Atom { 448 #[inline(always)] 449 fn clone(&self) -> Atom { 450 unsafe { 451 let atom = Atom(self.0); 452 if !atom.is_static() { 453 Gecko_AddRefAtom(atom.as_ptr()); 454 } 455 atom 456 } 457 } 458 } 459 460 impl Drop for Atom { 461 #[inline] 462 fn drop(&mut self) { 463 if !self.is_static() { 464 unsafe { 465 Gecko_ReleaseAtom(self.as_ptr()); 466 } 467 } 468 } 469 } 470 471 impl Default for Atom { 472 #[inline] 473 fn default() -> Self { 474 atom!("") 475 } 476 } 477 478 impl fmt::Debug for Atom { 479 fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { 480 write!(w, "Atom(0x{:08x}, {})", self.0, self) 481 } 482 } 483 484 impl fmt::Display for Atom { 485 fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { 486 self.deref().fmt(w) 487 } 488 } 489 490 impl<'a> From<&'a str> for Atom { 491 #[inline] 492 fn from(string: &str) -> Atom { 493 debug_assert!(string.len() <= u32::max_value() as usize); 494 unsafe { 495 Atom::from_addrefed(Gecko_Atomize( 496 string.as_ptr() as *const _, 497 string.len() as u32, 498 )) 499 } 500 } 501 } 502 503 impl<'a> From<&'a [u16]> for Atom { 504 #[inline] 505 fn from(slice: &[u16]) -> Atom { 506 Atom::from(&*nsStr::from(slice)) 507 } 508 } 509 510 impl<'a> From<&'a nsAString> for Atom { 511 #[inline] 512 fn from(string: &nsAString) -> Atom { 513 unsafe { Atom::from_addrefed(Gecko_Atomize16(string)) } 514 } 515 } 516 517 impl<'a> From<Cow<'a, str>> for Atom { 518 #[inline] 519 fn from(string: Cow<'a, str>) -> Atom { 520 Atom::from(&*string) 521 } 522 } 523 524 impl From<String> for Atom { 
525 #[inline] 526 fn from(string: String) -> Atom { 527 Atom::from(&*string) 528 } 529 } 530 531 malloc_size_of::malloc_size_of_is_0!(Atom); 532 533 impl SpecifiedValueInfo for Atom {}