lib.rs (19621B)
1 // Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT 2 // file at the top-level directory of this distribution. 3 // 4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license 6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your 7 // option. This file may not be copied, modified, or distributed 8 // except according to those terms. 9 10 // Adapted from third_party/rust/encoding_rs/src/lib.rs, so the 11 // "top-level directory" in the above notice refers to 12 // third_party/rust/encoding_rs/. 13 14 extern crate encoding_rs; 15 extern crate nserror; 16 extern crate nsstring; 17 extern crate xmldecl; 18 19 use encoding_rs::*; 20 use nserror::*; 21 use nsstring::*; 22 use std::slice; 23 24 /// Takes `Option<usize>`, the destination string and a value 25 /// to return on failure and tries to start a bulk write of the 26 /// destination string with the capacity given by the `usize` 27 /// wrapped in the first argument. Returns the bulk write 28 /// handle. 29 macro_rules! try_start_bulk_write { 30 ($needed:expr, 31 $dst:ident, 32 $ret:expr) => {{ 33 let needed = match $needed { 34 Some(needed) => needed, 35 None => { 36 return $ret; 37 } 38 }; 39 match unsafe { $dst.bulk_write(needed, 0, false) } { 40 Err(_) => { 41 return $ret; 42 } 43 Ok(handle) => handle, 44 } 45 }}; 46 } 47 48 #[no_mangle] 49 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring( 50 encoding: *mut *const Encoding, 51 src: *const u8, 52 src_len: usize, 53 dst: *mut nsAString, 54 ) -> nsresult { 55 let (rv, enc) = decode_to_nsstring(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst); 56 *encoding = enc as *const Encoding; 57 rv 58 } 59 60 pub fn decode_to_nsstring( 61 encoding: &'static Encoding, 62 src: &[u8], 63 dst: &mut nsAString, 64 ) -> (nsresult, &'static Encoding) { 65 if let Some((enc, bom_length)) = Encoding::for_bom(src) { 66 return ( 67 decode_to_nsstring_without_bom_handling(enc, &src[bom_length..], dst), 68 enc, 69 ); 70 } 71 ( 72 decode_to_nsstring_without_bom_handling(encoding, src, dst), 73 encoding, 74 ) 75 } 76 77 #[no_mangle] 78 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_with_bom_removal( 79 encoding: *const Encoding, 80 src: *const u8, 81 src_len: usize, 82 dst: *mut nsAString, 83 ) -> nsresult { 84 decode_to_nsstring_with_bom_removal(&*encoding, slice::from_raw_parts(src, src_len), &mut *dst) 85 } 86 87 pub fn decode_to_nsstring_with_bom_removal( 88 encoding: &'static Encoding, 89 src: &[u8], 90 dst: &mut nsAString, 91 ) -> nsresult { 92 let without_bom = if encoding == UTF_8 && src.starts_with(b"\xEF\xBB\xBF") { 93 &src[3..] 94 } else if (encoding == UTF_16LE && src.starts_with(b"\xFF\xFE")) 95 || (encoding == UTF_16BE && src.starts_with(b"\xFE\xFF")) 96 { 97 &src[2..] 98 } else { 99 src 100 }; 101 decode_to_nsstring_without_bom_handling(encoding, without_bom, dst) 102 } 103 104 #[no_mangle] 105 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_without_bom_handling( 106 encoding: *const Encoding, 107 src: *const u8, 108 src_len: usize, 109 dst: *mut nsAString, 110 ) -> nsresult { 111 decode_to_nsstring_without_bom_handling( 112 &*encoding, 113 slice::from_raw_parts(src, src_len), 114 &mut *dst, 115 ) 116 } 117 118 pub fn decode_to_nsstring_without_bom_handling( 119 encoding: &'static Encoding, 120 src: &[u8], 121 dst: &mut nsAString, 122 ) -> nsresult { 123 let mut decoder = encoding.new_decoder_without_bom_handling(); 124 let mut handle = try_start_bulk_write!( 125 decoder.max_utf16_buffer_length(src.len()), 126 dst, 127 NS_ERROR_OUT_OF_MEMORY 128 ); 129 let (result, read, written, had_errors) = 130 decoder.decode_to_utf16(src, handle.as_mut_slice(), true); 131 debug_assert_eq!(result, CoderResult::InputEmpty); 132 debug_assert_eq!(read, src.len()); 133 debug_assert!(written <= handle.as_mut_slice().len()); 134 let _ = handle.finish(written, true); 135 if had_errors { 136 return NS_OK_HAD_REPLACEMENTS; 137 } 138 NS_OK 139 } 140 141 #[no_mangle] 142 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_without_bom_handling_and_without_replacement( 143 encoding: *const Encoding, 144 src: *const u8, 145 src_len: usize, 146 dst: *mut nsAString, 147 ) -> nsresult { 148 decode_to_nsstring_without_bom_handling_and_without_replacement( 149 &*encoding, 150 slice::from_raw_parts(src, src_len), 151 &mut *dst, 152 ) 153 } 154 155 pub fn decode_to_nsstring_without_bom_handling_and_without_replacement( 156 encoding: &'static Encoding, 157 src: &[u8], 158 dst: &mut nsAString, 159 ) -> nsresult { 160 let mut decoder = encoding.new_decoder_without_bom_handling(); 161 let mut handle = try_start_bulk_write!( 162 decoder.max_utf16_buffer_length(src.len()), 163 dst, 164 NS_ERROR_OUT_OF_MEMORY 165 ); 166 let (result, read, written) = 167 decoder.decode_to_utf16_without_replacement(src, handle.as_mut_slice(), true); 168 match result { 169 DecoderResult::InputEmpty => { 170 debug_assert_eq!(read, src.len()); 171 debug_assert!(written <= handle.as_mut_slice().len()); 172 let _ = handle.finish(written, true); 173 NS_OK 174 } 175 DecoderResult::Malformed(_, _) => { 176 // Let handle's drop() run 177 NS_ERROR_UDEC_ILLEGALINPUT 178 } 179 DecoderResult::OutputFull => unreachable!(), 180 } 181 } 182 183 #[no_mangle] 184 pub unsafe extern "C" fn mozilla_encoding_encode_from_utf16( 185 encoding: *mut *const Encoding, 186 src: *const u16, 187 src_len: usize, 188 dst: *mut nsACString, 189 ) -> nsresult { 190 let (rv, enc) = encode_from_utf16(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst); 191 *encoding = enc as *const Encoding; 192 rv 193 } 194 195 pub fn encode_from_utf16( 196 encoding: &'static Encoding, 197 src: &[u16], 198 dst: &mut nsACString, 199 ) -> (nsresult, &'static Encoding) { 200 let output_encoding = encoding.output_encoding(); 201 let mut encoder = output_encoding.new_encoder(); 202 let mut handle = try_start_bulk_write!( 203 encoder.max_buffer_length_from_utf16_if_no_unmappables(src.len()), 204 dst, 205 (NS_ERROR_OUT_OF_MEMORY, output_encoding) 206 ); 207 208 let mut total_read = 0; 209 let mut total_written = 0; 210 let mut total_had_errors = false; 211 loop { 212 let (result, read, written, had_errors) = encoder.encode_from_utf16( 213 &src[total_read..], 214 &mut (handle.as_mut_slice())[total_written..], 215 true, 216 ); 217 total_read += read; 218 total_written += written; 219 total_had_errors |= had_errors; 220 match result { 221 CoderResult::InputEmpty => { 222 debug_assert_eq!(total_read, src.len()); 223 debug_assert!(total_written <= handle.as_mut_slice().len()); 224 let _ = handle.finish(total_written, true); 225 if total_had_errors { 226 return (NS_OK_HAD_REPLACEMENTS, output_encoding); 227 } 228 return (NS_OK, output_encoding); 229 } 230 CoderResult::OutputFull => { 231 if let Some(needed) = checked_add( 232 total_written, 233 encoder.max_buffer_length_from_utf16_if_no_unmappables(src.len() - total_read), 234 ) { 235 if unsafe { 236 handle 237 .restart_bulk_write(needed, total_written, false) 238 .is_ok() 239 } { 240 continue; 241 } 242 } 243 return (NS_ERROR_OUT_OF_MEMORY, output_encoding); 244 } 245 } 246 } 247 } 248 249 #[no_mangle] 250 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring( 251 encoding: *mut *const Encoding, 252 src: *const nsACString, 253 dst: *mut nsACString, 254 ) -> nsresult { 255 debug_assert_ne!(src as usize, dst as usize); 256 let (rv, enc) = decode_to_nscstring(&**encoding, &*src, &mut *dst); 257 *encoding = enc as *const Encoding; 258 rv 259 } 260 261 pub fn decode_to_nscstring( 262 encoding: &'static Encoding, 263 src: &nsACString, 264 dst: &mut nsACString, 265 ) -> (nsresult, &'static Encoding) { 266 if let Some((enc, bom_length)) = Encoding::for_bom(src) { 267 return ( 268 decode_from_slice_to_nscstring_without_bom_handling(enc, &src[bom_length..], dst, 0), 269 enc, 270 ); 271 } 272 ( 273 decode_to_nscstring_without_bom_handling(encoding, src, dst), 274 encoding, 275 ) 276 } 277 278 #[no_mangle] 279 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_with_bom_removal( 280 encoding: *const Encoding, 281 src: *const nsACString, 282 dst: *mut nsACString, 283 ) -> nsresult { 284 debug_assert_ne!(src as usize, dst as usize); 285 decode_to_nscstring_with_bom_removal(&*encoding, &*src, &mut *dst) 286 } 287 288 pub fn decode_to_nscstring_with_bom_removal( 289 encoding: &'static Encoding, 290 src: &nsACString, 291 dst: &mut nsACString, 292 ) -> nsresult { 293 let without_bom = if encoding == UTF_8 && src.starts_with(b"\xEF\xBB\xBF") { 294 &src[3..] 295 } else if (encoding == UTF_16LE && src.starts_with(b"\xFF\xFE")) 296 || (encoding == UTF_16BE && src.starts_with(b"\xFE\xFF")) 297 { 298 &src[2..] 299 } else { 300 return decode_to_nscstring_without_bom_handling(encoding, src, dst); 301 }; 302 decode_from_slice_to_nscstring_without_bom_handling(encoding, without_bom, dst, 0) 303 } 304 305 #[no_mangle] 306 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_without_bom_handling( 307 encoding: *const Encoding, 308 src: *const nsACString, 309 dst: *mut nsACString, 310 ) -> nsresult { 311 debug_assert_ne!(src as usize, dst as usize); 312 decode_to_nscstring_without_bom_handling(&*encoding, &*src, &mut *dst) 313 } 314 315 pub fn decode_to_nscstring_without_bom_handling( 316 encoding: &'static Encoding, 317 src: &nsACString, 318 dst: &mut nsACString, 319 ) -> nsresult { 320 let bytes = &src[..]; 321 let valid_up_to = if encoding == UTF_8 { 322 Encoding::utf8_valid_up_to(bytes) 323 } else if encoding.is_ascii_compatible() { 324 Encoding::ascii_valid_up_to(bytes) 325 } else if encoding == ISO_2022_JP { 326 Encoding::iso_2022_jp_ascii_valid_up_to(bytes) 327 } else { 328 return decode_from_slice_to_nscstring_without_bom_handling(encoding, src, dst, 0); 329 }; 330 if valid_up_to == bytes.len() { 331 if dst.fallible_assign(src).is_err() { 332 return NS_ERROR_OUT_OF_MEMORY; 333 } 334 return NS_OK; 335 } 336 decode_from_slice_to_nscstring_without_bom_handling(encoding, src, dst, valid_up_to) 337 } 338 339 #[no_mangle] 340 pub unsafe extern "C" fn mozilla_encoding_decode_from_slice_to_nscstring_without_bom_handling( 341 encoding: *const Encoding, 342 src: *const u8, 343 src_len: usize, 344 dst: *mut nsACString, 345 already_validated: usize, 346 ) -> nsresult { 347 decode_from_slice_to_nscstring_without_bom_handling( 348 &*encoding, 349 slice::from_raw_parts(src, src_len), 350 &mut *dst, 351 already_validated, 352 ) 353 } 354 355 fn decode_from_slice_to_nscstring_without_bom_handling( 356 encoding: &'static Encoding, 357 src: &[u8], 358 dst: &mut nsACString, 359 already_validated: usize, 360 ) -> nsresult { 361 let bytes = src; 362 let mut decoder = encoding.new_decoder_without_bom_handling(); 363 let mut handle = try_start_bulk_write!(Some(src.len()), dst, NS_ERROR_OUT_OF_MEMORY); 364 365 if already_validated != 0 { 366 (handle.as_mut_slice())[..already_validated].copy_from_slice(&bytes[..already_validated]); 367 } 368 let mut total_read = already_validated; 369 let mut total_written = already_validated; 370 let mut total_had_errors = false; 371 loop { 372 let (result, read, written, had_errors) = decoder.decode_to_utf8( 373 &bytes[total_read..], 374 &mut (handle.as_mut_slice())[total_written..], 375 true, 376 ); 377 total_read += read; 378 total_written += written; 379 total_had_errors |= had_errors; 380 match result { 381 CoderResult::InputEmpty => { 382 debug_assert_eq!(total_read, bytes.len()); 383 let _ = handle.finish(total_written, true); 384 if total_had_errors { 385 return NS_OK_HAD_REPLACEMENTS; 386 } 387 return NS_OK; 388 } 389 CoderResult::OutputFull => { 390 // Allocate for the worst case. That is, we should come 391 // here at most once per invocation of this method. 392 if let Some(needed) = checked_add( 393 total_written, 394 decoder.max_utf8_buffer_length(bytes.len() - total_read), 395 ) { 396 if unsafe { 397 handle 398 .restart_bulk_write(needed, total_written, false) 399 .is_ok() 400 } { 401 continue; 402 } 403 } 404 return NS_ERROR_OUT_OF_MEMORY; 405 } 406 } 407 } 408 } 409 410 #[no_mangle] 411 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_without_bom_handling_and_without_replacement( 412 encoding: *const Encoding, 413 src: *const nsACString, 414 dst: *mut nsACString, 415 ) -> nsresult { 416 decode_to_nscstring_without_bom_handling_and_without_replacement(&*encoding, &*src, &mut *dst) 417 } 418 419 pub fn decode_to_nscstring_without_bom_handling_and_without_replacement( 420 encoding: &'static Encoding, 421 src: &nsACString, 422 dst: &mut nsACString, 423 ) -> nsresult { 424 let bytes = &src[..]; 425 if encoding == UTF_8 { 426 let valid_up_to = Encoding::utf8_valid_up_to(bytes); 427 if valid_up_to == bytes.len() { 428 if dst.fallible_assign(src).is_err() { 429 return NS_ERROR_OUT_OF_MEMORY; 430 } 431 return NS_OK; 432 } 433 return NS_ERROR_UDEC_ILLEGALINPUT; 434 } 435 let valid_up_to = if encoding.is_ascii_compatible() { 436 Encoding::ascii_valid_up_to(bytes) 437 } else if encoding == ISO_2022_JP { 438 Encoding::iso_2022_jp_ascii_valid_up_to(bytes) 439 } else { 440 0 441 }; 442 if valid_up_to == bytes.len() { 443 if dst.fallible_assign(src).is_err() { 444 return NS_ERROR_OUT_OF_MEMORY; 445 } 446 return NS_OK; 447 } 448 let mut decoder = encoding.new_decoder_without_bom_handling(); 449 let mut handle = try_start_bulk_write!( 450 checked_add( 451 valid_up_to, 452 decoder.max_utf8_buffer_length_without_replacement(bytes.len() - valid_up_to) 453 ), 454 dst, 455 NS_ERROR_OUT_OF_MEMORY 456 ); 457 let (result, read, written) = { 458 let dest = handle.as_mut_slice(); 459 dest[..valid_up_to].copy_from_slice(&bytes[..valid_up_to]); 460 decoder.decode_to_utf8_without_replacement( 461 &src[valid_up_to..], 462 &mut dest[valid_up_to..], 463 true, 464 ) 465 }; 466 match result { 467 DecoderResult::InputEmpty => { 468 debug_assert_eq!(valid_up_to + read, src.len()); 469 debug_assert!(valid_up_to + written <= handle.as_mut_slice().len()); 470 let _ = handle.finish(valid_up_to + written, true); 471 NS_OK 472 } 473 DecoderResult::Malformed(_, _) => { 474 // let handle's drop() run 475 NS_ERROR_UDEC_ILLEGALINPUT 476 } 477 DecoderResult::OutputFull => unreachable!(), 478 } 479 } 480 481 #[no_mangle] 482 pub unsafe extern "C" fn mozilla_encoding_encode_from_nscstring( 483 encoding: *mut *const Encoding, 484 src: *const nsACString, 485 dst: *mut nsACString, 486 ) -> nsresult { 487 let (rv, enc) = encode_from_nscstring(&**encoding, &*src, &mut *dst); 488 *encoding = enc as *const Encoding; 489 rv 490 } 491 492 pub fn encode_from_nscstring( 493 encoding: &'static Encoding, 494 src: &nsACString, 495 dst: &mut nsACString, 496 ) -> (nsresult, &'static Encoding) { 497 let output_encoding = encoding.output_encoding(); 498 let bytes = &src[..]; 499 if output_encoding == UTF_8 { 500 let valid_up_to = Encoding::utf8_valid_up_to(bytes); 501 if valid_up_to == bytes.len() { 502 if dst.fallible_assign(src).is_err() { 503 return (NS_ERROR_OUT_OF_MEMORY, output_encoding); 504 } 505 return (NS_OK, output_encoding); 506 } 507 return (NS_ERROR_UDEC_ILLEGALINPUT, output_encoding); 508 } 509 let valid_up_to = if output_encoding == ISO_2022_JP { 510 Encoding::iso_2022_jp_ascii_valid_up_to(bytes) 511 } else { 512 debug_assert!(output_encoding.is_ascii_compatible()); 513 Encoding::ascii_valid_up_to(bytes) 514 }; 515 if valid_up_to == bytes.len() { 516 if dst.fallible_assign(src).is_err() { 517 return (NS_ERROR_OUT_OF_MEMORY, output_encoding); 518 } 519 return (NS_OK, output_encoding); 520 } 521 522 // Encoder requires valid UTF-8. Using std instead of encoding_rs 523 // to avoid unsafe blocks. 524 let trail = if let Ok(trail) = ::std::str::from_utf8(&bytes[valid_up_to..]) { 525 trail 526 } else { 527 return (NS_ERROR_UDEC_ILLEGALINPUT, output_encoding); 528 }; 529 530 let mut encoder = output_encoding.new_encoder(); 531 let mut handle = try_start_bulk_write!( 532 checked_add( 533 valid_up_to, 534 encoder.max_buffer_length_from_utf8_if_no_unmappables(trail.len()) 535 ), 536 dst, 537 (NS_ERROR_OUT_OF_MEMORY, output_encoding) 538 ); 539 540 if valid_up_to != 0 { 541 // to_mut() shouldn't fail right after setting length. 542 (handle.as_mut_slice())[..valid_up_to].copy_from_slice(&bytes[..valid_up_to]); 543 } 544 545 // `total_read` tracks `trail` only but `total_written` tracks the overall situation! 546 // This asymmetry is here, because trail is materialized as `str` without resorting 547 // to unsafe code here. 548 let mut total_read = 0; 549 let mut total_written = valid_up_to; 550 let mut total_had_errors = false; 551 loop { 552 let (result, read, written, had_errors) = encoder.encode_from_utf8( 553 &trail[total_read..], 554 &mut (handle.as_mut_slice())[total_written..], 555 true, 556 ); 557 total_read += read; 558 total_written += written; 559 total_had_errors |= had_errors; 560 match result { 561 CoderResult::InputEmpty => { 562 debug_assert_eq!(valid_up_to + total_read, src.len()); 563 debug_assert!(total_written <= handle.as_mut_slice().len()); 564 let _ = handle.finish(total_written, true); 565 if total_had_errors { 566 return (NS_OK_HAD_REPLACEMENTS, output_encoding); 567 } 568 return (NS_OK, output_encoding); 569 } 570 CoderResult::OutputFull => { 571 if let Some(needed) = checked_add( 572 total_written, 573 encoder.max_buffer_length_from_utf8_if_no_unmappables(trail.len() - total_read), 574 ) { 575 if unsafe { 576 handle 577 .restart_bulk_write(needed, total_written, false) 578 .is_ok() 579 } { 580 continue; 581 } 582 } 583 return (NS_ERROR_OUT_OF_MEMORY, output_encoding); 584 } 585 } 586 } 587 } 588 589 #[inline(always)] 590 fn checked_add(num: usize, opt: Option<usize>) -> Option<usize> { 591 if let Some(n) = opt { 592 n.checked_add(num) 593 } else { 594 None 595 } 596 } 597 598 // Declared in nsHtml5StreamParser.cpp 599 #[no_mangle] 600 pub unsafe extern "C" fn xmldecl_parse( 601 buf: *const u8, 602 buf_len: usize, 603 ) -> *const encoding_rs::Encoding { 604 if let Some(encoding) = xmldecl::parse(std::slice::from_raw_parts(buf, buf_len)) { 605 encoding 606 } else { 607 std::ptr::null() 608 } 609 }