tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lib.rs (19621B)


      1 // Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
      2 // file at the top-level directory of this distribution.
      3 //
      4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
      5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
      6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
      7 // option. This file may not be copied, modified, or distributed
      8 // except according to those terms.
      9 
     10 // Adapted from third_party/rust/encoding_rs/src/lib.rs, so the
     11 // "top-level directory" in the above notice refers to
     12 // third_party/rust/encoding_rs/.
     13 
     14 extern crate encoding_rs;
     15 extern crate nserror;
     16 extern crate nsstring;
     17 extern crate xmldecl;
     18 
     19 use encoding_rs::*;
     20 use nserror::*;
     21 use nsstring::*;
     22 use std::slice;
     23 
     24 /// Takes `Option<usize>`, the destination string and a value
     25 /// to return on failure and tries to start a bulk write of the
     26 /// destination string with the capacity given by the `usize`
     27 /// wrapped in the first argument. Returns the bulk write
     28 /// handle.
     29 macro_rules! try_start_bulk_write {
     30    ($needed:expr,
     31     $dst:ident,
     32     $ret:expr) => {{
     33        let needed = match $needed {
     34            Some(needed) => needed,
     35            None => {
     36                return $ret;
     37            }
     38        };
     39        match unsafe { $dst.bulk_write(needed, 0, false) } {
     40            Err(_) => {
     41                return $ret;
     42            }
     43            Ok(handle) => handle,
     44        }
     45    }};
     46 }
     47 
     48 #[no_mangle]
     49 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring(
     50    encoding: *mut *const Encoding,
     51    src: *const u8,
     52    src_len: usize,
     53    dst: *mut nsAString,
     54 ) -> nsresult {
     55    let (rv, enc) = decode_to_nsstring(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst);
     56    *encoding = enc as *const Encoding;
     57    rv
     58 }
     59 
     60 pub fn decode_to_nsstring(
     61    encoding: &'static Encoding,
     62    src: &[u8],
     63    dst: &mut nsAString,
     64 ) -> (nsresult, &'static Encoding) {
     65    if let Some((enc, bom_length)) = Encoding::for_bom(src) {
     66        return (
     67            decode_to_nsstring_without_bom_handling(enc, &src[bom_length..], dst),
     68            enc,
     69        );
     70    }
     71    (
     72        decode_to_nsstring_without_bom_handling(encoding, src, dst),
     73        encoding,
     74    )
     75 }
     76 
     77 #[no_mangle]
     78 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_with_bom_removal(
     79    encoding: *const Encoding,
     80    src: *const u8,
     81    src_len: usize,
     82    dst: *mut nsAString,
     83 ) -> nsresult {
     84    decode_to_nsstring_with_bom_removal(&*encoding, slice::from_raw_parts(src, src_len), &mut *dst)
     85 }
     86 
     87 pub fn decode_to_nsstring_with_bom_removal(
     88    encoding: &'static Encoding,
     89    src: &[u8],
     90    dst: &mut nsAString,
     91 ) -> nsresult {
     92    let without_bom = if encoding == UTF_8 && src.starts_with(b"\xEF\xBB\xBF") {
     93        &src[3..]
     94    } else if (encoding == UTF_16LE && src.starts_with(b"\xFF\xFE"))
     95        || (encoding == UTF_16BE && src.starts_with(b"\xFE\xFF"))
     96    {
     97        &src[2..]
     98    } else {
     99        src
    100    };
    101    decode_to_nsstring_without_bom_handling(encoding, without_bom, dst)
    102 }
    103 
    104 #[no_mangle]
    105 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_without_bom_handling(
    106    encoding: *const Encoding,
    107    src: *const u8,
    108    src_len: usize,
    109    dst: *mut nsAString,
    110 ) -> nsresult {
    111    decode_to_nsstring_without_bom_handling(
    112        &*encoding,
    113        slice::from_raw_parts(src, src_len),
    114        &mut *dst,
    115    )
    116 }
    117 
    118 pub fn decode_to_nsstring_without_bom_handling(
    119    encoding: &'static Encoding,
    120    src: &[u8],
    121    dst: &mut nsAString,
    122 ) -> nsresult {
    123    let mut decoder = encoding.new_decoder_without_bom_handling();
    124    let mut handle = try_start_bulk_write!(
    125        decoder.max_utf16_buffer_length(src.len()),
    126        dst,
    127        NS_ERROR_OUT_OF_MEMORY
    128    );
    129    let (result, read, written, had_errors) =
    130        decoder.decode_to_utf16(src, handle.as_mut_slice(), true);
    131    debug_assert_eq!(result, CoderResult::InputEmpty);
    132    debug_assert_eq!(read, src.len());
    133    debug_assert!(written <= handle.as_mut_slice().len());
    134    let _ = handle.finish(written, true);
    135    if had_errors {
    136        return NS_OK_HAD_REPLACEMENTS;
    137    }
    138    NS_OK
    139 }
    140 
    141 #[no_mangle]
    142 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring_without_bom_handling_and_without_replacement(
    143    encoding: *const Encoding,
    144    src: *const u8,
    145    src_len: usize,
    146    dst: *mut nsAString,
    147 ) -> nsresult {
    148    decode_to_nsstring_without_bom_handling_and_without_replacement(
    149        &*encoding,
    150        slice::from_raw_parts(src, src_len),
    151        &mut *dst,
    152    )
    153 }
    154 
    155 pub fn decode_to_nsstring_without_bom_handling_and_without_replacement(
    156    encoding: &'static Encoding,
    157    src: &[u8],
    158    dst: &mut nsAString,
    159 ) -> nsresult {
    160    let mut decoder = encoding.new_decoder_without_bom_handling();
    161    let mut handle = try_start_bulk_write!(
    162        decoder.max_utf16_buffer_length(src.len()),
    163        dst,
    164        NS_ERROR_OUT_OF_MEMORY
    165    );
    166    let (result, read, written) =
    167        decoder.decode_to_utf16_without_replacement(src, handle.as_mut_slice(), true);
    168    match result {
    169        DecoderResult::InputEmpty => {
    170            debug_assert_eq!(read, src.len());
    171            debug_assert!(written <= handle.as_mut_slice().len());
    172            let _ = handle.finish(written, true);
    173            NS_OK
    174        }
    175        DecoderResult::Malformed(_, _) => {
    176            // Let handle's drop() run
    177            NS_ERROR_UDEC_ILLEGALINPUT
    178        }
    179        DecoderResult::OutputFull => unreachable!(),
    180    }
    181 }
    182 
    183 #[no_mangle]
    184 pub unsafe extern "C" fn mozilla_encoding_encode_from_utf16(
    185    encoding: *mut *const Encoding,
    186    src: *const u16,
    187    src_len: usize,
    188    dst: *mut nsACString,
    189 ) -> nsresult {
    190    let (rv, enc) = encode_from_utf16(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst);
    191    *encoding = enc as *const Encoding;
    192    rv
    193 }
    194 
    195 pub fn encode_from_utf16(
    196    encoding: &'static Encoding,
    197    src: &[u16],
    198    dst: &mut nsACString,
    199 ) -> (nsresult, &'static Encoding) {
    200    let output_encoding = encoding.output_encoding();
    201    let mut encoder = output_encoding.new_encoder();
    202    let mut handle = try_start_bulk_write!(
    203        encoder.max_buffer_length_from_utf16_if_no_unmappables(src.len()),
    204        dst,
    205        (NS_ERROR_OUT_OF_MEMORY, output_encoding)
    206    );
    207 
    208    let mut total_read = 0;
    209    let mut total_written = 0;
    210    let mut total_had_errors = false;
    211    loop {
    212        let (result, read, written, had_errors) = encoder.encode_from_utf16(
    213            &src[total_read..],
    214            &mut (handle.as_mut_slice())[total_written..],
    215            true,
    216        );
    217        total_read += read;
    218        total_written += written;
    219        total_had_errors |= had_errors;
    220        match result {
    221            CoderResult::InputEmpty => {
    222                debug_assert_eq!(total_read, src.len());
    223                debug_assert!(total_written <= handle.as_mut_slice().len());
    224                let _ = handle.finish(total_written, true);
    225                if total_had_errors {
    226                    return (NS_OK_HAD_REPLACEMENTS, output_encoding);
    227                }
    228                return (NS_OK, output_encoding);
    229            }
    230            CoderResult::OutputFull => {
    231                if let Some(needed) = checked_add(
    232                    total_written,
    233                    encoder.max_buffer_length_from_utf16_if_no_unmappables(src.len() - total_read),
    234                ) {
    235                    if unsafe {
    236                        handle
    237                            .restart_bulk_write(needed, total_written, false)
    238                            .is_ok()
    239                    } {
    240                        continue;
    241                    }
    242                }
    243                return (NS_ERROR_OUT_OF_MEMORY, output_encoding);
    244            }
    245        }
    246    }
    247 }
    248 
    249 #[no_mangle]
    250 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring(
    251    encoding: *mut *const Encoding,
    252    src: *const nsACString,
    253    dst: *mut nsACString,
    254 ) -> nsresult {
    255    debug_assert_ne!(src as usize, dst as usize);
    256    let (rv, enc) = decode_to_nscstring(&**encoding, &*src, &mut *dst);
    257    *encoding = enc as *const Encoding;
    258    rv
    259 }
    260 
    261 pub fn decode_to_nscstring(
    262    encoding: &'static Encoding,
    263    src: &nsACString,
    264    dst: &mut nsACString,
    265 ) -> (nsresult, &'static Encoding) {
    266    if let Some((enc, bom_length)) = Encoding::for_bom(src) {
    267        return (
    268            decode_from_slice_to_nscstring_without_bom_handling(enc, &src[bom_length..], dst, 0),
    269            enc,
    270        );
    271    }
    272    (
    273        decode_to_nscstring_without_bom_handling(encoding, src, dst),
    274        encoding,
    275    )
    276 }
    277 
    278 #[no_mangle]
    279 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_with_bom_removal(
    280    encoding: *const Encoding,
    281    src: *const nsACString,
    282    dst: *mut nsACString,
    283 ) -> nsresult {
    284    debug_assert_ne!(src as usize, dst as usize);
    285    decode_to_nscstring_with_bom_removal(&*encoding, &*src, &mut *dst)
    286 }
    287 
    288 pub fn decode_to_nscstring_with_bom_removal(
    289    encoding: &'static Encoding,
    290    src: &nsACString,
    291    dst: &mut nsACString,
    292 ) -> nsresult {
    293    let without_bom = if encoding == UTF_8 && src.starts_with(b"\xEF\xBB\xBF") {
    294        &src[3..]
    295    } else if (encoding == UTF_16LE && src.starts_with(b"\xFF\xFE"))
    296        || (encoding == UTF_16BE && src.starts_with(b"\xFE\xFF"))
    297    {
    298        &src[2..]
    299    } else {
    300        return decode_to_nscstring_without_bom_handling(encoding, src, dst);
    301    };
    302    decode_from_slice_to_nscstring_without_bom_handling(encoding, without_bom, dst, 0)
    303 }
    304 
    305 #[no_mangle]
    306 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_without_bom_handling(
    307    encoding: *const Encoding,
    308    src: *const nsACString,
    309    dst: *mut nsACString,
    310 ) -> nsresult {
    311    debug_assert_ne!(src as usize, dst as usize);
    312    decode_to_nscstring_without_bom_handling(&*encoding, &*src, &mut *dst)
    313 }
    314 
    315 pub fn decode_to_nscstring_without_bom_handling(
    316    encoding: &'static Encoding,
    317    src: &nsACString,
    318    dst: &mut nsACString,
    319 ) -> nsresult {
    320    let bytes = &src[..];
    321    let valid_up_to = if encoding == UTF_8 {
    322        Encoding::utf8_valid_up_to(bytes)
    323    } else if encoding.is_ascii_compatible() {
    324        Encoding::ascii_valid_up_to(bytes)
    325    } else if encoding == ISO_2022_JP {
    326        Encoding::iso_2022_jp_ascii_valid_up_to(bytes)
    327    } else {
    328        return decode_from_slice_to_nscstring_without_bom_handling(encoding, src, dst, 0);
    329    };
    330    if valid_up_to == bytes.len() {
    331        if dst.fallible_assign(src).is_err() {
    332            return NS_ERROR_OUT_OF_MEMORY;
    333        }
    334        return NS_OK;
    335    }
    336    decode_from_slice_to_nscstring_without_bom_handling(encoding, src, dst, valid_up_to)
    337 }
    338 
    339 #[no_mangle]
    340 pub unsafe extern "C" fn mozilla_encoding_decode_from_slice_to_nscstring_without_bom_handling(
    341    encoding: *const Encoding,
    342    src: *const u8,
    343    src_len: usize,
    344    dst: *mut nsACString,
    345    already_validated: usize,
    346 ) -> nsresult {
    347    decode_from_slice_to_nscstring_without_bom_handling(
    348        &*encoding,
    349        slice::from_raw_parts(src, src_len),
    350        &mut *dst,
    351        already_validated,
    352    )
    353 }
    354 
    355 fn decode_from_slice_to_nscstring_without_bom_handling(
    356    encoding: &'static Encoding,
    357    src: &[u8],
    358    dst: &mut nsACString,
    359    already_validated: usize,
    360 ) -> nsresult {
    361    let bytes = src;
    362    let mut decoder = encoding.new_decoder_without_bom_handling();
    363    let mut handle = try_start_bulk_write!(Some(src.len()), dst, NS_ERROR_OUT_OF_MEMORY);
    364 
    365    if already_validated != 0 {
    366        (handle.as_mut_slice())[..already_validated].copy_from_slice(&bytes[..already_validated]);
    367    }
    368    let mut total_read = already_validated;
    369    let mut total_written = already_validated;
    370    let mut total_had_errors = false;
    371    loop {
    372        let (result, read, written, had_errors) = decoder.decode_to_utf8(
    373            &bytes[total_read..],
    374            &mut (handle.as_mut_slice())[total_written..],
    375            true,
    376        );
    377        total_read += read;
    378        total_written += written;
    379        total_had_errors |= had_errors;
    380        match result {
    381            CoderResult::InputEmpty => {
    382                debug_assert_eq!(total_read, bytes.len());
    383                let _ = handle.finish(total_written, true);
    384                if total_had_errors {
    385                    return NS_OK_HAD_REPLACEMENTS;
    386                }
    387                return NS_OK;
    388            }
    389            CoderResult::OutputFull => {
    390                // Allocate for the worst case. That is, we should come
    391                // here at most once per invocation of this method.
    392                if let Some(needed) = checked_add(
    393                    total_written,
    394                    decoder.max_utf8_buffer_length(bytes.len() - total_read),
    395                ) {
    396                    if unsafe {
    397                        handle
    398                            .restart_bulk_write(needed, total_written, false)
    399                            .is_ok()
    400                    } {
    401                        continue;
    402                    }
    403                }
    404                return NS_ERROR_OUT_OF_MEMORY;
    405            }
    406        }
    407    }
    408 }
    409 
    410 #[no_mangle]
    411 pub unsafe extern "C" fn mozilla_encoding_decode_to_nscstring_without_bom_handling_and_without_replacement(
    412    encoding: *const Encoding,
    413    src: *const nsACString,
    414    dst: *mut nsACString,
    415 ) -> nsresult {
    416    decode_to_nscstring_without_bom_handling_and_without_replacement(&*encoding, &*src, &mut *dst)
    417 }
    418 
    419 pub fn decode_to_nscstring_without_bom_handling_and_without_replacement(
    420    encoding: &'static Encoding,
    421    src: &nsACString,
    422    dst: &mut nsACString,
    423 ) -> nsresult {
    424    let bytes = &src[..];
    425    if encoding == UTF_8 {
    426        let valid_up_to = Encoding::utf8_valid_up_to(bytes);
    427        if valid_up_to == bytes.len() {
    428            if dst.fallible_assign(src).is_err() {
    429                return NS_ERROR_OUT_OF_MEMORY;
    430            }
    431            return NS_OK;
    432        }
    433        return NS_ERROR_UDEC_ILLEGALINPUT;
    434    }
    435    let valid_up_to = if encoding.is_ascii_compatible() {
    436        Encoding::ascii_valid_up_to(bytes)
    437    } else if encoding == ISO_2022_JP {
    438        Encoding::iso_2022_jp_ascii_valid_up_to(bytes)
    439    } else {
    440        0
    441    };
    442    if valid_up_to == bytes.len() {
    443        if dst.fallible_assign(src).is_err() {
    444            return NS_ERROR_OUT_OF_MEMORY;
    445        }
    446        return NS_OK;
    447    }
    448    let mut decoder = encoding.new_decoder_without_bom_handling();
    449    let mut handle = try_start_bulk_write!(
    450        checked_add(
    451            valid_up_to,
    452            decoder.max_utf8_buffer_length_without_replacement(bytes.len() - valid_up_to)
    453        ),
    454        dst,
    455        NS_ERROR_OUT_OF_MEMORY
    456    );
    457    let (result, read, written) = {
    458        let dest = handle.as_mut_slice();
    459        dest[..valid_up_to].copy_from_slice(&bytes[..valid_up_to]);
    460        decoder.decode_to_utf8_without_replacement(
    461            &src[valid_up_to..],
    462            &mut dest[valid_up_to..],
    463            true,
    464        )
    465    };
    466    match result {
    467        DecoderResult::InputEmpty => {
    468            debug_assert_eq!(valid_up_to + read, src.len());
    469            debug_assert!(valid_up_to + written <= handle.as_mut_slice().len());
    470            let _ = handle.finish(valid_up_to + written, true);
    471            NS_OK
    472        }
    473        DecoderResult::Malformed(_, _) => {
    474            // let handle's drop() run
    475            NS_ERROR_UDEC_ILLEGALINPUT
    476        }
    477        DecoderResult::OutputFull => unreachable!(),
    478    }
    479 }
    480 
    481 #[no_mangle]
    482 pub unsafe extern "C" fn mozilla_encoding_encode_from_nscstring(
    483    encoding: *mut *const Encoding,
    484    src: *const nsACString,
    485    dst: *mut nsACString,
    486 ) -> nsresult {
    487    let (rv, enc) = encode_from_nscstring(&**encoding, &*src, &mut *dst);
    488    *encoding = enc as *const Encoding;
    489    rv
    490 }
    491 
    492 pub fn encode_from_nscstring(
    493    encoding: &'static Encoding,
    494    src: &nsACString,
    495    dst: &mut nsACString,
    496 ) -> (nsresult, &'static Encoding) {
    497    let output_encoding = encoding.output_encoding();
    498    let bytes = &src[..];
    499    if output_encoding == UTF_8 {
    500        let valid_up_to = Encoding::utf8_valid_up_to(bytes);
    501        if valid_up_to == bytes.len() {
    502            if dst.fallible_assign(src).is_err() {
    503                return (NS_ERROR_OUT_OF_MEMORY, output_encoding);
    504            }
    505            return (NS_OK, output_encoding);
    506        }
    507        return (NS_ERROR_UDEC_ILLEGALINPUT, output_encoding);
    508    }
    509    let valid_up_to = if output_encoding == ISO_2022_JP {
    510        Encoding::iso_2022_jp_ascii_valid_up_to(bytes)
    511    } else {
    512        debug_assert!(output_encoding.is_ascii_compatible());
    513        Encoding::ascii_valid_up_to(bytes)
    514    };
    515    if valid_up_to == bytes.len() {
    516        if dst.fallible_assign(src).is_err() {
    517            return (NS_ERROR_OUT_OF_MEMORY, output_encoding);
    518        }
    519        return (NS_OK, output_encoding);
    520    }
    521 
    522    // Encoder requires valid UTF-8. Using std instead of encoding_rs
    523    // to avoid unsafe blocks.
    524    let trail = if let Ok(trail) = ::std::str::from_utf8(&bytes[valid_up_to..]) {
    525        trail
    526    } else {
    527        return (NS_ERROR_UDEC_ILLEGALINPUT, output_encoding);
    528    };
    529 
    530    let mut encoder = output_encoding.new_encoder();
    531    let mut handle = try_start_bulk_write!(
    532        checked_add(
    533            valid_up_to,
    534            encoder.max_buffer_length_from_utf8_if_no_unmappables(trail.len())
    535        ),
    536        dst,
    537        (NS_ERROR_OUT_OF_MEMORY, output_encoding)
    538    );
    539 
    540    if valid_up_to != 0 {
    541        // to_mut() shouldn't fail right after setting length.
    542        (handle.as_mut_slice())[..valid_up_to].copy_from_slice(&bytes[..valid_up_to]);
    543    }
    544 
    545    // `total_read` tracks `trail` only but `total_written` tracks the overall situation!
    546    // This asymmetry is here, because trail is materialized as `str` without resorting
    547    // to unsafe code here.
    548    let mut total_read = 0;
    549    let mut total_written = valid_up_to;
    550    let mut total_had_errors = false;
    551    loop {
    552        let (result, read, written, had_errors) = encoder.encode_from_utf8(
    553            &trail[total_read..],
    554            &mut (handle.as_mut_slice())[total_written..],
    555            true,
    556        );
    557        total_read += read;
    558        total_written += written;
    559        total_had_errors |= had_errors;
    560        match result {
    561            CoderResult::InputEmpty => {
    562                debug_assert_eq!(valid_up_to + total_read, src.len());
    563                debug_assert!(total_written <= handle.as_mut_slice().len());
    564                let _ = handle.finish(total_written, true);
    565                if total_had_errors {
    566                    return (NS_OK_HAD_REPLACEMENTS, output_encoding);
    567                }
    568                return (NS_OK, output_encoding);
    569            }
    570            CoderResult::OutputFull => {
    571                if let Some(needed) = checked_add(
    572                    total_written,
    573                    encoder.max_buffer_length_from_utf8_if_no_unmappables(trail.len() - total_read),
    574                ) {
    575                    if unsafe {
    576                        handle
    577                            .restart_bulk_write(needed, total_written, false)
    578                            .is_ok()
    579                    } {
    580                        continue;
    581                    }
    582                }
    583                return (NS_ERROR_OUT_OF_MEMORY, output_encoding);
    584            }
    585        }
    586    }
    587 }
    588 
    589 #[inline(always)]
    590 fn checked_add(num: usize, opt: Option<usize>) -> Option<usize> {
    591    if let Some(n) = opt {
    592        n.checked_add(num)
    593    } else {
    594        None
    595    }
    596 }
    597 
    598 // Declared in nsHtml5StreamParser.cpp
    599 #[no_mangle]
    600 pub unsafe extern "C" fn xmldecl_parse(
    601    buf: *const u8,
    602    buf_len: usize,
    603 ) -> *const encoding_rs::Encoding {
    604    if let Some(encoding) = xmldecl::parse(std::slice::from_raw_parts(buf, buf_len)) {
    605        encoding
    606    } else {
    607        std::ptr::null()
    608    }
    609 }