prefreader.rs (37203B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 use crate::preferences::{Pref, PrefValue, Preferences}; 6 use std::borrow::Borrow; 7 use std::borrow::Cow; 8 use std::char; 9 use std::error::Error; 10 use std::io::{self, Write}; 11 12 use std::str; 13 use thiserror::Error; 14 15 impl PrefReaderError { 16 fn new(message: String, position: Position, parent: Option<Box<dyn Error>>) -> PrefReaderError { 17 PrefReaderError { 18 message, 19 position, 20 parent, 21 } 22 } 23 } 24 25 impl From<io::Error> for PrefReaderError { 26 fn from(err: io::Error) -> PrefReaderError { 27 PrefReaderError::new("IOError".into(), Position::new(), Some(err.into())) 28 } 29 } 30 31 #[derive(Copy, Clone, Debug, PartialEq)] 32 enum TokenizerState { 33 Junk, 34 CommentStart, 35 CommentLine, 36 CommentBlock, 37 FunctionName, 38 AfterFunctionName, 39 FunctionArgs, 40 FunctionArg, 41 DoubleQuotedString, 42 SingleQuotedString, 43 Number, 44 Bool, 45 AfterFunctionArg, 46 AfterFunction, 47 Error, 48 } 49 50 #[derive(Copy, Clone, Debug, Default, PartialEq)] 51 pub struct Position { 52 line: u32, 53 column: u32, 54 } 55 56 impl Position { 57 pub fn new() -> Position { 58 Position { line: 1, column: 0 } 59 } 60 } 61 62 #[derive(Copy, Clone, Debug, PartialEq)] 63 pub enum TokenType { 64 None, 65 PrefFunction, 66 UserPrefFunction, 67 StickyPrefFunction, 68 CommentBlock, 69 CommentLine, 70 CommentBashLine, 71 Paren, 72 Semicolon, 73 Comma, 74 String, 75 Int, 76 Bool, 77 Error, 78 } 79 80 #[derive(Debug, PartialEq)] 81 pub enum PrefToken<'a> { 82 PrefFunction(Position), 83 UserPrefFunction(Position), 84 StickyPrefFunction(Position), 85 CommentBlock(Cow<'a, str>, Position), 86 CommentLine(Cow<'a, str>, Position), 87 CommentBashLine(Cow<'a, str>, Position), 88 Paren(char, Position), 89 Semicolon(Position), 90 Comma(Position), 91 String(Cow<'a, str>, Position), 92 Int(i64, Position), 93 Bool(bool, Position), 94 Error(String, Position), 95 } 96 97 impl PrefToken<'_> { 98 fn position(&self) -> Position { 99 match *self { 100 PrefToken::PrefFunction(position) => position, 101 PrefToken::UserPrefFunction(position) => position, 102 PrefToken::StickyPrefFunction(position) => position, 103 PrefToken::CommentBlock(_, position) => position, 104 PrefToken::CommentLine(_, position) => position, 105 PrefToken::CommentBashLine(_, position) => position, 106 PrefToken::Paren(_, position) => position, 107 PrefToken::Semicolon(position) => position, 108 PrefToken::Comma(position) => position, 109 PrefToken::String(_, position) => position, 110 PrefToken::Int(_, position) => position, 111 PrefToken::Bool(_, position) => position, 112 PrefToken::Error(_, position) => position, 113 } 114 } 115 } 116 117 #[derive(Debug, Error)] 118 #[error("{message} at line {}, column {}", .position.line, .position.column)] 119 pub struct PrefReaderError { 120 message: String, 121 position: Position, 122 #[source] 123 parent: Option<Box<dyn Error>>, 124 } 125 126 struct TokenData<'a> { 127 token_type: TokenType, 128 complete: bool, 129 position: Position, 130 data: Cow<'a, str>, 131 start_pos: usize, 132 } 133 134 impl<'a> TokenData<'a> { 135 fn new(token_type: TokenType, position: Position, start_pos: usize) -> TokenData<'a> { 136 TokenData { 137 token_type, 138 complete: false, 139 position, 140 data: Cow::Borrowed(""), 141 start_pos, 142 } 143 } 144 145 fn start(&mut self, tokenizer: &PrefTokenizer, token_type: TokenType) { 146 self.token_type = token_type; 147 self.position = tokenizer.position; 148 self.start_pos = tokenizer.pos; 149 } 150 151 fn end(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError> { 152 self.complete = true; 153 self.add_slice_to_token(buf, end_pos) 154 } 155 156 fn add_slice_to_token(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError> { 157 let data = match str::from_utf8(&buf[self.start_pos..end_pos]) { 158 Ok(x) => x, 159 Err(_) => { 160 return Err(PrefReaderError::new( 161 "Could not convert string to utf8".into(), 162 self.position, 163 None, 164 )); 165 } 166 }; 167 if self.data != "" { 168 self.data.to_mut().push_str(data) 169 } else { 170 self.data = Cow::Borrowed(data) 171 }; 172 Ok(()) 173 } 174 175 fn push_char(&mut self, tokenizer: &PrefTokenizer, data: char) { 176 self.data.to_mut().push(data); 177 self.start_pos = tokenizer.pos + 1; 178 } 179 } 180 181 pub struct PrefTokenizer<'a> { 182 data: &'a [u8], 183 pos: usize, 184 cur: Option<char>, 185 position: Position, 186 state: TokenizerState, 187 next_state: Option<TokenizerState>, 188 } 189 190 impl<'a> PrefTokenizer<'a> { 191 pub fn new(data: &'a [u8]) -> PrefTokenizer<'a> { 192 PrefTokenizer { 193 data, 194 pos: 0, 195 cur: None, 196 position: Position::new(), 197 state: TokenizerState::Junk, 198 next_state: Some(TokenizerState::FunctionName), 199 } 200 } 201 202 fn make_token(&mut self, token_data: TokenData<'a>) -> PrefToken<'a> { 203 let buf = token_data.data; 204 let position = token_data.position; 205 // Note: the panic! here are for cases where the invalid input is regarded as 206 // a bug in the caller. In cases where `make_token` can legitimately be called 207 // with invalid data we must instead return a PrefToken::Error 208 match token_data.token_type { 209 TokenType::None => panic!("Got a token without a type"), 210 TokenType::PrefFunction => PrefToken::PrefFunction(position), 211 TokenType::UserPrefFunction => PrefToken::UserPrefFunction(position), 212 TokenType::StickyPrefFunction => PrefToken::StickyPrefFunction(position), 213 TokenType::CommentBlock => PrefToken::CommentBlock(buf, position), 214 TokenType::CommentLine => PrefToken::CommentLine(buf, position), 215 TokenType::CommentBashLine => PrefToken::CommentBashLine(buf, position), 216 TokenType::Paren => { 217 if buf.len() != 1 { 218 panic!("Expected a buffer of length one"); 219 } 220 PrefToken::Paren(buf.chars().next().unwrap(), position) 221 } 222 TokenType::Semicolon => PrefToken::Semicolon(position), 223 TokenType::Comma => PrefToken::Comma(position), 224 TokenType::String => PrefToken::String(buf, position), 225 TokenType::Int => match buf.parse::<i64>() { 226 Ok(value) => PrefToken::Int(value, position), 227 Err(_) => PrefToken::Error(format!("Expected integer, got {}", buf), position), 228 }, 229 TokenType::Bool => { 230 let value = match buf.borrow() { 231 "true" => true, 232 "false" => false, 233 x => panic!("Boolean wasn't 'true' or 'false' (was {})", x), 234 }; 235 PrefToken::Bool(value, position) 236 } 237 TokenType::Error => panic!("make_token can't construct errors"), 238 } 239 } 240 241 fn get_char(&mut self) -> Option<char> { 242 if self.pos + 1 >= self.data.len() { 243 self.cur = None; 244 return None; 245 }; 246 if self.cur.is_some() { 247 self.pos += 1; 248 } 249 let c = self.data[self.pos] as char; 250 if self.cur == Some('\n') { 251 self.position.line += 1; 252 self.position.column = 0; 253 } else if self.cur.is_some() { 254 self.position.column += 1; 255 }; 256 self.cur = Some(c); 257 self.cur 258 } 259 260 fn unget_char(&mut self) -> Option<char> { 261 if self.pos == 0 { 262 self.position.column = 0; 263 self.cur = None 264 } else { 265 self.pos -= 1; 266 let c = self.data[self.pos] as char; 267 if c == '\n' { 268 self.position.line -= 1; 269 let mut col_pos = self.pos; 270 while col_pos > 0 { 271 col_pos -= 1; 272 if self.data[col_pos] as char == '\n' { 273 break; 274 } 275 } 276 self.position.column = (self.pos - col_pos) as u32; 277 } else { 278 self.position.column -= 1; 279 } 280 self.cur = Some(c); 281 } 282 self.cur 283 } 284 285 fn is_space(c: char) -> bool { 286 matches!(c, ' ' | '\t' | '\r' | '\n') 287 } 288 289 fn skip_whitespace(&mut self) -> Option<char> { 290 while let Some(c) = self.cur { 291 if PrefTokenizer::is_space(c) { 292 self.get_char(); 293 } else { 294 break; 295 }; 296 } 297 self.cur 298 } 299 300 fn consume_escape(&mut self, token_data: &mut TokenData<'a>) -> Result<(), PrefReaderError> { 301 let pos = self.pos; 302 let escaped = self.read_escape()?; 303 if let Some(escape_char) = escaped { 304 token_data.add_slice_to_token(self.data, pos)?; 305 token_data.push_char(self, escape_char); 306 }; 307 Ok(()) 308 } 309 310 fn read_escape(&mut self) -> Result<Option<char>, PrefReaderError> { 311 let escape_char = match self.get_char() { 312 Some('u') => self.read_hex_escape(4, true)?, 313 Some('x') => self.read_hex_escape(2, true)?, 314 Some('\\') => '\\' as u32, 315 Some('"') => '"' as u32, 316 Some('\'') => '\'' as u32, 317 Some('r') => '\r' as u32, 318 Some('n') => '\n' as u32, 319 Some(_) => return Ok(None), 320 None => { 321 return Err(PrefReaderError::new( 322 "EOF in character escape".into(), 323 self.position, 324 None, 325 )) 326 } 327 }; 328 Ok(Some(char::from_u32(escape_char).ok_or_else(|| { 329 PrefReaderError::new( 330 "Invalid codepoint decoded from escape".into(), 331 self.position, 332 None, 333 ) 334 })?)) 335 } 336 337 fn read_hex_escape(&mut self, hex_chars: isize, first: bool) -> Result<u32, PrefReaderError> { 338 let mut value = 0; 339 for _ in 0..hex_chars { 340 match self.get_char() { 341 Some(x) => { 342 value <<= 4; 343 match x { 344 '0'..='9' => value += x as u32 - '0' as u32, 345 'a'..='f' => value += x as u32 - 'a' as u32, 346 'A'..='F' => value += x as u32 - 'A' as u32, 347 _ => { 348 return Err(PrefReaderError::new( 349 "Unexpected character in escape".into(), 350 self.position, 351 None, 352 )) 353 } 354 } 355 } 356 None => { 357 return Err(PrefReaderError::new( 358 "Unexpected EOF in escape".into(), 359 self.position, 360 None, 361 )) 362 } 363 } 364 } 365 if first && (0xD800..=0xDBFF).contains(&value) { 366 // First part of a surrogate pair 367 if self.get_char() != Some('\\') || self.get_char() != Some('u') { 368 return Err(PrefReaderError::new( 369 "Lone high surrogate in surrogate pair".into(), 370 self.position, 371 None, 372 )); 373 } 374 self.unget_char(); 375 let high_surrogate = value; 376 let low_surrogate = self.read_hex_escape(4, false)?; 377 let high_value = (high_surrogate - 0xD800) << 10; 378 let low_value = low_surrogate - 0xDC00; 379 value = high_value + low_value + 0x10000; 380 } else if first && (0xDC00..=0xDFFF).contains(&value) { 381 return Err(PrefReaderError::new( 382 "Lone low surrogate".into(), 383 self.position, 384 None, 385 )); 386 } else if !first && !(0xDC00..=0xDFFF).contains(&value) { 387 return Err(PrefReaderError::new( 388 "Invalid low surrogate in surrogate pair".into(), 389 self.position, 390 None, 391 )); 392 } 393 Ok(value) 394 } 395 396 fn get_match(&mut self, target: &str, separators: &str) -> bool { 397 let initial_pos = self.pos; 398 let mut matched = true; 399 for c in target.chars() { 400 if self.cur == Some(c) { 401 self.get_char(); 402 } else { 403 matched = false; 404 break; 405 } 406 } 407 408 if !matched { 409 for _ in 0..(self.pos - initial_pos) { 410 self.unget_char(); 411 } 412 } else { 413 // Check that the next character is whitespace or a separator 414 if let Some(c) = self.cur { 415 if !(PrefTokenizer::is_space(c) || separators.contains(c) || c == '/') { 416 matched = false; 417 } 418 self.unget_char(); 419 } 420 // Otherwise the token was followed by EOF. That's a valid match, but 421 // will presumably cause a parse error later. 422 } 423 424 matched 425 } 426 427 fn next_token(&mut self) -> Result<Option<TokenData<'a>>, PrefReaderError> { 428 let mut token_data = TokenData::new(TokenType::None, Position::new(), 0); 429 430 loop { 431 let mut c = match self.get_char() { 432 Some(x) => x, 433 None => return Ok(None), 434 }; 435 436 self.state = match self.state { 437 TokenizerState::Junk => { 438 c = match self.skip_whitespace() { 439 Some(x) => x, 440 None => return Ok(None), 441 }; 442 match c { 443 '/' => TokenizerState::CommentStart, 444 '#' => { 445 token_data.start(self, TokenType::CommentBashLine); 446 token_data.start_pos = self.pos + 1; 447 TokenizerState::CommentLine 448 } 449 _ => { 450 self.unget_char(); 451 let next = match self.next_state { 452 Some(x) => x, 453 None => { 454 return Err(PrefReaderError::new( 455 "In Junk state without a next state defined".into(), 456 self.position, 457 None, 458 )) 459 } 460 }; 461 self.next_state = None; 462 next 463 } 464 } 465 } 466 TokenizerState::CommentStart => match c { 467 '*' => { 468 token_data.start(self, TokenType::CommentBlock); 469 token_data.start_pos = self.pos + 1; 470 TokenizerState::CommentBlock 471 } 472 '/' => { 473 token_data.start(self, TokenType::CommentLine); 474 token_data.start_pos = self.pos + 1; 475 TokenizerState::CommentLine 476 } 477 _ => { 478 return Err(PrefReaderError::new( 479 "Invalid character after /".into(), 480 self.position, 481 None, 482 )) 483 } 484 }, 485 TokenizerState::CommentLine => match c { 486 '\n' => { 487 token_data.end(self.data, self.pos)?; 488 TokenizerState::Junk 489 } 490 _ => TokenizerState::CommentLine, 491 }, 492 TokenizerState::CommentBlock => match c { 493 '*' => { 494 if self.get_char() == Some('/') { 495 token_data.end(self.data, self.pos - 1)?; 496 TokenizerState::Junk 497 } else { 498 TokenizerState::CommentBlock 499 } 500 } 501 _ => TokenizerState::CommentBlock, 502 }, 503 TokenizerState::FunctionName => { 504 let position = self.position; 505 let start_pos = self.pos; 506 match c { 507 'u' => { 508 if self.get_match("user_pref", "(") { 509 token_data.start(self, TokenType::UserPrefFunction); 510 } 511 } 512 's' => { 513 if self.get_match("sticky_pref", "(") { 514 token_data.start(self, TokenType::StickyPrefFunction); 515 } 516 } 517 'p' => { 518 if self.get_match("pref", "(") { 519 token_data.start(self, TokenType::PrefFunction); 520 } 521 } 522 _ => {} 523 }; 524 if token_data.token_type == TokenType::None { 525 // We didn't match anything 526 return Err(PrefReaderError::new( 527 "Expected a pref function name".into(), 528 position, 529 None, 530 )); 531 } else { 532 token_data.start_pos = start_pos; 533 token_data.position = position; 534 token_data.end(self.data, self.pos + 1)?; 535 self.next_state = Some(TokenizerState::AfterFunctionName); 536 TokenizerState::Junk 537 } 538 } 539 TokenizerState::AfterFunctionName => match c { 540 '(' => { 541 self.next_state = Some(TokenizerState::FunctionArgs); 542 token_data.start(self, TokenType::Paren); 543 token_data.end(self.data, self.pos + 1)?; 544 self.next_state = Some(TokenizerState::FunctionArgs); 545 TokenizerState::Junk 546 } 547 _ => { 548 return Err(PrefReaderError::new( 549 "Expected an opening paren".into(), 550 self.position, 551 None, 552 )) 553 } 554 }, 555 TokenizerState::FunctionArgs => match c { 556 ')' => { 557 token_data.start(self, TokenType::Paren); 558 token_data.end(self.data, self.pos + 1)?; 559 self.next_state = Some(TokenizerState::AfterFunction); 560 TokenizerState::Junk 561 } 562 _ => { 563 self.unget_char(); 564 TokenizerState::FunctionArg 565 } 566 }, 567 TokenizerState::FunctionArg => match c { 568 '"' => { 569 token_data.start(self, TokenType::String); 570 token_data.start_pos = self.pos + 1; 571 TokenizerState::DoubleQuotedString 572 } 573 '\'' => { 574 token_data.start(self, TokenType::String); 575 token_data.start_pos = self.pos + 1; 576 TokenizerState::SingleQuotedString 577 } 578 't' | 'f' => { 579 self.unget_char(); 580 TokenizerState::Bool 581 } 582 '0'..='9' | '-' | '+' => { 583 token_data.start(self, TokenType::Int); 584 TokenizerState::Number 585 } 586 _ => { 587 return Err(PrefReaderError::new( 588 "Invalid character at start of function argument".into(), 589 self.position, 590 None, 591 )) 592 } 593 }, 594 TokenizerState::DoubleQuotedString => match c { 595 '"' => { 596 token_data.end(self.data, self.pos)?; 597 self.next_state = Some(TokenizerState::AfterFunctionArg); 598 TokenizerState::Junk 599 } 600 '\n' => { 601 return Err(PrefReaderError::new( 602 "EOL in double quoted string".into(), 603 self.position, 604 None, 605 )) 606 } 607 '\\' => { 608 self.consume_escape(&mut token_data)?; 609 TokenizerState::DoubleQuotedString 610 } 611 _ => TokenizerState::DoubleQuotedString, 612 }, 613 TokenizerState::SingleQuotedString => match c { 614 '\'' => { 615 token_data.end(self.data, self.pos)?; 616 self.next_state = Some(TokenizerState::AfterFunctionArg); 617 TokenizerState::Junk 618 } 619 '\n' => { 620 return Err(PrefReaderError::new( 621 "EOL in single quoted string".into(), 622 self.position, 623 None, 624 )) 625 } 626 '\\' => { 627 self.consume_escape(&mut token_data)?; 628 TokenizerState::SingleQuotedString 629 } 630 _ => TokenizerState::SingleQuotedString, 631 }, 632 TokenizerState::Number => match c { 633 '0'..='9' => TokenizerState::Number, 634 ')' | ',' => { 635 token_data.end(self.data, self.pos)?; 636 self.unget_char(); 637 self.next_state = Some(TokenizerState::AfterFunctionArg); 638 TokenizerState::Junk 639 } 640 x if PrefTokenizer::is_space(x) => { 641 token_data.end(self.data, self.pos)?; 642 self.next_state = Some(TokenizerState::AfterFunctionArg); 643 TokenizerState::Junk 644 } 645 _ => { 646 return Err(PrefReaderError::new( 647 "Invalid character in number literal".into(), 648 self.position, 649 None, 650 )) 651 } 652 }, 653 TokenizerState::Bool => { 654 let start_pos = self.pos; 655 let position = self.position; 656 match c { 657 't' => { 658 if self.get_match("true", ",)") { 659 token_data.start(self, TokenType::Bool) 660 } 661 } 662 'f' => { 663 if self.get_match("false", ",)") { 664 token_data.start(self, TokenType::Bool) 665 } 666 } 667 _ => {} 668 }; 669 if token_data.token_type == TokenType::None { 670 return Err(PrefReaderError::new( 671 "Unexpected characters in function argument".into(), 672 position, 673 None, 674 )); 675 } else { 676 token_data.start_pos = start_pos; 677 token_data.position = position; 678 token_data.end(self.data, self.pos + 1)?; 679 self.next_state = Some(TokenizerState::AfterFunctionArg); 680 TokenizerState::Junk 681 } 682 } 683 TokenizerState::AfterFunctionArg => match c { 684 ',' => { 685 token_data.start(self, TokenType::Comma); 686 token_data.end(self.data, self.pos + 1)?; 687 self.next_state = Some(TokenizerState::FunctionArg); 688 TokenizerState::Junk 689 } 690 ')' => { 691 token_data.start(self, TokenType::Paren); 692 token_data.end(self.data, self.pos + 1)?; 693 self.next_state = Some(TokenizerState::AfterFunction); 694 TokenizerState::Junk 695 } 696 _ => { 697 return Err(PrefReaderError::new( 698 "Unexpected character after function argument".into(), 699 self.position, 700 None, 701 )) 702 } 703 }, 704 TokenizerState::AfterFunction => match c { 705 ';' => { 706 token_data.start(self, TokenType::Semicolon); 707 token_data.end(self.data, self.pos)?; 708 self.next_state = Some(TokenizerState::FunctionName); 709 TokenizerState::Junk 710 } 711 _ => { 712 return Err(PrefReaderError::new( 713 "Unexpected character after function".into(), 714 self.position, 715 None, 716 )) 717 } 718 }, 719 TokenizerState::Error => TokenizerState::Error, 720 }; 721 if token_data.complete { 722 return Ok(Some(token_data)); 723 } 724 } 725 } 726 } 727 728 impl<'a> Iterator for PrefTokenizer<'a> { 729 type Item = PrefToken<'a>; 730 731 fn next(&mut self) -> Option<PrefToken<'a>> { 732 if let TokenizerState::Error = self.state { 733 return None; 734 } 735 let token_data = match self.next_token() { 736 Err(e) => { 737 self.state = TokenizerState::Error; 738 return Some(PrefToken::Error(e.message.clone(), e.position)); 739 } 740 Ok(Some(token_data)) => token_data, 741 Ok(None) => return None, 742 }; 743 let token = self.make_token(token_data); 744 Some(token) 745 } 746 } 747 748 pub fn tokenize(data: &[u8]) -> PrefTokenizer<'_> { 749 PrefTokenizer::new(data) 750 } 751 752 pub fn serialize_token<T: Write>(token: &PrefToken, output: &mut T) -> Result<(), PrefReaderError> { 753 let mut data_buf = String::new(); 754 755 let data = match *token { 756 PrefToken::PrefFunction(_) => "pref", 757 PrefToken::UserPrefFunction(_) => "user_pref", 758 PrefToken::StickyPrefFunction(_) => "sticky_pref", 759 PrefToken::CommentBlock(ref data, _) => { 760 data_buf.reserve(data.len() + 4); 761 data_buf.push_str("/*"); 762 data_buf.push_str(data.borrow()); 763 data_buf.push('*'); 764 &*data_buf 765 } 766 PrefToken::CommentLine(ref data, _) => { 767 data_buf.reserve(data.len() + 2); 768 data_buf.push_str("//"); 769 data_buf.push_str(data.borrow()); 770 &*data_buf 771 } 772 PrefToken::CommentBashLine(ref data, _) => { 773 data_buf.reserve(data.len() + 1); 774 data_buf.push('#'); 775 data_buf.push_str(data.borrow()); 776 &*data_buf 777 } 778 PrefToken::Paren(data, _) => { 779 data_buf.push(data); 780 &*data_buf 781 } 782 PrefToken::Comma(_) => ",", 783 PrefToken::Semicolon(_) => ";\n", 784 PrefToken::String(ref data, _) => { 785 data_buf.reserve(data.len() + 2); 786 data_buf.push('"'); 787 data_buf.push_str(escape_quote(data.borrow()).borrow()); 788 data_buf.push('"'); 789 &*data_buf 790 } 791 PrefToken::Int(data, _) => { 792 data_buf.push_str(&data.to_string()); 793 &*data_buf 794 } 795 PrefToken::Bool(data, _) => { 796 if data { 797 "true" 798 } else { 799 "false" 800 } 801 } 802 PrefToken::Error(ref data, pos) => { 803 return Err(PrefReaderError::new(data.clone(), pos, None)) 804 } 805 }; 806 output.write_all(data.as_bytes())?; 807 Ok(()) 808 } 809 810 pub fn serialize_tokens<'a, I, W>(tokens: I, output: &mut W) -> Result<(), PrefReaderError> 811 where 812 I: Iterator<Item = &'a PrefToken<'a>>, 813 W: Write, 814 { 815 for token in tokens { 816 serialize_token(token, output)?; 817 } 818 Ok(()) 819 } 820 821 fn escape_quote(data: &str) -> Cow<'_, str> { 822 // Not very efficient… 823 if data.contains('"') || data.contains('\\') { 824 Cow::Owned(data.replace('\\', r"\\").replace('"', r#"\""#)) 825 } else { 826 Cow::Borrowed(data) 827 } 828 } 829 830 #[derive(Debug, PartialEq)] 831 enum ParserState { 832 Function, 833 Key, 834 Value, 835 } 836 837 struct PrefBuilder { 838 key: Option<String>, 839 value: Option<PrefValue>, 840 sticky: bool, 841 } 842 843 impl PrefBuilder { 844 fn new() -> PrefBuilder { 845 PrefBuilder { 846 key: None, 847 value: None, 848 sticky: false, 849 } 850 } 851 } 852 853 fn skip_comments<'a>(tokenizer: &mut PrefTokenizer<'a>) -> Option<PrefToken<'a>> { 854 loop { 855 match tokenizer.next() { 856 Some(PrefToken::CommentBashLine(_, _)) 857 | Some(PrefToken::CommentBlock(_, _)) 858 | Some(PrefToken::CommentLine(_, _)) => {} 859 Some(x) => return Some(x), 860 None => return None, 861 } 862 } 863 } 864 865 pub fn parse_tokens(tokenizer: &mut PrefTokenizer<'_>) -> Result<Preferences, PrefReaderError> { 866 let mut state = ParserState::Function; 867 let mut current_pref = PrefBuilder::new(); 868 let mut rv = Preferences::new(); 869 870 loop { 871 // Not just using a for loop here seems strange, but this restricts the 872 // scope of the borrow 873 let token = { 874 match tokenizer.next() { 875 Some(x) => x, 876 None => break, 877 } 878 }; 879 // First deal with comments and errors 880 match token { 881 PrefToken::Error(msg, position) => { 882 return Err(PrefReaderError::new(msg, position, None)); 883 } 884 PrefToken::CommentBashLine(_, _) 885 | PrefToken::CommentLine(_, _) 886 | PrefToken::CommentBlock(_, _) => continue, 887 _ => {} 888 } 889 state = match state { 890 ParserState::Function => { 891 match token { 892 PrefToken::PrefFunction(_) => { 893 current_pref.sticky = false; 894 } 895 PrefToken::UserPrefFunction(_) => { 896 current_pref.sticky = false; 897 } 898 PrefToken::StickyPrefFunction(_) => { 899 current_pref.sticky = true; 900 } 901 _ => { 902 return Err(PrefReaderError::new( 903 "Expected pref function".into(), 904 token.position(), 905 None, 906 )); 907 } 908 } 909 let next = skip_comments(tokenizer); 910 match next { 911 Some(PrefToken::Paren('(', _)) => ParserState::Key, 912 _ => { 913 return Err(PrefReaderError::new( 914 "Expected open paren".into(), 915 next.map(|x| x.position()).unwrap_or(tokenizer.position), 916 None, 917 )) 918 } 919 } 920 } 921 ParserState::Key => { 922 match token { 923 PrefToken::String(data, _) => current_pref.key = Some(data.into_owned()), 924 _ => { 925 return Err(PrefReaderError::new( 926 "Expected string".into(), 927 token.position(), 928 None, 929 )); 930 } 931 } 932 let next = skip_comments(tokenizer); 933 match next { 934 Some(PrefToken::Comma(_)) => ParserState::Value, 935 _ => { 936 return Err(PrefReaderError::new( 937 "Expected comma".into(), 938 next.map(|x| x.position()).unwrap_or(tokenizer.position), 939 None, 940 )) 941 } 942 } 943 } 944 ParserState::Value => { 945 match token { 946 PrefToken::String(data, _) => { 947 current_pref.value = Some(PrefValue::String(data.into_owned())) 948 } 949 PrefToken::Int(data, _) => current_pref.value = Some(PrefValue::Int(data)), 950 PrefToken::Bool(data, _) => current_pref.value = Some(PrefValue::Bool(data)), 951 _ => { 952 return Err(PrefReaderError::new( 953 "Expected value".into(), 954 token.position(), 955 None, 956 )) 957 } 958 } 959 let next = skip_comments(tokenizer); 960 match next { 961 Some(PrefToken::Paren(')', _)) => {} 962 _ => { 963 return Err(PrefReaderError::new( 964 "Expected close paren".into(), 965 next.map(|x| x.position()).unwrap_or(tokenizer.position), 966 None, 967 )) 968 } 969 } 970 let next = skip_comments(tokenizer); 971 match next { 972 Some(PrefToken::Semicolon(_)) | None => {} 973 _ => { 974 return Err(PrefReaderError::new( 975 "Expected semicolon".into(), 976 next.map(|x| x.position()).unwrap_or(tokenizer.position), 977 None, 978 )) 979 } 980 } 981 let key = current_pref.key.take(); 982 let value = current_pref.value.take(); 983 let pref = if current_pref.sticky { 984 Pref::new_sticky(value.unwrap()) 985 } else { 986 Pref::new(value.unwrap()) 987 }; 988 rv.insert(key.unwrap(), pref); 989 current_pref.sticky = false; 990 ParserState::Function 991 } 992 } 993 } 994 match state { 995 ParserState::Key | ParserState::Value => { 996 return Err(PrefReaderError::new( 997 "EOF in middle of function".into(), 998 tokenizer.position, 999 None, 1000 )); 1001 } 1002 _ => {} 1003 } 1004 Ok(rv) 1005 } 1006 1007 pub fn serialize<W: Write>(prefs: &Preferences, output: &mut W) -> io::Result<()> { 1008 let mut p: Vec<_> = prefs.iter().collect(); 1009 p.sort_by(|a, b| a.0.cmp(b.0)); 1010 for &(key, pref) in &p { 1011 let func = if pref.sticky { 1012 "sticky_pref(" 1013 } else { 1014 "user_pref(" 1015 } 1016 .as_bytes(); 1017 output.write_all(func)?; 1018 output.write_all(b"\"")?; 1019 output.write_all(escape_quote(key).as_bytes())?; 1020 output.write_all(b"\"")?; 1021 output.write_all(b", ")?; 1022 match pref.value { 1023 PrefValue::Bool(x) => { 1024 output.write_all(if x { b"true" } else { b"false" })?; 1025 } 1026 PrefValue::Int(x) => { 1027 output.write_all(x.to_string().as_bytes())?; 1028 } 1029 PrefValue::String(ref x) => { 1030 output.write_all(b"\"")?; 1031 output.write_all(escape_quote(x).as_bytes())?; 1032 output.write_all(b"\"")?; 1033 } 1034 }; 1035 output.write_all(b");\n")?; 1036 } 1037 Ok(()) 1038 } 1039 1040 pub fn parse(data: &[u8]) -> Result<Preferences, PrefReaderError> { 1041 let mut tokenizer = tokenize(data); 1042 parse_tokens(&mut tokenizer) 1043 }