1use crate::{
10 error::{Error, ErrorKind},
11 stream::{Event, OwnedEvent},
12 Integer,
13};
14use std::io::Read;
15
/// Streaming reader for ASCII property lists that keeps a one-byte look-ahead
/// over the wrapped byte source.
pub struct AsciiReader<R>
where
    R: Read,
{
    /// Underlying byte source; bytes are pulled from it one at a time.
    reader: R,
    /// Number of bytes that have become `current_char` so far. Used as the
    /// byte offset attached to errors.
    current_pos: u64,

    /// The byte following `current_char`, or `None` before the first read and
    /// once the source is exhausted.
    peeked_char: Option<u8>,

    /// The byte most recently produced by `advance`, or `None` before the
    /// first read and after the end of input.
    current_char: Option<u8>,
}
25
26impl<R: Read> AsciiReader<R> {
27 pub fn new(reader: R) -> Self {
28 Self {
29 reader,
30 current_pos: 0,
31 peeked_char: None,
32 current_char: None,
33 }
34 }
35
36 pub fn into_inner(self) -> R {
37 self.reader
38 }
39
40 fn error(&self, kind: ErrorKind) -> Error {
41 kind.with_byte_offset(self.current_pos)
42 }
43
44 fn read_one(&mut self) -> Result<Option<u8>, Error> {
45 let mut buf: [u8; 1] = [0; 1];
46 match self.reader.read_exact(&mut buf) {
47 Ok(()) => Ok(Some(buf[0])),
48 Err(err) => {
49 if err.kind() == std::io::ErrorKind::UnexpectedEof {
50 Ok(None)
51 } else {
52 Err(self.error(ErrorKind::Io(err)))
53 }
54 }
55 }
56 }
57
58 fn advance(&mut self) -> Result<Option<u8>, Error> {
61 self.current_char = self.peeked_char;
62 self.peeked_char = self.read_one()?;
63
64 if self.current_pos == 0 {
67 self.current_char = self.peeked_char;
68 self.peeked_char = self.read_one()?;
69 }
70
71 if self.current_char.is_some() {
72 self.current_pos += 1;
73 }
74
75 Ok(self.current_char)
76 }
77
78 fn unquoted_string_literal(&mut self, first: u8) -> Result<Option<OwnedEvent>, Error> {
90 let mut acc: Vec<u8> = Vec::new();
91 acc.push(first);
92
93 while {
94 match self.peeked_char {
95 Some(c) => {
96 c != b' ' && c != b')' && c != b'\r' && c != b'\t' && c != b';' && c != b','
97 }
98 None => false,
99 }
100 } {
101 self.advance()?;
103 match self.current_char {
104 Some(c) => acc.push(c),
105 None => return Err(self.error(ErrorKind::UnclosedString)),
106 };
107 }
108
109 let string_literal =
110 String::from_utf8(acc).map_err(|_e| self.error(ErrorKind::InvalidUtf8AsciiStream))?;
111
112 match Integer::from_str(&string_literal) {
114 Ok(i) => Ok(Some(Event::Integer(i))),
115 Err(_) => Ok(Some(Event::String(string_literal.into()))),
116 }
117 }
118
119 fn utf16_escape(&mut self) -> Result<String, Error> {
133 let mut code_units: &mut [u16] = &mut [0u16; 2];
134
135 let Some(code_unit) = self.utf16_code_unit()? else {
136 return Err(self.error(ErrorKind::InvalidUtf16String));
137 };
138
139 code_units[0] = code_unit;
140
141 if !matches!(code_unit, 0xD800..=0xDFFF) {
144 code_units = &mut code_units[0..1];
145 } else {
146 self.advance_quoted_string()?;
147
148 if self.current_char != Some(b'\\')
149 || !matches!(self.peeked_char, Some(b'u') | Some(b'U'))
150 {
151 return Err(self.error(ErrorKind::InvalidUtf16String));
152 }
153
154 self.advance_quoted_string()?;
155
156 if let Some(code_unit) = self.utf16_code_unit()? {
157 code_units[1] = code_unit;
158 }
159 }
160
161 let utf8 = String::from_utf16(code_units)
162 .map_err(|_| self.error(ErrorKind::InvalidUtf16String))?;
163
164 Ok(utf8)
165 }
166
167 fn utf16_code_unit(&mut self) -> Result<Option<u16>, Error> {
170 let hex_chars = [
171 self.advance_quoted_string()?,
172 self.advance_quoted_string()?,
173 self.advance_quoted_string()?,
174 self.advance_quoted_string()?,
175 ];
176
177 let hex_str = std::str::from_utf8(&hex_chars)
178 .map_err(|_| self.error(ErrorKind::InvalidUtf16String))?;
179
180 let code_unit = u16::from_str_radix(hex_str, 16)
181 .map_err(|_| self.error(ErrorKind::InvalidUtf16String))?;
182
183 Ok(Some(code_unit))
184 }
185
186 #[inline]
187 fn advance_quoted_string(&mut self) -> Result<u8, Error> {
188 match self.advance()? {
189 Some(c) => Ok(c),
190 None => Err(self.error(ErrorKind::UnclosedString)),
191 }
192 }
193
194 fn quoted_string_literal(&mut self, quote: u8) -> Result<Option<OwnedEvent>, Error> {
195 let mut acc = String::new();
196
197 loop {
198 let c = self.advance_quoted_string()?;
199
200 if c == quote {
201 return Ok(Some(Event::String(acc.into())));
202 }
203
204 let replacement = if c == b'\\' {
205 let c = self.advance_quoted_string()?;
206
207 match c {
208 b'\\' | b'"' => c as char,
209 b'a' => '\u{7}',
210 b'b' => '\u{8}',
211 b'f' => '\u{c}',
212 b'n' => '\n',
213 b'r' => '\r',
214 b't' => '\t',
215 b'U' => {
216 let utf8 = self.utf16_escape()?;
217 acc.push_str(utf8.as_str());
218 continue;
219 }
220 b'v' => '\u{b}',
221 b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' => {
222 let value = [
223 c,
224 self.advance_quoted_string()?,
225 self.advance_quoted_string()?,
226 ];
227
228 let value = std::str::from_utf8(&value)
229 .map_err(|_| self.error(ErrorKind::InvalidOctalString))?;
230
231 let value = u16::from_str_radix(value, 8)
232 .map_err(|_| self.error(ErrorKind::InvalidOctalString))?
233 as u32;
234
235 let value = char::from_u32(value)
236 .ok_or(self.error(ErrorKind::InvalidOctalString))?;
237
238 map_next_step_to_unicode(value)
239 }
240 _ => return Err(self.error(ErrorKind::InvalidUtf8AsciiStream)),
241 }
242 } else {
243 c as char
244 };
245
246 acc.push(replacement);
247 }
248 }
249
250 fn line_comment(&mut self) -> Result<(), Error> {
251 while {
255 match self.peeked_char {
256 Some(c) => c != b'\n',
257 None => false,
258 }
259 } {
260 let _ = self.advance()?;
261 }
262
263 Ok(())
264 }
265
266 fn block_comment(&mut self) -> Result<(), Error> {
267 let mut latest_consume = b' ';
268 while {
269 latest_consume != b'*'
270 || match self.advance()? {
271 Some(c) => c != b'/',
272 None => false,
273 }
274 } {
275 latest_consume = self
276 .advance()?
277 .ok_or(self.error(ErrorKind::IncompleteComment))?;
278 }
279
280 Ok(())
281 }
282
283 fn potential_comment(&mut self) -> Result<Option<OwnedEvent>, Error> {
287 match self.peeked_char {
288 Some(c) => match c {
289 b'/' => self.line_comment().map(|_| None),
290 b'*' => self.block_comment().map(|_| None),
291 _ => self.unquoted_string_literal(c),
292 },
293 None => Err(self.error(ErrorKind::IncompleteComment)),
295 }
296 }
297
298 fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> {
305 while let Some(c) = self.advance()? {
306 match c {
307 b'(' => return Ok(Some(Event::StartArray(None))),
309 b')' => return Ok(Some(Event::EndCollection)),
310 b'{' => return Ok(Some(Event::StartDictionary(None))),
311 b'}' => return Ok(Some(Event::EndCollection)),
312 b'\'' | b'"' => return self.quoted_string_literal(c),
313 b'/' => {
314 match self.potential_comment() {
315 Ok(Some(event)) => return Ok(Some(event)),
316 Ok(None) => { }
317 Err(e) => return Err(e),
318 }
319 }
320 b',' | b';' | b'=' => { }
321 b' ' | b'\r' | b'\t' | b'\n' => { }
322 _ => return self.unquoted_string_literal(c),
323 }
324 }
325
326 Ok(None)
327 }
328}
329
330impl<R: Read> Iterator for AsciiReader<R> {
331 type Item = Result<OwnedEvent, Error>;
332
333 fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
334 self.read_next().transpose()
335 }
336}
337
/// Translates a character whose code point is in `0x80..=0xFF` through a
/// 128-entry lookup table; every other character is returned unchanged.
fn map_next_step_to_unicode(c: char) -> char {
    // Entry `i` is the replacement for code point `0x80 + i`.
    const HIGH_HALF: [char; 128] = [
        '\u{A0}', '\u{C0}', '\u{C1}', '\u{C2}', '\u{C3}', '\u{C4}', '\u{C5}', '\u{C7}',
        '\u{C8}', '\u{C9}', '\u{CA}', '\u{CB}', '\u{CC}', '\u{CD}', '\u{CE}', '\u{CF}',
        '\u{D0}', '\u{D1}', '\u{D2}', '\u{D3}', '\u{D4}', '\u{D5}', '\u{D6}', '\u{D9}',
        '\u{DA}', '\u{DB}', '\u{DC}', '\u{DD}', '\u{DE}', '\u{B5}', '\u{D7}', '\u{F7}',
        '\u{A9}', '\u{A1}', '\u{A2}', '\u{A3}', '\u{2044}', '\u{A5}', '\u{192}', '\u{A7}',
        '\u{A4}', '\u{2019}', '\u{201C}', '\u{AB}', '\u{2039}', '\u{203A}', '\u{FB01}',
        '\u{FB02}', '\u{AE}', '\u{2013}', '\u{2020}', '\u{2021}', '\u{B7}', '\u{A6}',
        '\u{B6}', '\u{2022}', '\u{201A}', '\u{201E}', '\u{201D}', '\u{BB}', '\u{2026}',
        '\u{2030}', '\u{AC}', '\u{BF}', '\u{B9}', '\u{2CB}', '\u{B4}', '\u{2C6}',
        '\u{2DC}', '\u{AF}', '\u{2D8}', '\u{2D9}', '\u{A8}', '\u{B2}', '\u{2DA}',
        '\u{B8}', '\u{B3}', '\u{2DD}', '\u{2DB}', '\u{2C7}', '\u{2014}', '\u{B1}',
        '\u{BC}', '\u{BD}', '\u{BE}', '\u{E0}', '\u{E1}', '\u{E2}', '\u{E3}', '\u{E4}',
        '\u{E5}', '\u{E7}', '\u{E8}', '\u{E9}', '\u{EA}', '\u{EB}', '\u{EC}', '\u{C6}',
        '\u{ED}', '\u{AA}', '\u{EE}', '\u{EF}', '\u{F0}', '\u{F1}', '\u{141}', '\u{D8}',
        '\u{152}', '\u{BA}', '\u{F2}', '\u{F3}', '\u{F4}', '\u{F5}', '\u{F6}', '\u{E6}',
        '\u{F9}', '\u{FA}', '\u{FB}', '\u{131}', '\u{FC}', '\u{FD}', '\u{142}', '\u{F8}',
        '\u{153}', '\u{DF}', '\u{FE}', '\u{FF}', '\u{FFFD}', '\u{FFFD}',
    ];

    match u32::from(c) {
        code @ 0x80..=0xFF => HIGH_HALF[(code - 0x80) as usize],
        _ => c,
    }
}
369
370#[cfg(test)]
371mod tests {
372 use std::{fs::File, io::Cursor};
373
374 use super::*;
375 use crate::stream::Event::*;
376
377 #[test]
378 fn empty_test() {
379 let plist = "".to_owned();
380 let cursor = Cursor::new(plist.as_bytes());
381 let streaming_parser = AsciiReader::new(cursor);
382 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect();
383 assert_eq!(events, &[]);
384 }
385
386 #[test]
387 fn streaming_sample() {
388 let reader = File::open("./tests/data/ascii-sample.plist").unwrap();
389 let streaming_parser = AsciiReader::new(reader);
390 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect();
391
392 let comparison = &[
393 StartDictionary(None),
394 String("KeyName1".into()),
395 String("Value1".into()),
396 String("AnotherKeyName".into()),
397 String("Value2".into()),
398 String("Something".into()),
399 StartArray(None),
400 String("ArrayItem1".into()),
401 String("ArrayItem2".into()),
402 String("ArrayItem3".into()),
403 EndCollection,
404 String("Key4".into()),
405 String("0.10".into()),
406 String("KeyFive".into()),
407 StartDictionary(None),
408 String("Dictionary2Key1".into()),
409 String("Something".into()),
410 String("AnotherKey".into()),
411 String("Somethingelse".into()),
412 EndCollection,
413 EndCollection,
414 ];
415
416 assert_eq!(events, comparison);
417 }
418
419 #[test]
420 fn utf8_strings() {
421 let plist = "{ names = (Léa, François, Żaklina, 王芳); }".to_owned();
422 let cursor = Cursor::new(plist.as_bytes());
423 let streaming_parser = AsciiReader::new(cursor);
424 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect();
425
426 let comparison = &[
427 StartDictionary(None),
428 String("names".into()),
429 StartArray(None),
430 String("Léa".into()),
431 String("François".into()),
432 String("Żaklina".into()),
433 String("王芳".into()),
434 EndCollection,
435 EndCollection,
436 ];
437
438 assert_eq!(events, comparison);
439 }
440
441 #[test]
442 fn invalid_utf16_escapes() {
443 let plist = br#"{
444 key1 = "\U123";
445 key2 = "\UD83D";
446 key3 = "\u0080";
447 }"#;
448 let cursor = Cursor::new(plist);
449 let streaming_parser = AsciiReader::new(cursor);
450 let events: Vec<Result<Event, Error>> = streaming_parser.collect();
451
452 assert!(events[2].is_err());
454 assert!(events[4].is_err());
456 assert!(events[6].is_err());
458 }
459
460 #[test]
461 fn invalid_octal_escapes() {
462 let plist = br#"{
463 key1 = "\1";
464 key2 = "\12";
465 }"#;
466 let cursor = Cursor::new(plist);
467 let streaming_parser = AsciiReader::new(cursor);
468 let events: Vec<Result<Event, Error>> = streaming_parser.collect();
469
470 assert!(events[2].is_err());
472 assert!(events[4].is_err());
474 }
475
476 #[test]
477 fn escaped_sequences_in_strings() {
478 let plist = br#"{
479 key1 = "va\"lue";
480 key2 = 'va"lue';
481 key3 = "va\a\b\f\n\r\t\v\"\nlue";
482 key4 = "a\012b";
483 key5 = "\\UD83D\\UDCA9";
484 key6 = "\UD83D\UDCA9";
485 key7 = "\U0080";
486 key8 = "\200\377";
487 }"#;
488 let cursor = Cursor::new(plist);
489 let streaming_parser = AsciiReader::new(cursor);
490 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect();
491
492 let comparison = &[
493 StartDictionary(None),
494 String("key1".into()),
495 String(r#"va"lue"#.into()),
496 String("key2".into()),
497 String(r#"va"lue"#.into()),
498 String("key3".into()),
499 String("va\u{7}\u{8}\u{c}\n\r\t\u{b}\"\nlue".into()),
500 String("key4".into()),
501 String("a\nb".into()),
502 String("key5".into()),
503 String("\\UD83D\\UDCA9".into()),
504 String("key6".into()),
505 String("💩".into()),
506 String("key7".into()),
507 String("\u{80}".into()),
508 String("key8".into()),
509 String("\u{a0}\u{fffd}".into()),
510 EndCollection,
511 ];
512
513 assert_eq!(events, comparison);
514 }
515
516 #[test]
517 fn integers_and_strings() {
518 let plist = "{ name = James, age = 42 }".to_owned();
519 let cursor = Cursor::new(plist.as_bytes());
520 let streaming_parser = AsciiReader::new(cursor);
521 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect();
522
523 let comparison = &[
524 StartDictionary(None),
525 String("name".into()),
526 String("James".into()),
527 String("age".into()),
528 Integer(42.into()),
529 EndCollection,
530 ];
531
532 assert_eq!(events, comparison);
533 }
534
535 #[test]
536 fn netnewswire_pbxproj() {
537 let reader = File::open("./tests/data/netnewswire.pbxproj").unwrap();
538 let streaming_parser = AsciiReader::new(reader);
539
540 let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect();
542
543 assert!(!events.is_empty());
544 }
545}