plist/stream/
binary_reader.rs

1use std::{
2    io::{self, Read, Seek, SeekFrom},
3    mem::size_of,
4};
5
6use crate::{
7    date::{Date, InfiniteOrNanDate},
8    error::{Error, ErrorKind},
9    stream::{Event, OwnedEvent},
10    u64_to_usize, Uid,
11};
12
13struct StackItem {
14    object_ref: u64,
15    child_object_refs: Vec<u64>,
16    ty: StackType,
17}
18
/// The kind of collection a `StackItem` stands for.
enum StackType {
    /// An array object (token type `0xa`).
    Array,
    /// A dictionary object (token type `0xd`).
    Dict,
}
23
24// https://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c
25// https://hg.python.org/cpython/file/3.4/Lib/plistlib.py
26pub struct BinaryReader<R> {
27    stack: Vec<StackItem>,
28    object_offsets: Vec<u64>,
29    object_on_stack: Vec<bool>,
30    reader: PosReader<R>,
31    ref_size: u8,
32    root_object: u64,
33    trailer_start_offset: u64,
34}
35
/// A reader paired with the byte offset used when reporting errors and checking bounds.
struct PosReader<R> {
    /// The wrapped stream.
    reader: R,
    /// Byte offset within the stream, counted from its start.
    pos: u64,
}
40
41impl<R: Read + Seek> PosReader<R> {
42    fn read_all(&mut self, buf: &mut [u8]) -> Result<(), Error> {
43        self.read_exact(buf)
44            .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?;
45        Ok(())
46    }
47
48    fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
49        self.pos = self
50            .reader
51            .seek(pos)
52            .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?;
53        Ok(self.pos)
54    }
55}
56
57impl<R: Read> Read for PosReader<R> {
58    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
59        let count = self.reader.read(buf)?;
60        self.pos
61            .checked_add(count as u64)
62            .expect("file cannot be larger than `u64::max_value()` bytes");
63        Ok(count)
64    }
65}
66
67impl<R: Read + Seek> BinaryReader<R> {
68    pub fn new(reader: R) -> BinaryReader<R> {
69        BinaryReader {
70            stack: Vec::new(),
71            object_offsets: Vec::new(),
72            object_on_stack: Vec::new(),
73            reader: PosReader { reader, pos: 0 },
74            ref_size: 0,
75            root_object: 0,
76            trailer_start_offset: 0,
77        }
78    }
79
80    fn allocate_vec<T>(&self, len: u64, size: usize) -> Result<Vec<T>, Error> {
81        // Check we are not reading past the start of the plist trailer
82        let inner = |len: u64, size: usize| {
83            let byte_len = len.checked_mul(size as u64)?;
84            let end_offset = self.reader.pos.checked_add(byte_len)?;
85            if end_offset <= self.trailer_start_offset {
86                Some(())
87            } else {
88                None
89            }
90        };
91        inner(len, size).ok_or_else(|| self.with_pos(ErrorKind::ObjectOffsetTooLarge))?;
92
93        Ok(Vec::with_capacity(len as usize))
94    }
95
96    fn read_trailer(&mut self) -> Result<(), Error> {
97        self.reader.seek(SeekFrom::Start(0))?;
98        let mut magic = [0; 8];
99        self.reader.read_all(&mut magic)?;
100        if &magic != b"bplist00" {
101            return Err(self.with_pos(ErrorKind::InvalidMagic));
102        }
103
104        self.trailer_start_offset = self.reader.seek(SeekFrom::End(-32))?;
105
106        // Trailer starts with 6 bytes of padding
107        let mut zeros = [0; 6];
108        self.reader.read_all(&mut zeros)?;
109
110        let offset_size = self.read_u8()?;
111        match offset_size {
112            1 | 2 | 4 | 8 => (),
113            _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectOffsetSize)),
114        }
115
116        self.ref_size = self.read_u8()?;
117        match self.ref_size {
118            1 | 2 | 4 | 8 => (),
119            _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectReferenceSize)),
120        }
121
122        let num_objects = self.read_be_u64()?;
123        self.root_object = self.read_be_u64()?;
124        let offset_table_offset = self.read_be_u64()?;
125
126        // Read offset table
127        self.reader.seek(SeekFrom::Start(offset_table_offset))?;
128        self.object_offsets = self.read_ints(num_objects, offset_size)?;
129        self.object_on_stack = vec![false; self.object_offsets.len()];
130
131        Ok(())
132    }
133
134    /// Reads a list of `len` big-endian integers of `size` bytes from the reader.
135    fn read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>, Error> {
136        let mut ints = self.allocate_vec(len, size as usize)?;
137        for _ in 0..len {
138            match size {
139                1 => ints.push(self.read_u8()?.into()),
140                2 => ints.push(self.read_be_u16()?.into()),
141                4 => ints.push(self.read_be_u32()?.into()),
142                8 => ints.push(self.read_be_u64()?),
143                _ => unreachable!("size is either self.ref_size or offset_size both of which are already validated")
144            }
145        }
146        Ok(ints)
147    }
148
149    /// Reads a list of `len` offsets into the object table from the reader.
150    fn read_refs(&mut self, len: u64) -> Result<Vec<u64>, Error> {
151        let ref_size = self.ref_size;
152        self.read_ints(len, ref_size)
153    }
154
155    /// Reads a compressed value length from the reader. `len` must contain the low 4 bits of the
156    /// object token.
157    fn read_object_len(&mut self, len: u8) -> Result<u64, Error> {
158        if (len & 0x0f) == 0x0f {
159            let len_power_of_two = self.read_u8()? & 0x03;
160            Ok(match len_power_of_two {
161                0 => self.read_u8()?.into(),
162                1 => self.read_be_u16()?.into(),
163                2 => self.read_be_u32()?.into(),
164                3 => self.read_be_u64()?,
165                _ => return Err(self.with_pos(ErrorKind::InvalidObjectLength)),
166            })
167        } else {
168            Ok(len.into())
169        }
170    }
171
172    /// Reads `len` bytes from the reader.
173    fn read_data(&mut self, len: u64) -> Result<Vec<u8>, Error> {
174        let mut data = self.allocate_vec(len, size_of::<u8>())?;
175        data.resize(len as usize, 0);
176        self.reader.read_all(&mut data)?;
177        Ok(data)
178    }
179
180    fn seek_to_object(&mut self, object_ref: u64) -> Result<u64, Error> {
181        let object_ref = u64_to_usize(object_ref)
182            .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?;
183        let offset = *self
184            .object_offsets
185            .get(object_ref)
186            .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?;
187        if offset >= self.trailer_start_offset {
188            return Err(self.with_pos(ErrorKind::ObjectOffsetTooLarge));
189        }
190        self.reader.seek(SeekFrom::Start(offset))
191    }
192
193    fn push_stack_item_and_check_for_recursion(&mut self, item: StackItem) -> Result<(), Error> {
194        let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error");
195        let is_on_stack = &mut self.object_on_stack[object_ref];
196        if *is_on_stack {
197            return Err(self.with_pos(ErrorKind::RecursiveObject));
198        }
199        *is_on_stack = true;
200        self.stack.push(item);
201        Ok(())
202    }
203
204    fn pop_stack_item(&mut self) -> StackItem {
205        let item = self.stack.pop().expect("internal consistency error");
206        let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error");
207        self.object_on_stack[object_ref] = false;
208        item
209    }
210
211    fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> {
212        let object_ref = if self.ref_size == 0 {
213            // Initialise here rather than in new
214            self.read_trailer()?;
215            self.root_object
216        } else {
217            let maybe_object_ref = if let Some(stack_item) = self.stack.last_mut() {
218                stack_item.child_object_refs.pop()
219            } else {
220                // Finished reading the plist
221                return Ok(None);
222            };
223
224            if let Some(object_ref) = maybe_object_ref {
225                object_ref
226            } else {
227                // We're at the end of an array or dict. Pop the top stack item and return.
228                let stack_item = self.pop_stack_item();
229                match stack_item.ty {
230                    StackType::Array | StackType::Dict => return Ok(Some(Event::EndCollection)),
231                }
232            }
233        };
234
235        self.seek_to_object(object_ref)?;
236
237        let token = self.read_u8()?;
238        let ty = (token & 0xf0) >> 4;
239        let size = token & 0x0f;
240
241        let result = match (ty, size) {
242            (0x0, 0x00) => return Err(self.with_pos(ErrorKind::NullObjectUnimplemented)),
243            (0x0, 0x08) => Some(Event::Boolean(false)),
244            (0x0, 0x09) => Some(Event::Boolean(true)),
245            (0x0, 0x0f) => return Err(self.with_pos(ErrorKind::FillObjectUnimplemented)),
246            (0x1, 0) => Some(Event::Integer(self.read_u8()?.into())),
247            (0x1, 1) => Some(Event::Integer(self.read_be_u16()?.into())),
248            (0x1, 2) => Some(Event::Integer(self.read_be_u32()?.into())),
249            (0x1, 3) => Some(Event::Integer(self.read_be_i64()?.into())),
250            (0x1, 4) => {
251                let value = self.read_be_i128()?;
252                if value < 0 || value > i128::from(u64::max_value()) {
253                    return Err(self.with_pos(ErrorKind::IntegerOutOfRange));
254                }
255                Some(Event::Integer((value as u64).into()))
256            }
257            (0x1, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // variable length int
258            (0x2, 2) => Some(Event::Real(f32::from_bits(self.read_be_u32()?).into())),
259            (0x2, 3) => Some(Event::Real(f64::from_bits(self.read_be_u64()?))),
260            (0x2, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // odd length float
261            (0x3, 3) => {
262                // Date. Seconds since 1/1/2001 00:00:00.
263                let secs = f64::from_bits(self.read_be_u64()?);
264                let date = Date::from_seconds_since_plist_epoch(secs)
265                    .map_err(|InfiniteOrNanDate| self.with_pos(ErrorKind::InfiniteOrNanDate))?;
266                Some(Event::Date(date))
267            }
268            (0x4, n) => {
269                // Data
270                let len = self.read_object_len(n)?;
271                Some(Event::Data(self.read_data(len)?.into()))
272            }
273            (0x5, n) => {
274                // ASCII string
275                let len = self.read_object_len(n)?;
276                let raw = self.read_data(len)?;
277                let string = String::from_utf8(raw)
278                    .map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String))?;
279                Some(Event::String(string.into()))
280            }
281            (0x6, n) => {
282                // UTF-16 string
283                let len_utf16_codepoints = self.read_object_len(n)?;
284                let mut raw_utf16 = self.allocate_vec(len_utf16_codepoints, size_of::<u16>())?;
285
286                for _ in 0..len_utf16_codepoints {
287                    raw_utf16.push(self.read_be_u16()?);
288                }
289
290                let string = String::from_utf16(&raw_utf16)
291                    .map_err(|_| self.with_pos(ErrorKind::InvalidUtf16String))?;
292                Some(Event::String(string.into()))
293            }
294            (0x8, n) if n < 8 => {
295                // Uid
296                let mut buf = [0; 8];
297                // `len_bytes` is at most 8.
298                let len_bytes = n as usize + 1;
299                // Values are stored in big-endian so we must put the least significant bytes at
300                // the end of the buffer.
301                self.reader.read_all(&mut buf[8 - len_bytes..])?;
302                let value = u64::from_be_bytes(buf);
303
304                Some(Event::Uid(Uid::new(value)))
305            }
306            (0xa, n) => {
307                // Array
308                let len = self.read_object_len(n)?;
309                let mut child_object_refs = self.read_refs(len)?;
310                // Reverse so we can pop off the end of the stack in order
311                child_object_refs.reverse();
312
313                self.push_stack_item_and_check_for_recursion(StackItem {
314                    object_ref,
315                    ty: StackType::Array,
316                    child_object_refs,
317                })?;
318
319                Some(Event::StartArray(Some(len)))
320            }
321            (0xd, n) => {
322                // Dict
323                let len = self.read_object_len(n)?;
324                let key_refs = self.read_refs(len)?;
325                let value_refs = self.read_refs(len)?;
326
327                let keys_and_values_len = len
328                    .checked_mul(2)
329                    .ok_or_else(|| self.with_pos(ErrorKind::ObjectTooLarge))?;
330                let mut child_object_refs =
331                    self.allocate_vec(keys_and_values_len, self.ref_size as usize)?;
332                let len = key_refs.len();
333                for i in 1..=len {
334                    // Reverse so we can pop off the end of the stack in order
335                    child_object_refs.push(value_refs[len - i]);
336                    child_object_refs.push(key_refs[len - i]);
337                }
338
339                self.push_stack_item_and_check_for_recursion(StackItem {
340                    object_ref,
341                    ty: StackType::Dict,
342                    child_object_refs,
343                })?;
344
345                Some(Event::StartDictionary(Some(len as u64)))
346            }
347            (_, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))),
348        };
349
350        Ok(result)
351    }
352
353    fn read_u8(&mut self) -> Result<u8, Error> {
354        let mut buf = [0; 1];
355        self.reader.read_all(&mut buf)?;
356        Ok(buf[0])
357    }
358
359    fn read_be_u16(&mut self) -> Result<u16, Error> {
360        let mut buf = [0; 2];
361        self.reader.read_all(&mut buf)?;
362        Ok(u16::from_be_bytes(buf))
363    }
364
365    fn read_be_u32(&mut self) -> Result<u32, Error> {
366        let mut buf = [0; 4];
367        self.reader.read_all(&mut buf)?;
368        Ok(u32::from_be_bytes(buf))
369    }
370
371    fn read_be_u64(&mut self) -> Result<u64, Error> {
372        let mut buf = [0; 8];
373        self.reader.read_all(&mut buf)?;
374        Ok(u64::from_be_bytes(buf))
375    }
376
377    fn read_be_i64(&mut self) -> Result<i64, Error> {
378        let mut buf = [0; 8];
379        self.reader.read_all(&mut buf)?;
380        Ok(i64::from_be_bytes(buf))
381    }
382
383    fn read_be_i128(&mut self) -> Result<i128, Error> {
384        let mut buf = [0; 16];
385        self.reader.read_all(&mut buf)?;
386        Ok(i128::from_be_bytes(buf))
387    }
388
389    fn with_pos(&self, kind: ErrorKind) -> Error {
390        kind.with_byte_offset(self.reader.pos)
391    }
392}
393
394impl<R: Read + Seek> Iterator for BinaryReader<R> {
395    type Item = Result<OwnedEvent, Error>;
396
397    fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
398        match self.read_next() {
399            Ok(Some(event)) => Some(Ok(event)),
400            Err(err) => {
401                // Mark the plist as finished
402                self.stack.clear();
403                Some(Err(err))
404            }
405            Ok(None) => None,
406        }
407    }
408}
409
#[cfg(test)]
mod tests {
    use std::fs::File;

    use super::*;

    /// Reads the reference fixture and compares the complete event stream.
    #[test]
    fn streaming_parser() {
        use crate::stream::Event::*;

        let file = File::open("./tests/data/binary.plist").unwrap();
        let parsed: Vec<Event> = BinaryReader::new(file).map(Result::unwrap).collect();

        let expected = &[
            StartDictionary(Some(13)),
            String("Author".into()),
            String("William Shakespeare".into()),
            String("Birthdate".into()),
            Date(super::Date::from_xml_format("1981-05-16T11:32:06Z").unwrap()),
            String("EmptyArray".into()),
            StartArray(Some(0)),
            EndCollection,
            String("IsNotFalse".into()),
            Boolean(false),
            String("SmallestNumber".into()),
            Integer((-9223372036854775808i64).into()),
            String("EmptyDictionary".into()),
            StartDictionary(Some(0)),
            EndCollection,
            String("Height".into()),
            Real(1.6),
            String("Lines".into()),
            StartArray(Some(2)),
            String("It is a tale told by an idiot,     ".into()),
            String("Full of sound and fury, signifying nothing.".into()),
            EndCollection,
            String("Death".into()),
            Integer(1564.into()),
            String("Blank".into()),
            String("".into()),
            String("BiggestNumber".into()),
            Integer(18446744073709551615u64.into()),
            String("IsTrue".into()),
            Boolean(true),
            String("Data".into()),
            Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
            EndCollection,
        ];

        assert_eq!(parsed, &expected[..]);
    }

    /// Strings stored as UTF-16 must be decoded into regular strings.
    #[test]
    fn utf16_plist() {
        let file = File::open("./tests/data/utf16_bplist.plist").unwrap();
        let mut parsed: Vec<Event> = BinaryReader::new(file).map(Result::unwrap).collect();

        assert_eq!(parsed[2], Event::String("\u{2605} or better".into()));

        let poem = match parsed[4] {
            Event::String(ref mut text) => text,
            _ => panic!("not a string"),
        };
        assert_eq!(poem.len(), 643);
        assert_eq!(poem.to_mut().pop().unwrap(), '\u{2605}');
    }

    /// `Uid` objects, as found in NSKeyedArchiver files, must surface as `Event::Uid`.
    #[test]
    fn nskeyedarchiver_plist() {
        let file = File::open("./tests/data/binary_NSKeyedArchiver.plist").unwrap();
        let parsed: Vec<Event> = BinaryReader::new(file).map(Result::unwrap).collect();

        let expected_uids = [(10, 4), (12, 2), (18, 3), (46, 1)];
        for &(index, uid) in expected_uids.iter() {
            assert_eq!(parsed[index], Event::Uid(Uid::new(uid)));
        }
    }
}