1use std::{
2 io::{self, Read, Seek, SeekFrom},
3 mem::size_of,
4};
5
6use crate::{
7 date::{Date, InfiniteOrNanDate},
8 error::{Error, ErrorKind},
9 stream::{Event, OwnedEvent},
10 u64_to_usize, Uid,
11};
12
/// A collection (array or dictionary) whose children are currently being emitted.
struct StackItem {
    /// Object reference of the collection itself; used to index `object_on_stack`.
    object_ref: u64,
    /// References to the children that have not been visited yet. The reader stores them in
    /// reverse order so that the next child to visit can be `pop()`ed off the end.
    child_object_refs: Vec<u64>,
    /// Which kind of collection this entry represents.
    ty: StackType,
}
18
/// The kind of collection a `StackItem` represents.
enum StackType {
    /// Pushed when an array object is opened.
    Array,
    /// Pushed when a dictionary object is opened.
    Dict,
}
23
/// A streaming reader that turns a binary property list into a sequence of events.
///
/// The trailer is not parsed by `new`; it is read on the first call to `read_next`.
pub struct BinaryReader<R> {
    /// Collections that have been opened but not yet closed, innermost last.
    stack: Vec<StackItem>,
    /// Byte offset of every object, indexed by object reference (filled in by `read_trailer`).
    object_offsets: Vec<u64>,
    /// One flag per object, `true` while that object is on `stack`; used to reject
    /// collections that contain themselves.
    object_on_stack: Vec<bool>,
    /// The underlying reader together with the byte offset tracked for it.
    reader: PosReader<R>,
    /// Width in bytes of an object reference. Stays `0` until the trailer has been read,
    /// which `read_next` uses to decide whether the trailer still needs parsing.
    ref_size: u8,
    /// Object reference of the top-level object, as stored in the trailer.
    root_object: u64,
    /// Byte offset at which the 32-byte trailer begins.
    trailer_start_offset: u64,
}
35
/// Wraps a reader together with a byte offset that is attached to errors and used for
/// bounds checks.
struct PosReader<R> {
    /// The wrapped reader.
    reader: R,
    /// The byte offset tracked for `reader`.
    pos: u64,
}
40
41impl<R: Read + Seek> PosReader<R> {
42 fn read_all(&mut self, buf: &mut [u8]) -> Result<(), Error> {
43 self.read_exact(buf)
44 .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?;
45 Ok(())
46 }
47
48 fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
49 self.pos = self
50 .reader
51 .seek(pos)
52 .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?;
53 Ok(self.pos)
54 }
55}
56
57impl<R: Read> Read for PosReader<R> {
58 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
59 let count = self.reader.read(buf)?;
60 self.pos
61 .checked_add(count as u64)
62 .expect("file cannot be larger than `u64::max_value()` bytes");
63 Ok(count)
64 }
65}
66
67impl<R: Read + Seek> BinaryReader<R> {
68 pub fn new(reader: R) -> BinaryReader<R> {
69 BinaryReader {
70 stack: Vec::new(),
71 object_offsets: Vec::new(),
72 object_on_stack: Vec::new(),
73 reader: PosReader { reader, pos: 0 },
74 ref_size: 0,
75 root_object: 0,
76 trailer_start_offset: 0,
77 }
78 }
79
80 fn allocate_vec<T>(&self, len: u64, size: usize) -> Result<Vec<T>, Error> {
81 let inner = |len: u64, size: usize| {
83 let byte_len = len.checked_mul(size as u64)?;
84 let end_offset = self.reader.pos.checked_add(byte_len)?;
85 if end_offset <= self.trailer_start_offset {
86 Some(())
87 } else {
88 None
89 }
90 };
91 inner(len, size).ok_or_else(|| self.with_pos(ErrorKind::ObjectOffsetTooLarge))?;
92
93 Ok(Vec::with_capacity(len as usize))
94 }
95
96 fn read_trailer(&mut self) -> Result<(), Error> {
97 self.reader.seek(SeekFrom::Start(0))?;
98 let mut magic = [0; 8];
99 self.reader.read_all(&mut magic)?;
100 if &magic != b"bplist00" {
101 return Err(self.with_pos(ErrorKind::InvalidMagic));
102 }
103
104 self.trailer_start_offset = self.reader.seek(SeekFrom::End(-32))?;
105
106 let mut zeros = [0; 6];
108 self.reader.read_all(&mut zeros)?;
109
110 let offset_size = self.read_u8()?;
111 match offset_size {
112 1 | 2 | 4 | 8 => (),
113 _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectOffsetSize)),
114 }
115
116 self.ref_size = self.read_u8()?;
117 match self.ref_size {
118 1 | 2 | 4 | 8 => (),
119 _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectReferenceSize)),
120 }
121
122 let num_objects = self.read_be_u64()?;
123 self.root_object = self.read_be_u64()?;
124 let offset_table_offset = self.read_be_u64()?;
125
126 self.reader.seek(SeekFrom::Start(offset_table_offset))?;
128 self.object_offsets = self.read_ints(num_objects, offset_size)?;
129 self.object_on_stack = vec![false; self.object_offsets.len()];
130
131 Ok(())
132 }
133
134 fn read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>, Error> {
136 let mut ints = self.allocate_vec(len, size as usize)?;
137 for _ in 0..len {
138 match size {
139 1 => ints.push(self.read_u8()?.into()),
140 2 => ints.push(self.read_be_u16()?.into()),
141 4 => ints.push(self.read_be_u32()?.into()),
142 8 => ints.push(self.read_be_u64()?),
143 _ => unreachable!("size is either self.ref_size or offset_size both of which are already validated")
144 }
145 }
146 Ok(ints)
147 }
148
149 fn read_refs(&mut self, len: u64) -> Result<Vec<u64>, Error> {
151 let ref_size = self.ref_size;
152 self.read_ints(len, ref_size)
153 }
154
155 fn read_object_len(&mut self, len: u8) -> Result<u64, Error> {
158 if (len & 0x0f) == 0x0f {
159 let len_power_of_two = self.read_u8()? & 0x03;
160 Ok(match len_power_of_two {
161 0 => self.read_u8()?.into(),
162 1 => self.read_be_u16()?.into(),
163 2 => self.read_be_u32()?.into(),
164 3 => self.read_be_u64()?,
165 _ => return Err(self.with_pos(ErrorKind::InvalidObjectLength)),
166 })
167 } else {
168 Ok(len.into())
169 }
170 }
171
172 fn read_data(&mut self, len: u64) -> Result<Vec<u8>, Error> {
174 let mut data = self.allocate_vec(len, size_of::<u8>())?;
175 data.resize(len as usize, 0);
176 self.reader.read_all(&mut data)?;
177 Ok(data)
178 }
179
180 fn seek_to_object(&mut self, object_ref: u64) -> Result<u64, Error> {
181 let object_ref = u64_to_usize(object_ref)
182 .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?;
183 let offset = *self
184 .object_offsets
185 .get(object_ref)
186 .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?;
187 if offset >= self.trailer_start_offset {
188 return Err(self.with_pos(ErrorKind::ObjectOffsetTooLarge));
189 }
190 self.reader.seek(SeekFrom::Start(offset))
191 }
192
193 fn push_stack_item_and_check_for_recursion(&mut self, item: StackItem) -> Result<(), Error> {
194 let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error");
195 let is_on_stack = &mut self.object_on_stack[object_ref];
196 if *is_on_stack {
197 return Err(self.with_pos(ErrorKind::RecursiveObject));
198 }
199 *is_on_stack = true;
200 self.stack.push(item);
201 Ok(())
202 }
203
204 fn pop_stack_item(&mut self) -> StackItem {
205 let item = self.stack.pop().expect("internal consistency error");
206 let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error");
207 self.object_on_stack[object_ref] = false;
208 item
209 }
210
211 fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> {
212 let object_ref = if self.ref_size == 0 {
213 self.read_trailer()?;
215 self.root_object
216 } else {
217 let maybe_object_ref = if let Some(stack_item) = self.stack.last_mut() {
218 stack_item.child_object_refs.pop()
219 } else {
220 return Ok(None);
222 };
223
224 if let Some(object_ref) = maybe_object_ref {
225 object_ref
226 } else {
227 let stack_item = self.pop_stack_item();
229 match stack_item.ty {
230 StackType::Array | StackType::Dict => return Ok(Some(Event::EndCollection)),
231 }
232 }
233 };
234
235 self.seek_to_object(object_ref)?;
236
237 let token = self.read_u8()?;
238 let ty = (token & 0xf0) >> 4;
239 let size = token & 0x0f;
240
241 let result = match (ty, size) {
242 (0x0, 0x00) => return Err(self.with_pos(ErrorKind::NullObjectUnimplemented)),
243 (0x0, 0x08) => Some(Event::Boolean(false)),
244 (0x0, 0x09) => Some(Event::Boolean(true)),
245 (0x0, 0x0f) => return Err(self.with_pos(ErrorKind::FillObjectUnimplemented)),
246 (0x1, 0) => Some(Event::Integer(self.read_u8()?.into())),
247 (0x1, 1) => Some(Event::Integer(self.read_be_u16()?.into())),
248 (0x1, 2) => Some(Event::Integer(self.read_be_u32()?.into())),
249 (0x1, 3) => Some(Event::Integer(self.read_be_i64()?.into())),
250 (0x1, 4) => {
251 let value = self.read_be_i128()?;
252 if value < 0 || value > i128::from(u64::max_value()) {
253 return Err(self.with_pos(ErrorKind::IntegerOutOfRange));
254 }
255 Some(Event::Integer((value as u64).into()))
256 }
257 (0x1, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), (0x2, 2) => Some(Event::Real(f32::from_bits(self.read_be_u32()?).into())),
259 (0x2, 3) => Some(Event::Real(f64::from_bits(self.read_be_u64()?))),
260 (0x2, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), (0x3, 3) => {
262 let secs = f64::from_bits(self.read_be_u64()?);
264 let date = Date::from_seconds_since_plist_epoch(secs)
265 .map_err(|InfiniteOrNanDate| self.with_pos(ErrorKind::InfiniteOrNanDate))?;
266 Some(Event::Date(date))
267 }
268 (0x4, n) => {
269 let len = self.read_object_len(n)?;
271 Some(Event::Data(self.read_data(len)?.into()))
272 }
273 (0x5, n) => {
274 let len = self.read_object_len(n)?;
276 let raw = self.read_data(len)?;
277 let string = String::from_utf8(raw)
278 .map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String))?;
279 Some(Event::String(string.into()))
280 }
281 (0x6, n) => {
282 let len_utf16_codepoints = self.read_object_len(n)?;
284 let mut raw_utf16 = self.allocate_vec(len_utf16_codepoints, size_of::<u16>())?;
285
286 for _ in 0..len_utf16_codepoints {
287 raw_utf16.push(self.read_be_u16()?);
288 }
289
290 let string = String::from_utf16(&raw_utf16)
291 .map_err(|_| self.with_pos(ErrorKind::InvalidUtf16String))?;
292 Some(Event::String(string.into()))
293 }
294 (0x8, n) if n < 8 => {
295 let mut buf = [0; 8];
297 let len_bytes = n as usize + 1;
299 self.reader.read_all(&mut buf[8 - len_bytes..])?;
302 let value = u64::from_be_bytes(buf);
303
304 Some(Event::Uid(Uid::new(value)))
305 }
306 (0xa, n) => {
307 let len = self.read_object_len(n)?;
309 let mut child_object_refs = self.read_refs(len)?;
310 child_object_refs.reverse();
312
313 self.push_stack_item_and_check_for_recursion(StackItem {
314 object_ref,
315 ty: StackType::Array,
316 child_object_refs,
317 })?;
318
319 Some(Event::StartArray(Some(len)))
320 }
321 (0xd, n) => {
322 let len = self.read_object_len(n)?;
324 let key_refs = self.read_refs(len)?;
325 let value_refs = self.read_refs(len)?;
326
327 let keys_and_values_len = len
328 .checked_mul(2)
329 .ok_or_else(|| self.with_pos(ErrorKind::ObjectTooLarge))?;
330 let mut child_object_refs =
331 self.allocate_vec(keys_and_values_len, self.ref_size as usize)?;
332 let len = key_refs.len();
333 for i in 1..=len {
334 child_object_refs.push(value_refs[len - i]);
336 child_object_refs.push(key_refs[len - i]);
337 }
338
339 self.push_stack_item_and_check_for_recursion(StackItem {
340 object_ref,
341 ty: StackType::Dict,
342 child_object_refs,
343 })?;
344
345 Some(Event::StartDictionary(Some(len as u64)))
346 }
347 (_, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))),
348 };
349
350 Ok(result)
351 }
352
353 fn read_u8(&mut self) -> Result<u8, Error> {
354 let mut buf = [0; 1];
355 self.reader.read_all(&mut buf)?;
356 Ok(buf[0])
357 }
358
359 fn read_be_u16(&mut self) -> Result<u16, Error> {
360 let mut buf = [0; 2];
361 self.reader.read_all(&mut buf)?;
362 Ok(u16::from_be_bytes(buf))
363 }
364
365 fn read_be_u32(&mut self) -> Result<u32, Error> {
366 let mut buf = [0; 4];
367 self.reader.read_all(&mut buf)?;
368 Ok(u32::from_be_bytes(buf))
369 }
370
371 fn read_be_u64(&mut self) -> Result<u64, Error> {
372 let mut buf = [0; 8];
373 self.reader.read_all(&mut buf)?;
374 Ok(u64::from_be_bytes(buf))
375 }
376
377 fn read_be_i64(&mut self) -> Result<i64, Error> {
378 let mut buf = [0; 8];
379 self.reader.read_all(&mut buf)?;
380 Ok(i64::from_be_bytes(buf))
381 }
382
383 fn read_be_i128(&mut self) -> Result<i128, Error> {
384 let mut buf = [0; 16];
385 self.reader.read_all(&mut buf)?;
386 Ok(i128::from_be_bytes(buf))
387 }
388
389 fn with_pos(&self, kind: ErrorKind) -> Error {
390 kind.with_byte_offset(self.reader.pos)
391 }
392}
393
394impl<R: Read + Seek> Iterator for BinaryReader<R> {
395 type Item = Result<OwnedEvent, Error>;
396
397 fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
398 match self.read_next() {
399 Ok(Some(event)) => Some(Ok(event)),
400 Err(err) => {
401 self.stack.clear();
403 Some(Err(err))
404 }
405 Ok(None) => None,
406 }
407 }
408}
409
410#[cfg(test)]
411mod tests {
412 use std::fs::File;
413
414 use super::*;
415
/// Reads the reference binary plist and checks the complete event sequence.
#[test]
fn streaming_parser() {
    use crate::stream::Event::*;

    let file = File::open("./tests/data/binary.plist").unwrap();
    let actual: Vec<Event> = BinaryReader::new(file)
        .map(|event| event.unwrap())
        .collect();

    let expected = [
        StartDictionary(Some(13)),
        String("Author".into()),
        String("William Shakespeare".into()),
        String("Birthdate".into()),
        Date(super::Date::from_xml_format("1981-05-16T11:32:06Z").unwrap()),
        String("EmptyArray".into()),
        StartArray(Some(0)),
        EndCollection,
        String("IsNotFalse".into()),
        Boolean(false),
        String("SmallestNumber".into()),
        Integer((-9223372036854775808i64).into()),
        String("EmptyDictionary".into()),
        StartDictionary(Some(0)),
        EndCollection,
        String("Height".into()),
        Real(1.6),
        String("Lines".into()),
        StartArray(Some(2)),
        String("It is a tale told by an idiot, ".into()),
        String("Full of sound and fury, signifying nothing.".into()),
        EndCollection,
        String("Death".into()),
        Integer(1564.into()),
        String("Blank".into()),
        String("".into()),
        String("BiggestNumber".into()),
        Integer(18446744073709551615u64.into()),
        String("IsTrue".into()),
        Boolean(true),
        String("Data".into()),
        Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
        EndCollection,
    ];

    assert_eq!(actual, &expected[..]);
}
462
/// Checks that UTF-16 encoded strings are decoded, including non-ASCII characters.
#[test]
fn utf16_plist() {
    let file = File::open("./tests/data/utf16_bplist.plist").unwrap();
    let mut events: Vec<Event> = BinaryReader::new(file)
        .map(|event| event.unwrap())
        .collect();

    assert_eq!(events[2], Event::String("\u{2605} or better".into()));

    let poem = match events[4] {
        Event::String(ref mut text) => text,
        _ => panic!("not a string"),
    };
    assert_eq!(poem.len(), 643);
    assert_eq!(poem.to_mut().pop().unwrap(), '\u{2605}');
}
479
/// Checks that UID objects in an NSKeyedArchiver plist are reported at the expected
/// event positions.
#[test]
fn nskeyedarchiver_plist() {
    let file = File::open("./tests/data/binary_NSKeyedArchiver.plist").unwrap();
    let events: Vec<Event> = BinaryReader::new(file)
        .map(|event| event.unwrap())
        .collect();

    // (event index, expected UID value)
    let expected_uids: [(usize, u64); 4] = [(10, 4), (12, 2), (18, 3), (46, 1)];
    for &(index, uid) in expected_uids.iter() {
        assert_eq!(events[index], Event::Uid(Uid::new(uid)));
    }
}
491}