plist/stream/
xml_reader.rs

1use base64::{engine::general_purpose::STANDARD as base64_standard, Engine};
2use quick_xml::{events::Event as XmlEvent, Error as XmlReaderError, Reader as EventReader};
3use std::io::{self, BufRead};
4
5use crate::{
6    error::{Error, ErrorKind, FilePosition},
7    stream::{Event, OwnedEvent},
8    Date, Integer,
9};
10
/// Owned copy of an XML element name, stored as raw bytes in a boxed slice.
#[derive(Clone, PartialEq, Eq)]
struct ElmName(Box<[u8]>);

impl From<&[u8]> for ElmName {
    /// Copies `bytes` into a newly allocated boxed slice.
    fn from(bytes: &[u8]) -> Self {
        Self(bytes.into())
    }
}

impl AsRef<[u8]> for ElmName {
    /// Borrows the stored name bytes.
    fn as_ref(&self) -> &[u8] {
        &*self.0
    }
}
25
26pub struct XmlReader<R: BufRead> {
27    buffer: Vec<u8>,
28    started: bool,
29    finished: bool,
30    state: ReaderState<R>,
31}
32
33struct ReaderState<R: BufRead>(EventReader<R>);
34
35enum ReadResult {
36    XmlDecl,
37    Event(OwnedEvent),
38    Eof,
39}
40
41impl<R: BufRead> XmlReader<R> {
42    pub fn new(reader: R) -> XmlReader<R> {
43        let mut xml_reader = EventReader::from_reader(reader);
44        let config = xml_reader.config_mut();
45        config.trim_text(false);
46        config.check_end_names = true;
47        config.expand_empty_elements = true;
48
49        XmlReader {
50            buffer: Vec::new(),
51            started: false,
52            finished: false,
53            state: ReaderState(xml_reader),
54        }
55    }
56
57    pub fn into_inner(self) -> R {
58        self.state.0.into_inner()
59    }
60
61    pub(crate) fn xml_doc_started(&self) -> bool {
62        self.started
63    }
64}
65
66impl From<XmlReaderError> for ErrorKind {
67    fn from(err: XmlReaderError) -> Self {
68        match err {
69            XmlReaderError::Io(err) if err.kind() == io::ErrorKind::UnexpectedEof => {
70                ErrorKind::UnexpectedEof
71            }
72            XmlReaderError::Io(err) => match std::sync::Arc::try_unwrap(err) {
73                Ok(err) => ErrorKind::Io(err),
74                Err(err) => ErrorKind::Io(std::io::Error::from(err.kind())),
75            },
76            XmlReaderError::Syntax(_) => ErrorKind::UnexpectedEof,
77            XmlReaderError::IllFormed(_) => ErrorKind::InvalidXmlSyntax,
78            XmlReaderError::NonDecodable(_) => ErrorKind::InvalidXmlUtf8,
79            _ => ErrorKind::InvalidXmlSyntax,
80        }
81    }
82}
83
84impl<R: BufRead> Iterator for XmlReader<R> {
85    type Item = Result<OwnedEvent, Error>;
86
87    fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
88        if self.finished {
89            return None;
90        }
91
92        loop {
93            match self.state.read_next(&mut self.buffer) {
94                Ok(ReadResult::XmlDecl) => {
95                    self.started = true;
96                }
97                Ok(ReadResult::Event(event)) => {
98                    self.started = true;
99                    return Some(Ok(event));
100                }
101                Ok(ReadResult::Eof) => {
102                    self.started = true;
103                    self.finished = true;
104                    return None;
105                }
106                Err(err) => {
107                    self.finished = true;
108                    return Some(Err(err));
109                }
110            }
111        }
112    }
113}
114
115impl<R: BufRead> ReaderState<R> {
116    fn xml_reader_pos(&self) -> FilePosition {
117        let pos = self.0.buffer_position();
118        FilePosition(pos as u64)
119    }
120
121    fn with_pos(&self, kind: ErrorKind) -> Error {
122        kind.with_position(self.xml_reader_pos())
123    }
124
125    fn read_xml_event<'buf>(&mut self, buffer: &'buf mut Vec<u8>) -> Result<XmlEvent<'buf>, Error> {
126        let event = self.0.read_event_into(buffer);
127        let pos = self.xml_reader_pos();
128        event.map_err(|err| ErrorKind::from(err).with_position(pos))
129    }
130
131    fn read_content(&mut self, buffer: &mut Vec<u8>) -> Result<String, Error> {
132        loop {
133            match self.read_xml_event(buffer)? {
134                XmlEvent::Text(text) => {
135                    let unescaped = text
136                        .unescape()
137                        .map_err(|err| self.with_pos(ErrorKind::from(err)))?;
138                    return String::from_utf8(unescaped.as_ref().into())
139                        .map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String));
140                }
141                XmlEvent::End(_) => {
142                    return Ok("".to_owned());
143                }
144                XmlEvent::Eof => return Err(self.with_pos(ErrorKind::UnclosedXmlElement)),
145                XmlEvent::Start(_) => return Err(self.with_pos(ErrorKind::UnexpectedXmlOpeningTag)),
146                XmlEvent::PI(_)
147                | XmlEvent::Empty(_)
148                | XmlEvent::Comment(_)
149                | XmlEvent::CData(_)
150                | XmlEvent::Decl(_)
151                | XmlEvent::DocType(_) => {
152                    // skip
153                }
154            }
155        }
156    }
157
158    fn read_next(&mut self, buffer: &mut Vec<u8>) -> Result<ReadResult, Error> {
159        loop {
160            match self.read_xml_event(buffer)? {
161                XmlEvent::Decl(_) | XmlEvent::DocType(_) => return Ok(ReadResult::XmlDecl),
162                XmlEvent::Start(name) => {
163                    match name.local_name().as_ref() {
164                        b"plist" => {}
165                        b"array" => return Ok(ReadResult::Event(Event::StartArray(None))),
166                        b"dict" => return Ok(ReadResult::Event(Event::StartDictionary(None))),
167                        b"key" => {
168                            return Ok(ReadResult::Event(Event::String(
169                                self.read_content(buffer)?.into(),
170                            )))
171                        }
172                        b"data" => {
173                            let mut encoded = self.read_content(buffer)?;
174                            // Strip whitespace and line endings from input string
175                            encoded.retain(|c| !c.is_ascii_whitespace());
176                            let data = base64_standard
177                                .decode(&encoded)
178                                .map_err(|_| self.with_pos(ErrorKind::InvalidDataString))?;
179                            return Ok(ReadResult::Event(Event::Data(data.into())));
180                        }
181                        b"date" => {
182                            let s = self.read_content(buffer)?;
183                            let date = Date::from_xml_format(&s)
184                                .map_err(|_| self.with_pos(ErrorKind::InvalidDateString))?;
185                            return Ok(ReadResult::Event(Event::Date(date)));
186                        }
187                        b"integer" => {
188                            let s = self.read_content(buffer)?;
189                            match Integer::from_str(&s) {
190                                Ok(i) => return Ok(ReadResult::Event(Event::Integer(i))),
191                                Err(_) => {
192                                    return Err(self.with_pos(ErrorKind::InvalidIntegerString))
193                                }
194                            }
195                        }
196                        b"real" => {
197                            let s = self.read_content(buffer)?;
198                            match s.parse() {
199                                Ok(f) => return Ok(ReadResult::Event(Event::Real(f))),
200                                Err(_) => return Err(self.with_pos(ErrorKind::InvalidRealString)),
201                            }
202                        }
203                        b"string" => {
204                            return Ok(ReadResult::Event(Event::String(
205                                self.read_content(buffer)?.into(),
206                            )))
207                        }
208                        b"true" => return Ok(ReadResult::Event(Event::Boolean(true))),
209                        b"false" => return Ok(ReadResult::Event(Event::Boolean(false))),
210                        _ => return Err(self.with_pos(ErrorKind::UnknownXmlElement)),
211                    }
212                }
213                XmlEvent::End(name) => match name.local_name().as_ref() {
214                    b"array" | b"dict" => return Ok(ReadResult::Event(Event::EndCollection)),
215                    _ => (),
216                },
217                XmlEvent::Eof => return Ok(ReadResult::Eof),
218                XmlEvent::Text(text) => {
219                    let unescaped = text
220                        .unescape()
221                        .map_err(|err| self.with_pos(ErrorKind::from(err)))?;
222
223                    if !unescaped.chars().all(char::is_whitespace) {
224                        return Err(
225                            self.with_pos(ErrorKind::UnexpectedXmlCharactersExpectedElement)
226                        );
227                    }
228                }
229                XmlEvent::PI(_)
230                | XmlEvent::CData(_)
231                | XmlEvent::Comment(_)
232                | XmlEvent::Empty(_) => {
233                    // skip
234                }
235            }
236        }
237    }
238}
239
#[cfg(test)]
mod tests {
    use std::{fs::File, io::BufReader};

    use super::*;
    use crate::stream::Event::*;

    /// Opens the fixture at `path` and wraps it in an `XmlReader`; panics if the file is missing.
    fn reader_for(path: &str) -> XmlReader<BufReader<File>> {
        let file = File::open(path).unwrap();
        XmlReader::new(BufReader::new(file))
    }

    /// The well-formed fixture must produce exactly this sequence of events.
    #[test]
    fn streaming_parser() {
        let parsed: Result<Vec<_>, _> = reader_for("./tests/data/xml.plist").collect();

        let expected = &[
            StartDictionary(None),
            String("Author".into()),
            String("William Shakespeare".into()),
            String("Lines".into()),
            StartArray(None),
            String("It is a tale told by an idiot,     ".into()),
            String("Full of sound and fury, signifying nothing.".into()),
            EndCollection,
            String("Death".into()),
            Integer(1564.into()),
            String("Height".into()),
            Real(1.60),
            String("Data".into()),
            Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
            String("Birthdate".into()),
            Date(super::Date::from_xml_format("1981-05-16T11:32:06Z").unwrap()),
            String("Blank".into()),
            String("".into()),
            String("BiggestNumber".into()),
            Integer(18446744073709551615u64.into()),
            String("SmallestNumber".into()),
            Integer((-9223372036854775808i64).into()),
            String("HexademicalNumber".into()),
            Integer(0xdead_beef_u64.into()),
            String("IsTrue".into()),
            Boolean(true),
            String("IsNotFalse".into()),
            Boolean(false),
            EndCollection,
        ];

        assert_eq!(parsed.unwrap(), expected);
    }

    /// The malformed fixture must end the event stream with an error.
    #[test]
    fn bad_data() {
        let results: Vec<_> = reader_for("./tests/data/xml_error.plist").collect();

        let last = results.last().unwrap();
        assert!(last.is_err());
    }
}