1use base64::{engine::general_purpose::STANDARD as base64_standard, Engine};
2use quick_xml::{events::Event as XmlEvent, Error as XmlReaderError, Reader as EventReader};
3use std::io::{self, BufRead};
4
5use crate::{
6 error::{Error, ErrorKind, FilePosition},
7 stream::{Event, OwnedEvent},
8 Date, Integer,
9};
10
/// Owned, immutable byte string; judging by its name it holds an XML element
/// name (it is not referenced by any other code visible in this file).
#[derive(Clone, PartialEq, Eq)]
struct ElmName(Box<[u8]>);

impl From<&[u8]> for ElmName {
    /// Copies `bytes` into a freshly allocated boxed slice.
    fn from(bytes: &[u8]) -> Self {
        let owned: Box<[u8]> = bytes.into();
        Self(owned)
    }
}

impl AsRef<[u8]> for ElmName {
    /// Borrows the stored bytes.
    fn as_ref(&self) -> &[u8] {
        &*self.0
    }
}
25
26pub struct XmlReader<R: BufRead> {
27 buffer: Vec<u8>,
28 started: bool,
29 finished: bool,
30 state: ReaderState<R>,
31}
32
33struct ReaderState<R: BufRead>(EventReader<R>);
34
35enum ReadResult {
36 XmlDecl,
37 Event(OwnedEvent),
38 Eof,
39}
40
41impl<R: BufRead> XmlReader<R> {
42 pub fn new(reader: R) -> XmlReader<R> {
43 let mut xml_reader = EventReader::from_reader(reader);
44 let config = xml_reader.config_mut();
45 config.trim_text(false);
46 config.check_end_names = true;
47 config.expand_empty_elements = true;
48
49 XmlReader {
50 buffer: Vec::new(),
51 started: false,
52 finished: false,
53 state: ReaderState(xml_reader),
54 }
55 }
56
57 pub fn into_inner(self) -> R {
58 self.state.0.into_inner()
59 }
60
61 pub(crate) fn xml_doc_started(&self) -> bool {
62 self.started
63 }
64}
65
66impl From<XmlReaderError> for ErrorKind {
67 fn from(err: XmlReaderError) -> Self {
68 match err {
69 XmlReaderError::Io(err) if err.kind() == io::ErrorKind::UnexpectedEof => {
70 ErrorKind::UnexpectedEof
71 }
72 XmlReaderError::Io(err) => match std::sync::Arc::try_unwrap(err) {
73 Ok(err) => ErrorKind::Io(err),
74 Err(err) => ErrorKind::Io(std::io::Error::from(err.kind())),
75 },
76 XmlReaderError::Syntax(_) => ErrorKind::UnexpectedEof,
77 XmlReaderError::IllFormed(_) => ErrorKind::InvalidXmlSyntax,
78 XmlReaderError::NonDecodable(_) => ErrorKind::InvalidXmlUtf8,
79 _ => ErrorKind::InvalidXmlSyntax,
80 }
81 }
82}
83
84impl<R: BufRead> Iterator for XmlReader<R> {
85 type Item = Result<OwnedEvent, Error>;
86
87 fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
88 if self.finished {
89 return None;
90 }
91
92 loop {
93 match self.state.read_next(&mut self.buffer) {
94 Ok(ReadResult::XmlDecl) => {
95 self.started = true;
96 }
97 Ok(ReadResult::Event(event)) => {
98 self.started = true;
99 return Some(Ok(event));
100 }
101 Ok(ReadResult::Eof) => {
102 self.started = true;
103 self.finished = true;
104 return None;
105 }
106 Err(err) => {
107 self.finished = true;
108 return Some(Err(err));
109 }
110 }
111 }
112 }
113}
114
115impl<R: BufRead> ReaderState<R> {
116 fn xml_reader_pos(&self) -> FilePosition {
117 let pos = self.0.buffer_position();
118 FilePosition(pos as u64)
119 }
120
121 fn with_pos(&self, kind: ErrorKind) -> Error {
122 kind.with_position(self.xml_reader_pos())
123 }
124
125 fn read_xml_event<'buf>(&mut self, buffer: &'buf mut Vec<u8>) -> Result<XmlEvent<'buf>, Error> {
126 let event = self.0.read_event_into(buffer);
127 let pos = self.xml_reader_pos();
128 event.map_err(|err| ErrorKind::from(err).with_position(pos))
129 }
130
131 fn read_content(&mut self, buffer: &mut Vec<u8>) -> Result<String, Error> {
132 loop {
133 match self.read_xml_event(buffer)? {
134 XmlEvent::Text(text) => {
135 let unescaped = text
136 .unescape()
137 .map_err(|err| self.with_pos(ErrorKind::from(err)))?;
138 return String::from_utf8(unescaped.as_ref().into())
139 .map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String));
140 }
141 XmlEvent::End(_) => {
142 return Ok("".to_owned());
143 }
144 XmlEvent::Eof => return Err(self.with_pos(ErrorKind::UnclosedXmlElement)),
145 XmlEvent::Start(_) => return Err(self.with_pos(ErrorKind::UnexpectedXmlOpeningTag)),
146 XmlEvent::PI(_)
147 | XmlEvent::Empty(_)
148 | XmlEvent::Comment(_)
149 | XmlEvent::CData(_)
150 | XmlEvent::Decl(_)
151 | XmlEvent::DocType(_) => {
152 }
154 }
155 }
156 }
157
158 fn read_next(&mut self, buffer: &mut Vec<u8>) -> Result<ReadResult, Error> {
159 loop {
160 match self.read_xml_event(buffer)? {
161 XmlEvent::Decl(_) | XmlEvent::DocType(_) => return Ok(ReadResult::XmlDecl),
162 XmlEvent::Start(name) => {
163 match name.local_name().as_ref() {
164 b"plist" => {}
165 b"array" => return Ok(ReadResult::Event(Event::StartArray(None))),
166 b"dict" => return Ok(ReadResult::Event(Event::StartDictionary(None))),
167 b"key" => {
168 return Ok(ReadResult::Event(Event::String(
169 self.read_content(buffer)?.into(),
170 )))
171 }
172 b"data" => {
173 let mut encoded = self.read_content(buffer)?;
174 encoded.retain(|c| !c.is_ascii_whitespace());
176 let data = base64_standard
177 .decode(&encoded)
178 .map_err(|_| self.with_pos(ErrorKind::InvalidDataString))?;
179 return Ok(ReadResult::Event(Event::Data(data.into())));
180 }
181 b"date" => {
182 let s = self.read_content(buffer)?;
183 let date = Date::from_xml_format(&s)
184 .map_err(|_| self.with_pos(ErrorKind::InvalidDateString))?;
185 return Ok(ReadResult::Event(Event::Date(date)));
186 }
187 b"integer" => {
188 let s = self.read_content(buffer)?;
189 match Integer::from_str(&s) {
190 Ok(i) => return Ok(ReadResult::Event(Event::Integer(i))),
191 Err(_) => {
192 return Err(self.with_pos(ErrorKind::InvalidIntegerString))
193 }
194 }
195 }
196 b"real" => {
197 let s = self.read_content(buffer)?;
198 match s.parse() {
199 Ok(f) => return Ok(ReadResult::Event(Event::Real(f))),
200 Err(_) => return Err(self.with_pos(ErrorKind::InvalidRealString)),
201 }
202 }
203 b"string" => {
204 return Ok(ReadResult::Event(Event::String(
205 self.read_content(buffer)?.into(),
206 )))
207 }
208 b"true" => return Ok(ReadResult::Event(Event::Boolean(true))),
209 b"false" => return Ok(ReadResult::Event(Event::Boolean(false))),
210 _ => return Err(self.with_pos(ErrorKind::UnknownXmlElement)),
211 }
212 }
213 XmlEvent::End(name) => match name.local_name().as_ref() {
214 b"array" | b"dict" => return Ok(ReadResult::Event(Event::EndCollection)),
215 _ => (),
216 },
217 XmlEvent::Eof => return Ok(ReadResult::Eof),
218 XmlEvent::Text(text) => {
219 let unescaped = text
220 .unescape()
221 .map_err(|err| self.with_pos(ErrorKind::from(err)))?;
222
223 if !unescaped.chars().all(char::is_whitespace) {
224 return Err(
225 self.with_pos(ErrorKind::UnexpectedXmlCharactersExpectedElement)
226 );
227 }
228 }
229 XmlEvent::PI(_)
230 | XmlEvent::CData(_)
231 | XmlEvent::Comment(_)
232 | XmlEvent::Empty(_) => {
233 }
235 }
236 }
237 }
238}
239
#[cfg(test)]
mod tests {
    use std::{fs::File, io::BufReader};

    use super::*;
    use crate::stream::Event::*;

    /// Opens the fixture file at `path` and wraps it in an `XmlReader`.
    fn open_fixture(path: &str) -> XmlReader<BufReader<File>> {
        let file = File::open(path).unwrap();
        XmlReader::new(BufReader::new(file))
    }

    /// A well-formed plist yields exactly the expected event sequence.
    #[test]
    fn streaming_parser() {
        let parsed: Result<Vec<_>, _> = open_fixture("./tests/data/xml.plist").collect();

        let expected = &[
            StartDictionary(None),
            String("Author".into()),
            String("William Shakespeare".into()),
            String("Lines".into()),
            StartArray(None),
            String("It is a tale told by an idiot, ".into()),
            String("Full of sound and fury, signifying nothing.".into()),
            EndCollection,
            String("Death".into()),
            Integer(1564.into()),
            String("Height".into()),
            Real(1.60),
            String("Data".into()),
            Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
            String("Birthdate".into()),
            Date(super::Date::from_xml_format("1981-05-16T11:32:06Z").unwrap()),
            String("Blank".into()),
            String("".into()),
            String("BiggestNumber".into()),
            Integer(18446744073709551615u64.into()),
            String("SmallestNumber".into()),
            Integer((-9223372036854775808i64).into()),
            String("HexademicalNumber".into()),
            Integer(0xdead_beef_u64.into()),
            String("IsTrue".into()),
            Boolean(true),
            String("IsNotFalse".into()),
            Boolean(false),
            EndCollection,
        ];

        assert_eq!(parsed.unwrap(), expected);
    }

    /// A malformed plist ends the event stream with an error.
    #[test]
    fn bad_data() {
        let parsed: Vec<_> = open_fixture("./tests/data/xml_error.plist").collect();

        assert!(parsed.last().unwrap().is_err());
    }
}