use base64::{engine::general_purpose::STANDARD as base64_standard, Engine};
use quick_xml::{events::Event as XmlEvent, Error as XmlReaderError, Reader as EventReader};
use std::io::{self, BufRead};
use crate::{
error::{Error, ErrorKind, FilePosition},
stream::{Event, OwnedEvent},
Date, Integer,
};
#[derive(Clone, PartialEq, Eq)]
struct ElmName(Box<[u8]>);
impl From<&[u8]> for ElmName {
fn from(bytes: &[u8]) -> Self {
ElmName(Box::from(bytes))
}
}
impl AsRef<[u8]> for ElmName {
fn as_ref(&self) -> &[u8] {
&self.0
}
}
pub struct XmlReader<R: BufRead> {
buffer: Vec<u8>,
started: bool,
finished: bool,
state: ReaderState<R>,
}
struct ReaderState<R: BufRead>(EventReader<R>);
enum ReadResult {
XmlDecl,
Event(OwnedEvent),
Eof,
}
impl<R: BufRead> XmlReader<R> {
pub fn new(reader: R) -> XmlReader<R> {
let mut xml_reader = EventReader::from_reader(reader);
let config = xml_reader.config_mut();
config.trim_text(false);
config.check_end_names = true;
config.expand_empty_elements = true;
XmlReader {
buffer: Vec::new(),
started: false,
finished: false,
state: ReaderState(xml_reader),
}
}
pub fn into_inner(self) -> R {
self.state.0.into_inner()
}
pub(crate) fn xml_doc_started(&self) -> bool {
self.started
}
}
impl From<XmlReaderError> for ErrorKind {
fn from(err: XmlReaderError) -> Self {
match err {
XmlReaderError::Io(err) if err.kind() == io::ErrorKind::UnexpectedEof => {
ErrorKind::UnexpectedEof
}
XmlReaderError::Io(err) => match std::sync::Arc::try_unwrap(err) {
Ok(err) => ErrorKind::Io(err),
Err(err) => ErrorKind::Io(std::io::Error::from(err.kind())),
},
XmlReaderError::Syntax(_) => ErrorKind::UnexpectedEof,
XmlReaderError::IllFormed(_) => ErrorKind::InvalidXmlSyntax,
XmlReaderError::NonDecodable(_) => ErrorKind::InvalidXmlUtf8,
_ => ErrorKind::InvalidXmlSyntax,
}
}
}
impl<R: BufRead> Iterator for XmlReader<R> {
type Item = Result<OwnedEvent, Error>;
fn next(&mut self) -> Option<Result<OwnedEvent, Error>> {
if self.finished {
return None;
}
loop {
match self.state.read_next(&mut self.buffer) {
Ok(ReadResult::XmlDecl) => {
self.started = true;
}
Ok(ReadResult::Event(event)) => {
self.started = true;
return Some(Ok(event));
}
Ok(ReadResult::Eof) => {
self.started = true;
self.finished = true;
return None;
}
Err(err) => {
self.finished = true;
return Some(Err(err));
}
}
}
}
}
impl<R: BufRead> ReaderState<R> {
fn xml_reader_pos(&self) -> FilePosition {
let pos = self.0.buffer_position();
FilePosition(pos as u64)
}
fn with_pos(&self, kind: ErrorKind) -> Error {
kind.with_position(self.xml_reader_pos())
}
fn read_xml_event<'buf>(&mut self, buffer: &'buf mut Vec<u8>) -> Result<XmlEvent<'buf>, Error> {
let event = self.0.read_event_into(buffer);
let pos = self.xml_reader_pos();
event.map_err(|err| ErrorKind::from(err).with_position(pos))
}
fn read_content(&mut self, buffer: &mut Vec<u8>) -> Result<String, Error> {
loop {
match self.read_xml_event(buffer)? {
XmlEvent::Text(text) => {
let unescaped = text
.unescape()
.map_err(|err| self.with_pos(ErrorKind::from(err)))?;
return String::from_utf8(unescaped.as_ref().into())
.map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String));
}
XmlEvent::End(_) => {
return Ok("".to_owned());
}
XmlEvent::Eof => return Err(self.with_pos(ErrorKind::UnclosedXmlElement)),
XmlEvent::Start(_) => return Err(self.with_pos(ErrorKind::UnexpectedXmlOpeningTag)),
XmlEvent::PI(_)
| XmlEvent::Empty(_)
| XmlEvent::Comment(_)
| XmlEvent::CData(_)
| XmlEvent::Decl(_)
| XmlEvent::DocType(_) => {
}
}
}
}
fn read_next(&mut self, buffer: &mut Vec<u8>) -> Result<ReadResult, Error> {
loop {
match self.read_xml_event(buffer)? {
XmlEvent::Decl(_) | XmlEvent::DocType(_) => return Ok(ReadResult::XmlDecl),
XmlEvent::Start(name) => {
match name.local_name().as_ref() {
b"plist" => {}
b"array" => return Ok(ReadResult::Event(Event::StartArray(None))),
b"dict" => return Ok(ReadResult::Event(Event::StartDictionary(None))),
b"key" => {
return Ok(ReadResult::Event(Event::String(
self.read_content(buffer)?.into(),
)))
}
b"data" => {
let mut encoded = self.read_content(buffer)?;
encoded.retain(|c| !c.is_ascii_whitespace());
let data = base64_standard
.decode(&encoded)
.map_err(|_| self.with_pos(ErrorKind::InvalidDataString))?;
return Ok(ReadResult::Event(Event::Data(data.into())));
}
b"date" => {
let s = self.read_content(buffer)?;
let date = Date::from_xml_format(&s)
.map_err(|_| self.with_pos(ErrorKind::InvalidDateString))?;
return Ok(ReadResult::Event(Event::Date(date)));
}
b"integer" => {
let s = self.read_content(buffer)?;
match Integer::from_str(&s) {
Ok(i) => return Ok(ReadResult::Event(Event::Integer(i))),
Err(_) => {
return Err(self.with_pos(ErrorKind::InvalidIntegerString))
}
}
}
b"real" => {
let s = self.read_content(buffer)?;
match s.parse() {
Ok(f) => return Ok(ReadResult::Event(Event::Real(f))),
Err(_) => return Err(self.with_pos(ErrorKind::InvalidRealString)),
}
}
b"string" => {
return Ok(ReadResult::Event(Event::String(
self.read_content(buffer)?.into(),
)))
}
b"true" => return Ok(ReadResult::Event(Event::Boolean(true))),
b"false" => return Ok(ReadResult::Event(Event::Boolean(false))),
_ => return Err(self.with_pos(ErrorKind::UnknownXmlElement)),
}
}
XmlEvent::End(name) => match name.local_name().as_ref() {
b"array" | b"dict" => return Ok(ReadResult::Event(Event::EndCollection)),
_ => (),
},
XmlEvent::Eof => return Ok(ReadResult::Eof),
XmlEvent::Text(text) => {
let unescaped = text
.unescape()
.map_err(|err| self.with_pos(ErrorKind::from(err)))?;
if !unescaped.chars().all(char::is_whitespace) {
return Err(
self.with_pos(ErrorKind::UnexpectedXmlCharactersExpectedElement)
);
}
}
XmlEvent::PI(_)
| XmlEvent::CData(_)
| XmlEvent::Comment(_)
| XmlEvent::Empty(_) => {
}
}
}
}
}
#[cfg(test)]
mod tests {
use std::{fs::File, io::BufReader};
use super::*;
use crate::stream::Event::*;
#[test]
fn streaming_parser() {
let reader = File::open("./tests/data/xml.plist").unwrap();
let streaming_parser = XmlReader::new(BufReader::new(reader));
let events: Result<Vec<_>, _> = streaming_parser.collect();
let comparison = &[
StartDictionary(None),
String("Author".into()),
String("William Shakespeare".into()),
String("Lines".into()),
StartArray(None),
String("It is a tale told by an idiot, ".into()),
String("Full of sound and fury, signifying nothing.".into()),
EndCollection,
String("Death".into()),
Integer(1564.into()),
String("Height".into()),
Real(1.60),
String("Data".into()),
Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
String("Birthdate".into()),
Date(super::Date::from_xml_format("1981-05-16T11:32:06Z").unwrap()),
String("Blank".into()),
String("".into()),
String("BiggestNumber".into()),
Integer(18446744073709551615u64.into()),
String("SmallestNumber".into()),
Integer((-9223372036854775808i64).into()),
String("HexademicalNumber".into()),
Integer(0xdead_beef_u64.into()),
String("IsTrue".into()),
Boolean(true),
String("IsNotFalse".into()),
Boolean(false),
EndCollection,
];
assert_eq!(events.unwrap(), comparison);
}
#[test]
fn bad_data() {
let reader = File::open("./tests/data/xml_error.plist").unwrap();
let streaming_parser = XmlReader::new(BufReader::new(reader));
let events: Vec<_> = streaming_parser.collect();
assert!(events.last().unwrap().is_err());
}
}