quick_xml/events/
mod.rs

//! Defines zero-copy XML events used throughout this library.
//!
//! An XML event often represents part of an XML element.
//! They occur both during reading and writing and are
//! usually used with the stream-oriented API.
//!
//! For example, the XML element
//! ```xml
//! <name attr="value">Inner text</name>
//! ```
//! consists of the three events `Start`, `Text` and `End`.
//! They can also represent other parts in an XML document like the
//! XML declaration. Each Event usually contains further information,
//! like the tag name, the attribute or the inner text.
//!
//! See [`Event`] for a list of all possible events.
//!
//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
//!
//! # Writing
//! When writing the XML document, you must create the XML element
//! by constructing the events it consists of and pass them to the writer
//! sequentially.
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: crate::reader::Reader::read_event
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
//! [`Reader`]: crate::reader::Reader
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::ops::Deref;
45use std::str::from_utf8;
46
47use crate::encoding::Decoder;
48use crate::errors::{Error, IllFormedError, Result};
49use crate::escape::{
50    escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
51};
52use crate::name::{LocalName, QName};
53use crate::reader::is_whitespace;
54use crate::utils::write_cow_string;
55#[cfg(feature = "serialize")]
56use crate::utils::CowRef;
57use attributes::{Attribute, Attributes};
58use std::mem::replace;
59
/// Data of an opening tag (`Event::Start`), optionally followed by attributes.
///
/// `<name attr="value">`.
///
/// Use [`name`] or [`local_name`] to read the tag name and [`attributes`]
/// to iterate over the attributes.
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, PartialEq, Eq)]
pub struct BytesStart<'a> {
    /// Raw content of the element (name followed by the attributes), before
    /// any UTF-8 conversion.
    pub(crate) buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes. The name occupies
    /// `buf[..name_len]`, i.e. it starts at the start of `buf`.
    pub(crate) name_len: usize,
}
77
78impl<'a> BytesStart<'a> {
79    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
80    #[inline]
81    pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self {
82        BytesStart {
83            buf: Cow::Borrowed(content),
84            name_len,
85        }
86    }
87
88    /// Creates a new `BytesStart` from the given name.
89    ///
90    /// # Warning
91    ///
92    /// `name` must be a valid name.
93    #[inline]
94    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
95        let buf = str_cow_to_bytes(name);
96        BytesStart {
97            name_len: buf.len(),
98            buf,
99        }
100    }
101
102    /// Creates a new `BytesStart` from the given content (name + attributes).
103    ///
104    /// # Warning
105    ///
106    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
107    /// must be correctly-formed attributes. Neither are checked, it is possible
108    /// to generate invalid XML if `content` or `name_len` are incorrect.
109    #[inline]
110    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
111        BytesStart {
112            buf: str_cow_to_bytes(content),
113            name_len,
114        }
115    }
116
117    /// Converts the event into an owned event.
118    pub fn into_owned(self) -> BytesStart<'static> {
119        BytesStart {
120            buf: Cow::Owned(self.buf.into_owned()),
121            name_len: self.name_len,
122        }
123    }
124
125    /// Converts the event into an owned event without taking ownership of Event
126    pub fn to_owned(&self) -> BytesStart<'static> {
127        BytesStart {
128            buf: Cow::Owned(self.buf.clone().into_owned()),
129            name_len: self.name_len,
130        }
131    }
132
133    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
134    ///
135    /// # Example
136    ///
137    /// ```
138    /// use quick_xml::events::{BytesStart, Event};
139    /// # use quick_xml::writer::Writer;
140    /// # use quick_xml::Error;
141    ///
142    /// struct SomeStruct<'a> {
143    ///     attrs: BytesStart<'a>,
144    ///     // ...
145    /// }
146    /// # impl<'a> SomeStruct<'a> {
147    /// # fn example(&self) -> Result<(), Error> {
148    /// # let mut writer = Writer::new(Vec::new());
149    ///
150    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
151    /// // ...
152    /// writer.write_event(Event::End(self.attrs.to_end()))?;
153    /// # Ok(())
154    /// # }}
155    /// ```
156    ///
157    /// [`to_end`]: Self::to_end
158    pub fn borrow(&self) -> BytesStart {
159        BytesStart {
160            buf: Cow::Borrowed(&self.buf),
161            name_len: self.name_len,
162        }
163    }
164
165    /// Creates new paired close tag
166    pub fn to_end(&self) -> BytesEnd {
167        BytesEnd::wrap(self.name().into_inner().into())
168    }
169
170    /// Gets the undecoded raw tag name, as present in the input stream.
171    #[inline]
172    pub fn name(&self) -> QName {
173        QName(&self.buf[..self.name_len])
174    }
175
176    /// Gets the undecoded raw local tag name (excluding namespace) as present
177    /// in the input stream.
178    ///
179    /// All content up to and including the first `:` character is removed from the tag name.
180    #[inline]
181    pub fn local_name(&self) -> LocalName {
182        self.name().into()
183    }
184
185    /// Edit the name of the BytesStart in-place
186    ///
187    /// # Warning
188    ///
189    /// `name` must be a valid name.
190    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
191        let bytes = self.buf.to_mut();
192        bytes.splice(..self.name_len, name.iter().cloned());
193        self.name_len = name.len();
194        self
195    }
196
197    /// Gets the undecoded raw tag name, as present in the input stream, which
198    /// is borrowed either to the input, or to the event.
199    ///
200    /// # Lifetimes
201    ///
202    /// - `'a`: Lifetime of the input data from which this event is borrow
203    /// - `'e`: Lifetime of the concrete event instance
204    // TODO: We should made this is a part of public API, but with safe wrapped for a name
205    #[cfg(feature = "serialize")]
206    pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
207        match self.buf {
208            Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
209            Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
210        }
211    }
212}
213
214/// Attribute-related methods
215impl<'a> BytesStart<'a> {
216    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
217    ///
218    /// The yielded items must be convertible to [`Attribute`] using `Into`.
219    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
220    where
221        I: IntoIterator,
222        I::Item: Into<Attribute<'b>>,
223    {
224        self.extend_attributes(attributes);
225        self
226    }
227
228    /// Add additional attributes to this tag using an iterator.
229    ///
230    /// The yielded items must be convertible to [`Attribute`] using `Into`.
231    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
232    where
233        I: IntoIterator,
234        I::Item: Into<Attribute<'b>>,
235    {
236        for attr in attributes {
237            self.push_attribute(attr);
238        }
239        self
240    }
241
242    /// Adds an attribute to this element.
243    pub fn push_attribute<'b, A>(&mut self, attr: A)
244    where
245        A: Into<Attribute<'b>>,
246    {
247        self.buf.to_mut().push(b' ');
248        self.push_attr(attr.into());
249    }
250
251    /// Remove all attributes from the ByteStart
252    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
253        self.buf.to_mut().truncate(self.name_len);
254        self
255    }
256
257    /// Returns an iterator over the attributes of this tag.
258    pub fn attributes(&self) -> Attributes {
259        Attributes::wrap(&self.buf, self.name_len, false)
260    }
261
262    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
263    pub fn html_attributes(&self) -> Attributes {
264        Attributes::wrap(&self.buf, self.name_len, true)
265    }
266
267    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
268    /// including the whitespace after the tag name if there is any.
269    #[inline]
270    pub fn attributes_raw(&self) -> &[u8] {
271        &self.buf[self.name_len..]
272    }
273
274    /// Try to get an attribute
275    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
276        &'a self,
277        attr_name: N,
278    ) -> Result<Option<Attribute<'a>>> {
279        for a in self.attributes().with_checks(false) {
280            let a = a?;
281            if a.key.as_ref() == attr_name.as_ref() {
282                return Ok(Some(a));
283            }
284        }
285        Ok(None)
286    }
287
288    /// Adds an attribute to this element.
289    pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
290        let bytes = self.buf.to_mut();
291        bytes.extend_from_slice(attr.key.as_ref());
292        bytes.extend_from_slice(b"=\"");
293        // FIXME: need to escape attribute content
294        bytes.extend_from_slice(attr.value.as_ref());
295        bytes.push(b'"');
296    }
297
298    /// Adds new line in existing element
299    pub(crate) fn push_newline(&mut self) {
300        self.buf.to_mut().push(b'\n');
301    }
302
303    /// Adds indentation bytes in existing element
304    pub(crate) fn push_indent(&mut self, indent: &[u8]) {
305        self.buf.to_mut().extend_from_slice(indent);
306    }
307}
308
309impl<'a> Debug for BytesStart<'a> {
310    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
311        write!(f, "BytesStart {{ buf: ")?;
312        write_cow_string(f, &self.buf)?;
313        write!(f, ", name_len: {} }}", self.name_len)
314    }
315}
316
317impl<'a> Deref for BytesStart<'a> {
318    type Target = [u8];
319
320    fn deref(&self) -> &[u8] {
321        &self.buf
322    }
323}
324
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a tag with a non-empty, purely alphanumeric name followed by
    /// an arbitrary list of `(&str, &str)` attributes. Any other name is
    /// rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let usable = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !usable {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        Ok(Self::new(name).with_attributes(attrs))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
341////////////////////////////////////////////////////////////////////////////////////////////////////
342
343/// An XML declaration (`Event::Decl`).
344///
345/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
346#[derive(Clone, Debug, Eq, PartialEq)]
347pub struct BytesDecl<'a> {
348    content: BytesStart<'a>,
349}
350
351impl<'a> BytesDecl<'a> {
352    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
353    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
354    /// attribute.
355    ///
356    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
357    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
358    /// the double quote character is not allowed in any of the attribute values.
359    pub fn new(
360        version: &str,
361        encoding: Option<&str>,
362        standalone: Option<&str>,
363    ) -> BytesDecl<'static> {
364        // Compute length of the buffer based on supplied attributes
365        // ' encoding=""'   => 12
366        let encoding_attr_len = if let Some(xs) = encoding {
367            12 + xs.len()
368        } else {
369            0
370        };
371        // ' standalone=""' => 14
372        let standalone_attr_len = if let Some(xs) = standalone {
373            14 + xs.len()
374        } else {
375            0
376        };
377        // 'xml version=""' => 14
378        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
379
380        buf.push_str("xml version=\"");
381        buf.push_str(version);
382
383        if let Some(encoding_val) = encoding {
384            buf.push_str("\" encoding=\"");
385            buf.push_str(encoding_val);
386        }
387
388        if let Some(standalone_val) = standalone {
389            buf.push_str("\" standalone=\"");
390            buf.push_str(standalone_val);
391        }
392        buf.push('"');
393
394        BytesDecl {
395            content: BytesStart::from_content(buf, 3),
396        }
397    }
398
399    /// Creates a `BytesDecl` from a `BytesStart`
400    pub fn from_start(start: BytesStart<'a>) -> Self {
401        Self { content: start }
402    }
403
404    /// Gets xml version, excluding quotes (`'` or `"`).
405    ///
406    /// According to the [grammar], the version *must* be the first thing in the declaration.
407    /// This method tries to extract the first thing in the declaration and return it.
408    /// In case of multiple attributes value of the first one is returned.
409    ///
410    /// If version is missed in the declaration, or the first thing is not a version,
411    /// [`IllFormedError::MissingDeclVersion`] will be returned.
412    ///
413    /// # Examples
414    ///
415    /// ```
416    /// use quick_xml::errors::{Error, IllFormedError};
417    /// use quick_xml::events::{BytesDecl, BytesStart};
418    ///
419    /// // <?xml version='1.1'?>
420    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
421    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
422    ///
423    /// // <?xml version='1.0' version='1.1'?>
424    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
425    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
426    ///
427    /// // <?xml encoding='utf-8'?>
428    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
429    /// match decl.version() {
430    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
431    ///     _ => assert!(false),
432    /// }
433    ///
434    /// // <?xml encoding='utf-8' version='1.1'?>
435    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
436    /// match decl.version() {
437    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
438    ///     _ => assert!(false),
439    /// }
440    ///
441    /// // <?xml?>
442    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
443    /// match decl.version() {
444    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
445    ///     _ => assert!(false),
446    /// }
447    /// ```
448    ///
449    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
450    pub fn version(&self) -> Result<Cow<[u8]>> {
451        // The version *must* be the first thing in the declaration.
452        match self.content.attributes().with_checks(false).next() {
453            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
454            // first attribute was not "version"
455            Some(Ok(a)) => {
456                let found = from_utf8(a.key.as_ref())?.to_string();
457                Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
458                    found,
459                ))))
460            }
461            // error parsing attributes
462            Some(Err(e)) => Err(e.into()),
463            // no attributes
464            None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
465        }
466    }
467
468    /// Gets xml encoding, excluding quotes (`'` or `"`).
469    ///
470    /// Although according to the [grammar] encoding must appear before `"standalone"`
471    /// and after `"version"`, this method does not check that. The first occurrence
472    /// of the attribute will be returned even if there are several. Also, method does
473    /// not restrict symbols that can forming the encoding, so the returned encoding
474    /// name may not correspond to the grammar.
475    ///
476    /// # Examples
477    ///
478    /// ```
479    /// use std::borrow::Cow;
480    /// use quick_xml::Error;
481    /// use quick_xml::events::{BytesDecl, BytesStart};
482    ///
483    /// // <?xml version='1.1'?>
484    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
485    /// assert!(decl.encoding().is_none());
486    ///
487    /// // <?xml encoding='utf-8'?>
488    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
489    /// match decl.encoding() {
490    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
491    ///     _ => assert!(false),
492    /// }
493    ///
494    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
495    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
496    /// match decl.encoding() {
497    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
498    ///     _ => assert!(false),
499    /// }
500    /// ```
501    ///
502    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
503    pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
504        self.content
505            .try_get_attribute("encoding")
506            .map(|a| a.map(|a| a.value))
507            .transpose()
508    }
509
510    /// Gets xml standalone, excluding quotes (`'` or `"`).
511    ///
512    /// Although according to the [grammar] standalone flag must appear after `"version"`
513    /// and `"encoding"`, this method does not check that. The first occurrence of the
514    /// attribute will be returned even if there are several. Also, method does not
515    /// restrict symbols that can forming the value, so the returned flag name may not
516    /// correspond to the grammar.
517    ///
518    /// # Examples
519    ///
520    /// ```
521    /// use std::borrow::Cow;
522    /// use quick_xml::Error;
523    /// use quick_xml::events::{BytesDecl, BytesStart};
524    ///
525    /// // <?xml version='1.1'?>
526    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
527    /// assert!(decl.standalone().is_none());
528    ///
529    /// // <?xml standalone='yes'?>
530    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
531    /// match decl.standalone() {
532    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
533    ///     _ => assert!(false),
534    /// }
535    ///
536    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
537    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
538    /// match decl.standalone() {
539    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
540    ///     _ => assert!(false),
541    /// }
542    /// ```
543    ///
544    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
545    pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
546        self.content
547            .try_get_attribute("standalone")
548            .map(|a| a.map(|a| a.value))
549            .transpose()
550    }
551
552    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
553    /// algorithm.
554    ///
555    /// If encoding in not known, or `encoding` key was not found, returns `None`.
556    /// In case of duplicated `encoding` key, encoding, corresponding to the first
557    /// one, is returned.
558    #[cfg(feature = "encoding")]
559    pub fn encoder(&self) -> Option<&'static Encoding> {
560        self.encoding()
561            .and_then(|e| e.ok())
562            .and_then(|e| Encoding::for_label(&e))
563    }
564
565    /// Converts the event into an owned event.
566    pub fn into_owned(self) -> BytesDecl<'static> {
567        BytesDecl {
568            content: self.content.into_owned(),
569        }
570    }
571
572    /// Converts the event into a borrowed event.
573    #[inline]
574    pub fn borrow(&self) -> BytesDecl {
575        BytesDecl {
576            content: self.content.borrow(),
577        }
578    }
579}
580
581impl<'a> Deref for BytesDecl<'a> {
582    type Target = [u8];
583
584    fn deref(&self) -> &[u8] {
585        &self.content
586    }
587}
588
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Generates a declaration from an arbitrary version string and arbitrary
    /// optional encoding and standalone values, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
603
604////////////////////////////////////////////////////////////////////////////////////////////////////
605
/// Closing tag data (`Event::End`): holds only the raw tag name.
#[derive(Clone, PartialEq, Eq)]
pub struct BytesEnd<'a> {
    /// Raw (undecoded) name of the tag.
    name: Cow<'a, [u8]>,
}
611
612impl<'a> BytesEnd<'a> {
613    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
614    #[inline]
615    pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self {
616        BytesEnd { name }
617    }
618
619    /// Creates a new `BytesEnd` borrowing a slice.
620    ///
621    /// # Warning
622    ///
623    /// `name` must be a valid name.
624    #[inline]
625    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
626        Self::wrap(str_cow_to_bytes(name))
627    }
628
629    /// Converts the event into an owned event.
630    pub fn into_owned(self) -> BytesEnd<'static> {
631        BytesEnd {
632            name: Cow::Owned(self.name.into_owned()),
633        }
634    }
635
636    /// Converts the event into a borrowed event.
637    #[inline]
638    pub fn borrow(&self) -> BytesEnd {
639        BytesEnd {
640            name: Cow::Borrowed(&self.name),
641        }
642    }
643
644    /// Gets the undecoded raw tag name, as present in the input stream.
645    #[inline]
646    pub fn name(&self) -> QName {
647        QName(&self.name)
648    }
649
650    /// Gets the undecoded raw local tag name (excluding namespace) as present
651    /// in the input stream.
652    ///
653    /// All content up to and including the first `:` character is removed from the tag name.
654    #[inline]
655    pub fn local_name(&self) -> LocalName {
656        self.name().into()
657    }
658}
659
660impl<'a> Debug for BytesEnd<'a> {
661    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
662        write!(f, "BytesEnd {{ name: ")?;
663        write_cow_string(f, &self.name)?;
664        write!(f, " }}")
665    }
666}
667
668impl<'a> Deref for BytesEnd<'a> {
669    type Target = [u8];
670
671    fn deref(&self) -> &[u8] {
672        &self.name
673    }
674}
675
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
685
686////////////////////////////////////////////////////////////////////////////////////////////////////
687
688/// Data from various events (most notably, `Event::Text`) that stored in XML
689/// in escaped form. Internally data is stored in escaped form
690#[derive(Clone, Eq, PartialEq)]
691pub struct BytesText<'a> {
692    /// Escaped then encoded content of the event. Content is encoded in the XML
693    /// document encoding when event comes from the reader and should be in the
694    /// document encoding when event passed to the writer
695    content: Cow<'a, [u8]>,
696    /// Encoding in which the `content` is stored inside the event
697    decoder: Decoder,
698}
699
700impl<'a> BytesText<'a> {
701    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
702    #[inline]
703    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
704        Self {
705            content: content.into(),
706            decoder,
707        }
708    }
709
710    /// Creates a new `BytesText` from an escaped string.
711    #[inline]
712    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
713        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
714    }
715
716    /// Creates a new `BytesText` from a string. The string is expected not to
717    /// be escaped.
718    #[inline]
719    pub fn new(content: &'a str) -> Self {
720        Self::from_escaped(escape(content))
721    }
722
723    /// Ensures that all data is owned to extend the object's lifetime if
724    /// necessary.
725    #[inline]
726    pub fn into_owned(self) -> BytesText<'static> {
727        BytesText {
728            content: self.content.into_owned().into(),
729            decoder: self.decoder,
730        }
731    }
732
733    /// Extracts the inner `Cow` from the `BytesText` event container.
734    #[inline]
735    pub fn into_inner(self) -> Cow<'a, [u8]> {
736        self.content
737    }
738
739    /// Converts the event into a borrowed event.
740    #[inline]
741    pub fn borrow(&self) -> BytesText {
742        BytesText {
743            content: Cow::Borrowed(&self.content),
744            decoder: self.decoder,
745        }
746    }
747
748    /// Decodes then unescapes the content of the event.
749    ///
750    /// This will allocate if the value contains any escape sequences or in
751    /// non-UTF-8 encoding.
752    pub fn unescape(&self) -> Result<Cow<'a, str>> {
753        self.unescape_with(resolve_predefined_entity)
754    }
755
756    /// Decodes then unescapes the content of the event with custom entities.
757    ///
758    /// This will allocate if the value contains any escape sequences or in
759    /// non-UTF-8 encoding.
760    pub fn unescape_with<'entity>(
761        &self,
762        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
763    ) -> Result<Cow<'a, str>> {
764        let decoded = match &self.content {
765            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
766            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
767            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
768        };
769
770        match unescape_with(&decoded, resolve_entity)? {
771            // Because result is borrowed, no replacements was done and we can use original string
772            Cow::Borrowed(_) => Ok(decoded),
773            Cow::Owned(s) => Ok(s.into()),
774        }
775    }
776
777    /// Removes leading XML whitespace bytes from text content.
778    ///
779    /// Returns `true` if content is empty after that
780    pub fn inplace_trim_start(&mut self) -> bool {
781        self.content = trim_cow(
782            replace(&mut self.content, Cow::Borrowed(b"")),
783            trim_xml_start,
784        );
785        self.content.is_empty()
786    }
787
788    /// Removes trailing XML whitespace bytes from text content.
789    ///
790    /// Returns `true` if content is empty after that
791    pub fn inplace_trim_end(&mut self) -> bool {
792        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
793        self.content.is_empty()
794    }
795}
796
797impl<'a> Debug for BytesText<'a> {
798    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
799        write!(f, "BytesText {{ content: ")?;
800        write_cow_string(f, &self.content)?;
801        write!(f, " }}")
802    }
803}
804
805impl<'a> Deref for BytesText<'a> {
806    type Target = [u8];
807
808    fn deref(&self) -> &[u8] {
809        &self.content
810    }
811}
812
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string made only of
    /// alphanumeric characters (the empty string is accepted). Any other
    /// string is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().all(char::is_alphanumeric) {
            Ok(Self::new(text))
        } else {
            Err(arbitrary::Error::IncorrectFormat)
        }
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
827
828////////////////////////////////////////////////////////////////////////////////////////////////////
829
830/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
831/// [convert](Self::escape) it to [`BytesText`]
832#[derive(Clone, Eq, PartialEq)]
833pub struct BytesCData<'a> {
834    content: Cow<'a, [u8]>,
835    /// Encoding in which the `content` is stored inside the event
836    decoder: Decoder,
837}
838
839impl<'a> BytesCData<'a> {
840    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
841    #[inline]
842    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
843        Self {
844            content: content.into(),
845            decoder,
846        }
847    }
848
849    /// Creates a new `BytesCData` from a string.
850    ///
851    /// # Warning
852    ///
853    /// `content` must not contain the `]]>` sequence.
854    #[inline]
855    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
856        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
857    }
858
859    /// Ensures that all data is owned to extend the object's lifetime if
860    /// necessary.
861    #[inline]
862    pub fn into_owned(self) -> BytesCData<'static> {
863        BytesCData {
864            content: self.content.into_owned().into(),
865            decoder: self.decoder,
866        }
867    }
868
869    /// Extracts the inner `Cow` from the `BytesCData` event container.
870    #[inline]
871    pub fn into_inner(self) -> Cow<'a, [u8]> {
872        self.content
873    }
874
875    /// Converts the event into a borrowed event.
876    #[inline]
877    pub fn borrow(&self) -> BytesCData {
878        BytesCData {
879            content: Cow::Borrowed(&self.content),
880            decoder: self.decoder,
881        }
882    }
883
884    /// Converts this CDATA content to an escaped version, that can be written
885    /// as an usual text in XML.
886    ///
887    /// This function performs following replacements:
888    ///
889    /// | Character | Replacement
890    /// |-----------|------------
891    /// | `<`       | `&lt;`
892    /// | `>`       | `&gt;`
893    /// | `&`       | `&amp;`
894    /// | `'`       | `&apos;`
895    /// | `"`       | `&quot;`
896    pub fn escape(self) -> Result<BytesText<'a>> {
897        let decoded = self.decode()?;
898        Ok(BytesText::wrap(
899            match escape(&decoded) {
900                // Because result is borrowed, no replacements was done and we can use original content
901                Cow::Borrowed(_) => self.content,
902                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
903            },
904            Decoder::utf8(),
905        ))
906    }
907
908    /// Converts this CDATA content to an escaped version, that can be written
909    /// as an usual text in XML.
910    ///
911    /// In XML text content, it is allowed (though not recommended) to leave
912    /// the quote special characters `"` and `'` unescaped.
913    ///
914    /// This function performs following replacements:
915    ///
916    /// | Character | Replacement
917    /// |-----------|------------
918    /// | `<`       | `&lt;`
919    /// | `>`       | `&gt;`
920    /// | `&`       | `&amp;`
921    pub fn partial_escape(self) -> Result<BytesText<'a>> {
922        let decoded = self.decode()?;
923        Ok(BytesText::wrap(
924            match partial_escape(&decoded) {
925                // Because result is borrowed, no replacements was done and we can use original content
926                Cow::Borrowed(_) => self.content,
927                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
928            },
929            Decoder::utf8(),
930        ))
931    }
932
933    /// Converts this CDATA content to an escaped version, that can be written
934    /// as an usual text in XML. This method escapes only those characters that
935    /// must be escaped according to the [specification].
936    ///
937    /// This function performs following replacements:
938    ///
939    /// | Character | Replacement
940    /// |-----------|------------
941    /// | `<`       | `&lt;`
942    /// | `&`       | `&amp;`
943    ///
944    /// [specification]: https://www.w3.org/TR/xml11/#syntax
945    pub fn minimal_escape(self) -> Result<BytesText<'a>> {
946        let decoded = self.decode()?;
947        Ok(BytesText::wrap(
948            match minimal_escape(&decoded) {
949                // Because result is borrowed, no replacements was done and we can use original content
950                Cow::Borrowed(_) => self.content,
951                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
952            },
953            Decoder::utf8(),
954        ))
955    }
956
957    /// Gets content of this text buffer in the specified encoding
958    pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
959        Ok(match &self.content {
960            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
961            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
962            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
963        })
964    }
965}
966
967impl<'a> Debug for BytesCData<'a> {
968    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
969        write!(f, "BytesCData {{ content: ")?;
970        write_cow_string(f, &self.content)?;
971        write!(f, " }}")
972    }
973}
974
975impl<'a> Deref for BytesCData<'a> {
976    type Target = [u8];
977
978    fn deref(&self) -> &[u8] {
979        &self.content
980    }
981}
982
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds a `BytesCData` from an arbitrary `&str`.
    ///
    /// NOTE(review): the generated string is passed to [`BytesCData::new`]
    /// unfiltered, so it may contain the `]]>` sequence that `new` warns about.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Same size hint as the underlying `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
992
993////////////////////////////////////////////////////////////////////////////////////////////////////
994
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Self::Eof) wraps a container parameterized by
/// the lifetime `'a`; use [`into_owned`](Self::into_owned) to get an
/// `Event<'static>`.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesText<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}
1022
1023impl<'a> Event<'a> {
1024    /// Converts the event to an owned version, untied to the lifetime of
1025    /// buffer used when reading but incurring a new, separate allocation.
1026    pub fn into_owned(self) -> Event<'static> {
1027        match self {
1028            Event::Start(e) => Event::Start(e.into_owned()),
1029            Event::End(e) => Event::End(e.into_owned()),
1030            Event::Empty(e) => Event::Empty(e.into_owned()),
1031            Event::Text(e) => Event::Text(e.into_owned()),
1032            Event::Comment(e) => Event::Comment(e.into_owned()),
1033            Event::CData(e) => Event::CData(e.into_owned()),
1034            Event::Decl(e) => Event::Decl(e.into_owned()),
1035            Event::PI(e) => Event::PI(e.into_owned()),
1036            Event::DocType(e) => Event::DocType(e.into_owned()),
1037            Event::Eof => Event::Eof,
1038        }
1039    }
1040
1041    /// Converts the event into a borrowed event.
1042    #[inline]
1043    pub fn borrow(&self) -> Event {
1044        match self {
1045            Event::Start(e) => Event::Start(e.borrow()),
1046            Event::End(e) => Event::End(e.borrow()),
1047            Event::Empty(e) => Event::Empty(e.borrow()),
1048            Event::Text(e) => Event::Text(e.borrow()),
1049            Event::Comment(e) => Event::Comment(e.borrow()),
1050            Event::CData(e) => Event::CData(e.borrow()),
1051            Event::Decl(e) => Event::Decl(e.borrow()),
1052            Event::PI(e) => Event::PI(e.borrow()),
1053            Event::DocType(e) => Event::DocType(e.borrow()),
1054            Event::Eof => Event::Eof,
1055        }
1056    }
1057}
1058
1059impl<'a> Deref for Event<'a> {
1060    type Target = [u8];
1061
1062    fn deref(&self) -> &[u8] {
1063        match *self {
1064            Event::Start(ref e) | Event::Empty(ref e) => e,
1065            Event::End(ref e) => e,
1066            Event::Text(ref e) => e,
1067            Event::Decl(ref e) => e,
1068            Event::PI(ref e) => e,
1069            Event::CData(ref e) => e,
1070            Event::Comment(ref e) => e,
1071            Event::DocType(ref e) => e,
1072            Event::Eof => &[],
1073        }
1074    }
1075}
1076
1077impl<'a> AsRef<Event<'a>> for Event<'a> {
1078    fn as_ref(&self) -> &Event<'a> {
1079        self
1080    }
1081}
1082
1083////////////////////////////////////////////////////////////////////////////////////////////////////
1084
/// Reinterprets a string `Cow` as a byte `Cow` without copying: a borrowed
/// string stays borrowed, an owned string hands over its buffer.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => owned.into_bytes().into(),
        Cow::Borrowed(borrowed) => borrowed.as_bytes().into(),
    }
}
1092
1093/// Returns a byte slice with leading XML whitespace bytes removed.
1094///
1095/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1096const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
1097    // Note: A pattern matching based approach (instead of indexing) allows
1098    // making the function const.
1099    while let [first, rest @ ..] = bytes {
1100        if is_whitespace(*first) {
1101            bytes = rest;
1102        } else {
1103            break;
1104        }
1105    }
1106    bytes
1107}
1108
1109/// Returns a byte slice with trailing XML whitespace bytes removed.
1110///
1111/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1112const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
1113    // Note: A pattern matching based approach (instead of indexing) allows
1114    // making the function const.
1115    while let [rest @ .., last] = bytes {
1116        if is_whitespace(*last) {
1117            bytes = rest;
1118        } else {
1119            break;
1120        }
1121    }
1122    bytes
1123}
1124
/// Applies `trim` to the bytes held by `value`, keeping the `Cow` kind.
///
/// Borrowed input yields the trimmed sub-slice directly. Owned input is
/// returned untouched when `trim` removed nothing (judged by length);
/// otherwise the trimmed bytes are copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(vec) => vec,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1140
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        // `test` (4 bytes) followed by ` x="a"` (6 bytes)
        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        // `g` (1 byte) followed by the same 6 attribute bytes
        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));
        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}