quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::ops::Deref;
45use std::str::from_utf8;
46
47use crate::encoding::Decoder;
48use crate::errors::{Error, IllFormedError, Result};
49use crate::escape::{
50 escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
51};
52use crate::name::{LocalName, QName};
53use crate::reader::is_whitespace;
54use crate::utils::write_cow_string;
55#[cfg(feature = "serialize")]
56use crate::utils::CowRef;
57use attributes::{Attribute, Attributes};
58use std::mem::replace;
59
/// Opening tag data (`Event::Start`), with optional attributes.
///
/// `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (the name followed by the raw attributes),
    /// before any UTF-8 conversion.
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`,
    /// so the name is `buf[..name_len]` and the attributes are `buf[name_len..]`.
    pub(crate) name_len: usize,
}
77
78impl<'a> BytesStart<'a> {
79 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
80 #[inline]
81 pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self {
82 BytesStart {
83 buf: Cow::Borrowed(content),
84 name_len,
85 }
86 }
87
88 /// Creates a new `BytesStart` from the given name.
89 ///
90 /// # Warning
91 ///
92 /// `name` must be a valid name.
93 #[inline]
94 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
95 let buf = str_cow_to_bytes(name);
96 BytesStart {
97 name_len: buf.len(),
98 buf,
99 }
100 }
101
102 /// Creates a new `BytesStart` from the given content (name + attributes).
103 ///
104 /// # Warning
105 ///
106 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
107 /// must be correctly-formed attributes. Neither are checked, it is possible
108 /// to generate invalid XML if `content` or `name_len` are incorrect.
109 #[inline]
110 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
111 BytesStart {
112 buf: str_cow_to_bytes(content),
113 name_len,
114 }
115 }
116
117 /// Converts the event into an owned event.
118 pub fn into_owned(self) -> BytesStart<'static> {
119 BytesStart {
120 buf: Cow::Owned(self.buf.into_owned()),
121 name_len: self.name_len,
122 }
123 }
124
125 /// Converts the event into an owned event without taking ownership of Event
126 pub fn to_owned(&self) -> BytesStart<'static> {
127 BytesStart {
128 buf: Cow::Owned(self.buf.clone().into_owned()),
129 name_len: self.name_len,
130 }
131 }
132
133 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
134 ///
135 /// # Example
136 ///
137 /// ```
138 /// use quick_xml::events::{BytesStart, Event};
139 /// # use quick_xml::writer::Writer;
140 /// # use quick_xml::Error;
141 ///
142 /// struct SomeStruct<'a> {
143 /// attrs: BytesStart<'a>,
144 /// // ...
145 /// }
146 /// # impl<'a> SomeStruct<'a> {
147 /// # fn example(&self) -> Result<(), Error> {
148 /// # let mut writer = Writer::new(Vec::new());
149 ///
150 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
151 /// // ...
152 /// writer.write_event(Event::End(self.attrs.to_end()))?;
153 /// # Ok(())
154 /// # }}
155 /// ```
156 ///
157 /// [`to_end`]: Self::to_end
158 pub fn borrow(&self) -> BytesStart {
159 BytesStart {
160 buf: Cow::Borrowed(&self.buf),
161 name_len: self.name_len,
162 }
163 }
164
165 /// Creates new paired close tag
166 pub fn to_end(&self) -> BytesEnd {
167 BytesEnd::wrap(self.name().into_inner().into())
168 }
169
170 /// Gets the undecoded raw tag name, as present in the input stream.
171 #[inline]
172 pub fn name(&self) -> QName {
173 QName(&self.buf[..self.name_len])
174 }
175
176 /// Gets the undecoded raw local tag name (excluding namespace) as present
177 /// in the input stream.
178 ///
179 /// All content up to and including the first `:` character is removed from the tag name.
180 #[inline]
181 pub fn local_name(&self) -> LocalName {
182 self.name().into()
183 }
184
185 /// Edit the name of the BytesStart in-place
186 ///
187 /// # Warning
188 ///
189 /// `name` must be a valid name.
190 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
191 let bytes = self.buf.to_mut();
192 bytes.splice(..self.name_len, name.iter().cloned());
193 self.name_len = name.len();
194 self
195 }
196
197 /// Gets the undecoded raw tag name, as present in the input stream, which
198 /// is borrowed either to the input, or to the event.
199 ///
200 /// # Lifetimes
201 ///
202 /// - `'a`: Lifetime of the input data from which this event is borrow
203 /// - `'e`: Lifetime of the concrete event instance
204 // TODO: We should made this is a part of public API, but with safe wrapped for a name
205 #[cfg(feature = "serialize")]
206 pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
207 match self.buf {
208 Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
209 Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
210 }
211 }
212}
213
214/// Attribute-related methods
215impl<'a> BytesStart<'a> {
216 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
217 ///
218 /// The yielded items must be convertible to [`Attribute`] using `Into`.
219 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
220 where
221 I: IntoIterator,
222 I::Item: Into<Attribute<'b>>,
223 {
224 self.extend_attributes(attributes);
225 self
226 }
227
228 /// Add additional attributes to this tag using an iterator.
229 ///
230 /// The yielded items must be convertible to [`Attribute`] using `Into`.
231 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
232 where
233 I: IntoIterator,
234 I::Item: Into<Attribute<'b>>,
235 {
236 for attr in attributes {
237 self.push_attribute(attr);
238 }
239 self
240 }
241
242 /// Adds an attribute to this element.
243 pub fn push_attribute<'b, A>(&mut self, attr: A)
244 where
245 A: Into<Attribute<'b>>,
246 {
247 self.buf.to_mut().push(b' ');
248 self.push_attr(attr.into());
249 }
250
251 /// Remove all attributes from the ByteStart
252 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
253 self.buf.to_mut().truncate(self.name_len);
254 self
255 }
256
257 /// Returns an iterator over the attributes of this tag.
258 pub fn attributes(&self) -> Attributes {
259 Attributes::wrap(&self.buf, self.name_len, false)
260 }
261
262 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
263 pub fn html_attributes(&self) -> Attributes {
264 Attributes::wrap(&self.buf, self.name_len, true)
265 }
266
267 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
268 /// including the whitespace after the tag name if there is any.
269 #[inline]
270 pub fn attributes_raw(&self) -> &[u8] {
271 &self.buf[self.name_len..]
272 }
273
274 /// Try to get an attribute
275 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
276 &'a self,
277 attr_name: N,
278 ) -> Result<Option<Attribute<'a>>> {
279 for a in self.attributes().with_checks(false) {
280 let a = a?;
281 if a.key.as_ref() == attr_name.as_ref() {
282 return Ok(Some(a));
283 }
284 }
285 Ok(None)
286 }
287
288 /// Adds an attribute to this element.
289 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
290 let bytes = self.buf.to_mut();
291 bytes.extend_from_slice(attr.key.as_ref());
292 bytes.extend_from_slice(b"=\"");
293 // FIXME: need to escape attribute content
294 bytes.extend_from_slice(attr.value.as_ref());
295 bytes.push(b'"');
296 }
297
298 /// Adds new line in existing element
299 pub(crate) fn push_newline(&mut self) {
300 self.buf.to_mut().push(b'\n');
301 }
302
303 /// Adds indentation bytes in existing element
304 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
305 self.buf.to_mut().extend_from_slice(indent);
306 }
307}
308
309impl<'a> Debug for BytesStart<'a> {
310 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
311 write!(f, "BytesStart {{ buf: ")?;
312 write_cow_string(f, &self.buf)?;
313 write!(f, ", name_len: {} }}", self.name_len)
314 }
315}
316
317impl<'a> Deref for BytesStart<'a> {
318 type Target = [u8];
319
320 fn deref(&self) -> &[u8] {
321 &self.buf
322 }
323}
324
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a tag with a non-empty, purely alphanumeric name and an arbitrary
    /// list of `(key, value)` attributes. Any other name is rejected with
    /// `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let is_valid_name = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !is_valid_name {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let tag = Self::new(name);
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        Ok(tag.with_attributes(attrs))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
341////////////////////////////////////////////////////////////////////////////////////////////////////
342
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// Content of the declaration, stored as a [`BytesStart`] so that the
    /// pseudo-attributes (`version`, `encoding`, `standalone`) can be read
    /// with the attribute iterator.
    content: BytesStart<'a>,
}
350
351impl<'a> BytesDecl<'a> {
352 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
353 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
354 /// attribute.
355 ///
356 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
357 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
358 /// the double quote character is not allowed in any of the attribute values.
359 pub fn new(
360 version: &str,
361 encoding: Option<&str>,
362 standalone: Option<&str>,
363 ) -> BytesDecl<'static> {
364 // Compute length of the buffer based on supplied attributes
365 // ' encoding=""' => 12
366 let encoding_attr_len = if let Some(xs) = encoding {
367 12 + xs.len()
368 } else {
369 0
370 };
371 // ' standalone=""' => 14
372 let standalone_attr_len = if let Some(xs) = standalone {
373 14 + xs.len()
374 } else {
375 0
376 };
377 // 'xml version=""' => 14
378 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
379
380 buf.push_str("xml version=\"");
381 buf.push_str(version);
382
383 if let Some(encoding_val) = encoding {
384 buf.push_str("\" encoding=\"");
385 buf.push_str(encoding_val);
386 }
387
388 if let Some(standalone_val) = standalone {
389 buf.push_str("\" standalone=\"");
390 buf.push_str(standalone_val);
391 }
392 buf.push('"');
393
394 BytesDecl {
395 content: BytesStart::from_content(buf, 3),
396 }
397 }
398
399 /// Creates a `BytesDecl` from a `BytesStart`
400 pub fn from_start(start: BytesStart<'a>) -> Self {
401 Self { content: start }
402 }
403
404 /// Gets xml version, excluding quotes (`'` or `"`).
405 ///
406 /// According to the [grammar], the version *must* be the first thing in the declaration.
407 /// This method tries to extract the first thing in the declaration and return it.
408 /// In case of multiple attributes value of the first one is returned.
409 ///
410 /// If version is missed in the declaration, or the first thing is not a version,
411 /// [`IllFormedError::MissingDeclVersion`] will be returned.
412 ///
413 /// # Examples
414 ///
415 /// ```
416 /// use quick_xml::errors::{Error, IllFormedError};
417 /// use quick_xml::events::{BytesDecl, BytesStart};
418 ///
419 /// // <?xml version='1.1'?>
420 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
421 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
422 ///
423 /// // <?xml version='1.0' version='1.1'?>
424 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
425 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
426 ///
427 /// // <?xml encoding='utf-8'?>
428 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
429 /// match decl.version() {
430 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
431 /// _ => assert!(false),
432 /// }
433 ///
434 /// // <?xml encoding='utf-8' version='1.1'?>
435 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
436 /// match decl.version() {
437 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
438 /// _ => assert!(false),
439 /// }
440 ///
441 /// // <?xml?>
442 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
443 /// match decl.version() {
444 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
445 /// _ => assert!(false),
446 /// }
447 /// ```
448 ///
449 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
450 pub fn version(&self) -> Result<Cow<[u8]>> {
451 // The version *must* be the first thing in the declaration.
452 match self.content.attributes().with_checks(false).next() {
453 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
454 // first attribute was not "version"
455 Some(Ok(a)) => {
456 let found = from_utf8(a.key.as_ref())?.to_string();
457 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
458 found,
459 ))))
460 }
461 // error parsing attributes
462 Some(Err(e)) => Err(e.into()),
463 // no attributes
464 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
465 }
466 }
467
468 /// Gets xml encoding, excluding quotes (`'` or `"`).
469 ///
470 /// Although according to the [grammar] encoding must appear before `"standalone"`
471 /// and after `"version"`, this method does not check that. The first occurrence
472 /// of the attribute will be returned even if there are several. Also, method does
473 /// not restrict symbols that can forming the encoding, so the returned encoding
474 /// name may not correspond to the grammar.
475 ///
476 /// # Examples
477 ///
478 /// ```
479 /// use std::borrow::Cow;
480 /// use quick_xml::Error;
481 /// use quick_xml::events::{BytesDecl, BytesStart};
482 ///
483 /// // <?xml version='1.1'?>
484 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
485 /// assert!(decl.encoding().is_none());
486 ///
487 /// // <?xml encoding='utf-8'?>
488 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
489 /// match decl.encoding() {
490 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
491 /// _ => assert!(false),
492 /// }
493 ///
494 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
495 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
496 /// match decl.encoding() {
497 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
498 /// _ => assert!(false),
499 /// }
500 /// ```
501 ///
502 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
503 pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
504 self.content
505 .try_get_attribute("encoding")
506 .map(|a| a.map(|a| a.value))
507 .transpose()
508 }
509
510 /// Gets xml standalone, excluding quotes (`'` or `"`).
511 ///
512 /// Although according to the [grammar] standalone flag must appear after `"version"`
513 /// and `"encoding"`, this method does not check that. The first occurrence of the
514 /// attribute will be returned even if there are several. Also, method does not
515 /// restrict symbols that can forming the value, so the returned flag name may not
516 /// correspond to the grammar.
517 ///
518 /// # Examples
519 ///
520 /// ```
521 /// use std::borrow::Cow;
522 /// use quick_xml::Error;
523 /// use quick_xml::events::{BytesDecl, BytesStart};
524 ///
525 /// // <?xml version='1.1'?>
526 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
527 /// assert!(decl.standalone().is_none());
528 ///
529 /// // <?xml standalone='yes'?>
530 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
531 /// match decl.standalone() {
532 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
533 /// _ => assert!(false),
534 /// }
535 ///
536 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
537 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
538 /// match decl.standalone() {
539 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
540 /// _ => assert!(false),
541 /// }
542 /// ```
543 ///
544 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
545 pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
546 self.content
547 .try_get_attribute("standalone")
548 .map(|a| a.map(|a| a.value))
549 .transpose()
550 }
551
552 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
553 /// algorithm.
554 ///
555 /// If encoding in not known, or `encoding` key was not found, returns `None`.
556 /// In case of duplicated `encoding` key, encoding, corresponding to the first
557 /// one, is returned.
558 #[cfg(feature = "encoding")]
559 pub fn encoder(&self) -> Option<&'static Encoding> {
560 self.encoding()
561 .and_then(|e| e.ok())
562 .and_then(|e| Encoding::for_label(&e))
563 }
564
565 /// Converts the event into an owned event.
566 pub fn into_owned(self) -> BytesDecl<'static> {
567 BytesDecl {
568 content: self.content.into_owned(),
569 }
570 }
571
572 /// Converts the event into a borrowed event.
573 #[inline]
574 pub fn borrow(&self) -> BytesDecl {
575 BytesDecl {
576 content: self.content.borrow(),
577 }
578 }
579}
580
581impl<'a> Deref for BytesDecl<'a> {
582 type Target = [u8];
583
584 fn deref(&self) -> &[u8] {
585 &self.content
586 }
587}
588
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Generates a declaration from an arbitrary version string and arbitrary
    /// optional encoding and standalone values, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
603
604////////////////////////////////////////////////////////////////////////////////////////////////////
605
/// A struct to manage `Event::End` events, i.e. closing tags such as `</name>`.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Undecoded raw name of the closing tag.
    name: Cow<'a, [u8]>,
}
611
612impl<'a> BytesEnd<'a> {
613 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
614 #[inline]
615 pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self {
616 BytesEnd { name }
617 }
618
619 /// Creates a new `BytesEnd` borrowing a slice.
620 ///
621 /// # Warning
622 ///
623 /// `name` must be a valid name.
624 #[inline]
625 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
626 Self::wrap(str_cow_to_bytes(name))
627 }
628
629 /// Converts the event into an owned event.
630 pub fn into_owned(self) -> BytesEnd<'static> {
631 BytesEnd {
632 name: Cow::Owned(self.name.into_owned()),
633 }
634 }
635
636 /// Converts the event into a borrowed event.
637 #[inline]
638 pub fn borrow(&self) -> BytesEnd {
639 BytesEnd {
640 name: Cow::Borrowed(&self.name),
641 }
642 }
643
644 /// Gets the undecoded raw tag name, as present in the input stream.
645 #[inline]
646 pub fn name(&self) -> QName {
647 QName(&self.name)
648 }
649
650 /// Gets the undecoded raw local tag name (excluding namespace) as present
651 /// in the input stream.
652 ///
653 /// All content up to and including the first `:` character is removed from the tag name.
654 #[inline]
655 pub fn local_name(&self) -> LocalName {
656 self.name().into()
657 }
658}
659
660impl<'a> Debug for BytesEnd<'a> {
661 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
662 write!(f, "BytesEnd {{ name: ")?;
663 write_cow_string(f, &self.name)?;
664 write!(f, " }}")
665 }
666}
667
668impl<'a> Deref for BytesEnd<'a> {
669 type Target = [u8];
670
671 fn deref(&self) -> &[u8] {
672 &self.name
673 }
674}
675
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
685
686////////////////////////////////////////////////////////////////////////////////////////////////////
687
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is stored in escaped form as well.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when event comes from the reader and should be in the
    /// document encoding when event passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
699
700impl<'a> BytesText<'a> {
701 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
702 #[inline]
703 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
704 Self {
705 content: content.into(),
706 decoder,
707 }
708 }
709
710 /// Creates a new `BytesText` from an escaped string.
711 #[inline]
712 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
713 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
714 }
715
716 /// Creates a new `BytesText` from a string. The string is expected not to
717 /// be escaped.
718 #[inline]
719 pub fn new(content: &'a str) -> Self {
720 Self::from_escaped(escape(content))
721 }
722
723 /// Ensures that all data is owned to extend the object's lifetime if
724 /// necessary.
725 #[inline]
726 pub fn into_owned(self) -> BytesText<'static> {
727 BytesText {
728 content: self.content.into_owned().into(),
729 decoder: self.decoder,
730 }
731 }
732
733 /// Extracts the inner `Cow` from the `BytesText` event container.
734 #[inline]
735 pub fn into_inner(self) -> Cow<'a, [u8]> {
736 self.content
737 }
738
739 /// Converts the event into a borrowed event.
740 #[inline]
741 pub fn borrow(&self) -> BytesText {
742 BytesText {
743 content: Cow::Borrowed(&self.content),
744 decoder: self.decoder,
745 }
746 }
747
748 /// Decodes then unescapes the content of the event.
749 ///
750 /// This will allocate if the value contains any escape sequences or in
751 /// non-UTF-8 encoding.
752 pub fn unescape(&self) -> Result<Cow<'a, str>> {
753 self.unescape_with(resolve_predefined_entity)
754 }
755
756 /// Decodes then unescapes the content of the event with custom entities.
757 ///
758 /// This will allocate if the value contains any escape sequences or in
759 /// non-UTF-8 encoding.
760 pub fn unescape_with<'entity>(
761 &self,
762 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
763 ) -> Result<Cow<'a, str>> {
764 let decoded = match &self.content {
765 Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
766 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
767 Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
768 };
769
770 match unescape_with(&decoded, resolve_entity)? {
771 // Because result is borrowed, no replacements was done and we can use original string
772 Cow::Borrowed(_) => Ok(decoded),
773 Cow::Owned(s) => Ok(s.into()),
774 }
775 }
776
777 /// Removes leading XML whitespace bytes from text content.
778 ///
779 /// Returns `true` if content is empty after that
780 pub fn inplace_trim_start(&mut self) -> bool {
781 self.content = trim_cow(
782 replace(&mut self.content, Cow::Borrowed(b"")),
783 trim_xml_start,
784 );
785 self.content.is_empty()
786 }
787
788 /// Removes trailing XML whitespace bytes from text content.
789 ///
790 /// Returns `true` if content is empty after that
791 pub fn inplace_trim_end(&mut self) -> bool {
792 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
793 self.content.is_empty()
794 }
795}
796
797impl<'a> Debug for BytesText<'a> {
798 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
799 write!(f, "BytesText {{ content: ")?;
800 write_cow_string(f, &self.content)?;
801 write!(f, " }}")
802 }
803}
804
805impl<'a> Deref for BytesText<'a> {
806 type Target = [u8];
807
808 fn deref(&self) -> &[u8] {
809 &self.content
810 }
811}
812
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string that consists only of
    /// alphanumeric characters (the empty string is accepted). Any other string
    /// is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().any(|ch| !ch.is_alphanumeric()) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(text))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
827
828////////////////////////////////////////////////////////////////////////////////////////////////////
829
/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
/// [convert](Self::escape) it to [`BytesText`]
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Unescaped content of the CDATA section, stored in the encoding
    /// described by `decoder`.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
838
839impl<'a> BytesCData<'a> {
840 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
841 #[inline]
842 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
843 Self {
844 content: content.into(),
845 decoder,
846 }
847 }
848
849 /// Creates a new `BytesCData` from a string.
850 ///
851 /// # Warning
852 ///
853 /// `content` must not contain the `]]>` sequence.
854 #[inline]
855 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
856 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
857 }
858
859 /// Ensures that all data is owned to extend the object's lifetime if
860 /// necessary.
861 #[inline]
862 pub fn into_owned(self) -> BytesCData<'static> {
863 BytesCData {
864 content: self.content.into_owned().into(),
865 decoder: self.decoder,
866 }
867 }
868
869 /// Extracts the inner `Cow` from the `BytesCData` event container.
870 #[inline]
871 pub fn into_inner(self) -> Cow<'a, [u8]> {
872 self.content
873 }
874
875 /// Converts the event into a borrowed event.
876 #[inline]
877 pub fn borrow(&self) -> BytesCData {
878 BytesCData {
879 content: Cow::Borrowed(&self.content),
880 decoder: self.decoder,
881 }
882 }
883
884 /// Converts this CDATA content to an escaped version, that can be written
885 /// as an usual text in XML.
886 ///
887 /// This function performs following replacements:
888 ///
889 /// | Character | Replacement
890 /// |-----------|------------
891 /// | `<` | `<`
892 /// | `>` | `>`
893 /// | `&` | `&`
894 /// | `'` | `'`
895 /// | `"` | `"`
896 pub fn escape(self) -> Result<BytesText<'a>> {
897 let decoded = self.decode()?;
898 Ok(BytesText::wrap(
899 match escape(&decoded) {
900 // Because result is borrowed, no replacements was done and we can use original content
901 Cow::Borrowed(_) => self.content,
902 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
903 },
904 Decoder::utf8(),
905 ))
906 }
907
908 /// Converts this CDATA content to an escaped version, that can be written
909 /// as an usual text in XML.
910 ///
911 /// In XML text content, it is allowed (though not recommended) to leave
912 /// the quote special characters `"` and `'` unescaped.
913 ///
914 /// This function performs following replacements:
915 ///
916 /// | Character | Replacement
917 /// |-----------|------------
918 /// | `<` | `<`
919 /// | `>` | `>`
920 /// | `&` | `&`
921 pub fn partial_escape(self) -> Result<BytesText<'a>> {
922 let decoded = self.decode()?;
923 Ok(BytesText::wrap(
924 match partial_escape(&decoded) {
925 // Because result is borrowed, no replacements was done and we can use original content
926 Cow::Borrowed(_) => self.content,
927 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
928 },
929 Decoder::utf8(),
930 ))
931 }
932
933 /// Converts this CDATA content to an escaped version, that can be written
934 /// as an usual text in XML. This method escapes only those characters that
935 /// must be escaped according to the [specification].
936 ///
937 /// This function performs following replacements:
938 ///
939 /// | Character | Replacement
940 /// |-----------|------------
941 /// | `<` | `<`
942 /// | `&` | `&`
943 ///
944 /// [specification]: https://www.w3.org/TR/xml11/#syntax
945 pub fn minimal_escape(self) -> Result<BytesText<'a>> {
946 let decoded = self.decode()?;
947 Ok(BytesText::wrap(
948 match minimal_escape(&decoded) {
949 // Because result is borrowed, no replacements was done and we can use original content
950 Cow::Borrowed(_) => self.content,
951 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
952 },
953 Decoder::utf8(),
954 ))
955 }
956
957 /// Gets content of this text buffer in the specified encoding
958 pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
959 Ok(match &self.content {
960 Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
961 // Convert to owned, because otherwise Cow will be bound with wrong lifetime
962 Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
963 })
964 }
965}
966
967impl<'a> Debug for BytesCData<'a> {
968 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
969 write!(f, "BytesCData {{ content: ")?;
970 write_cow_string(f, &self.content)?;
971 write!(f, " }}")
972 }
973}
974
975impl<'a> Deref for BytesCData<'a> {
976 type Target = [u8];
977
978 fn deref(&self) -> &[u8] {
979 &self.content
980 }
981}
982
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA event whose content is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
992
993////////////////////////////////////////////////////////////////////////////////////////////////////
994
/// Event emitted by [`Reader::read_event`] and [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Event::Eof) wraps one of the `Bytes*` types
/// with the content of the corresponding part of the document. The event
/// dereferences to the bytes of that content, and to an empty slice for `Eof`.
///
/// Use [`into_owned`](Event::into_owned) to get an event that is untied to
/// the lifetime `'a`.
///
/// [`Reader::read_event`]: crate::reader::Reader::read_event
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    ///
    /// Holds the same type of content as [`Start`](Event::Start).
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesText<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document. This variant carries no content.
    Eof,
}
1022
1023impl<'a> Event<'a> {
1024 /// Converts the event to an owned version, untied to the lifetime of
1025 /// buffer used when reading but incurring a new, separate allocation.
1026 pub fn into_owned(self) -> Event<'static> {
1027 match self {
1028 Event::Start(e) => Event::Start(e.into_owned()),
1029 Event::End(e) => Event::End(e.into_owned()),
1030 Event::Empty(e) => Event::Empty(e.into_owned()),
1031 Event::Text(e) => Event::Text(e.into_owned()),
1032 Event::Comment(e) => Event::Comment(e.into_owned()),
1033 Event::CData(e) => Event::CData(e.into_owned()),
1034 Event::Decl(e) => Event::Decl(e.into_owned()),
1035 Event::PI(e) => Event::PI(e.into_owned()),
1036 Event::DocType(e) => Event::DocType(e.into_owned()),
1037 Event::Eof => Event::Eof,
1038 }
1039 }
1040
1041 /// Converts the event into a borrowed event.
1042 #[inline]
1043 pub fn borrow(&self) -> Event {
1044 match self {
1045 Event::Start(e) => Event::Start(e.borrow()),
1046 Event::End(e) => Event::End(e.borrow()),
1047 Event::Empty(e) => Event::Empty(e.borrow()),
1048 Event::Text(e) => Event::Text(e.borrow()),
1049 Event::Comment(e) => Event::Comment(e.borrow()),
1050 Event::CData(e) => Event::CData(e.borrow()),
1051 Event::Decl(e) => Event::Decl(e.borrow()),
1052 Event::PI(e) => Event::PI(e.borrow()),
1053 Event::DocType(e) => Event::DocType(e.borrow()),
1054 Event::Eof => Event::Eof,
1055 }
1056 }
1057}
1058
1059impl<'a> Deref for Event<'a> {
1060 type Target = [u8];
1061
1062 fn deref(&self) -> &[u8] {
1063 match *self {
1064 Event::Start(ref e) | Event::Empty(ref e) => e,
1065 Event::End(ref e) => e,
1066 Event::Text(ref e) => e,
1067 Event::Decl(ref e) => e,
1068 Event::PI(ref e) => e,
1069 Event::CData(ref e) => e,
1070 Event::Comment(ref e) => e,
1071 Event::DocType(ref e) => e,
1072 Event::Eof => &[],
1073 }
1074 }
1075}
1076
1077impl<'a> AsRef<Event<'a>> for Event<'a> {
1078 fn as_ref(&self) -> &Event<'a> {
1079 self
1080 }
1081}
1082
1083////////////////////////////////////////////////////////////////////////////////////////////////////
1084
/// Reinterprets a possibly borrowed string as possibly borrowed bytes.
///
/// A borrowed string stays borrowed and an owned string hands over its
/// buffer, so the content is never copied.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1092
1093/// Returns a byte slice with leading XML whitespace bytes removed.
1094///
1095/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1096const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
1097 // Note: A pattern matching based approach (instead of indexing) allows
1098 // making the function const.
1099 while let [first, rest @ ..] = bytes {
1100 if is_whitespace(*first) {
1101 bytes = rest;
1102 } else {
1103 break;
1104 }
1105 }
1106 bytes
1107}
1108
1109/// Returns a byte slice with trailing XML whitespace bytes removed.
1110///
1111/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1112const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
1113 // Note: A pattern matching based approach (instead of indexing) allows
1114 // making the function const.
1115 while let [rest @ .., last] = bytes {
1116 if is_whitespace(*last) {
1117 bytes = rest;
1118 } else {
1119 break;
1120 }
1121 }
1122 bytes
1123}
1124
/// Applies the `trim` function to the bytes of `value`, keeping the kind of
/// ownership: borrowed data stays borrowed and owned data stays owned.
///
/// For owned data the original buffer is returned as is when `trim` did not
/// change the length; otherwise the trimmed part is copied into a new buffer.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Borrowed(slice) => Cow::Borrowed(trim(slice)),
        Cow::Owned(buffer) => {
            let kept = trim(&buffer);
            if kept.len() == buffer.len() {
                // Nothing was removed, the buffer can be reused
                Cow::Owned(buffer)
            } else {
                Cow::Owned(kept.to_vec())
            }
        }
    }
}
1140
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        // ` x="a"` adds 6 bytes to the 4 bytes of the name
        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        // The name shrinks from 4 bytes to 1 byte, the attribute stays
        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the name of the tag.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));
        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}