quick_xml/reader/ns_reader.rs
//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::ops::Deref;
11use std::path::Path;
12
13use crate::errors::Result;
14use crate::events::Event;
15use crate::name::{LocalName, NamespaceResolver, PrefixIter, QName, ResolveResult};
16use crate::reader::{Config, Reader, Span, XmlSource};
17
18/// A low level encoding-agnostic XML event reader that performs namespace resolution.
19///
20/// Consumes a [`BufRead`] and streams XML `Event`s.
21pub struct NsReader<R> {
22 /// An XML reader
23 pub(super) reader: Reader<R>,
24 /// A buffer to manage namespaces
25 ns_resolver: NamespaceResolver,
26 /// We cannot pop data from the namespace stack until returned `Empty` or `End`
27 /// event will be processed by the user, so we only mark that we should that
28 /// in the next [`Self::read_event_impl()`] call.
29 pending_pop: bool,
30}
31
32/// Builder methods
33impl<R> NsReader<R> {
34 /// Creates a `NsReader` that reads from a reader.
35 #[inline]
36 pub fn from_reader(reader: R) -> Self {
37 Self::new(Reader::from_reader(reader))
38 }
39
40 /// Returns reference to the parser configuration
41 #[inline]
42 pub fn config(&self) -> &Config {
43 self.reader.config()
44 }
45
46 /// Returns mutable reference to the parser configuration
47 #[inline]
48 pub fn config_mut(&mut self) -> &mut Config {
49 self.reader.config_mut()
50 }
51
52 /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces.
53 ///
54 /// # Examples
55 ///
56 /// This example shows what results the returned iterator would return after
57 /// reading each event of a simple XML.
58 ///
59 /// ```
60 /// # use pretty_assertions::assert_eq;
61 /// use quick_xml::name::{Namespace, PrefixDeclaration};
62 /// use quick_xml::NsReader;
63 ///
64 /// let src = "<root>
65 /// <a xmlns=\"a1\" xmlns:a=\"a2\">
66 /// <b xmlns=\"b1\" xmlns:b=\"b2\">
67 /// <c/>
68 /// </b>
69 /// <d/>
70 /// </a>
71 /// </root>";
72 /// let mut reader = NsReader::from_str(src);
73 /// reader.config_mut().trim_text(true);
74 /// // No prefixes at the beginning
75 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
76 ///
77 /// reader.read_resolved_event()?; // <root>
78 /// // No prefixes declared on root
79 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
80 ///
81 /// reader.read_resolved_event()?; // <a>
82 /// // Two prefixes declared on "a"
83 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
84 /// (PrefixDeclaration::Default, Namespace(b"a1")),
85 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
86 /// ]);
87 ///
88 /// reader.read_resolved_event()?; // <b>
89 /// // The default prefix got overridden and new "b" prefix
90 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
91 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
92 /// (PrefixDeclaration::Default, Namespace(b"b1")),
93 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
94 /// ]);
95 ///
96 /// reader.read_resolved_event()?; // <c/>
97 /// // Still the same
98 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
99 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
100 /// (PrefixDeclaration::Default, Namespace(b"b1")),
101 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
102 /// ]);
103 ///
104 /// reader.read_resolved_event()?; // </b>
105 /// // Still the same
106 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
107 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
108 /// (PrefixDeclaration::Default, Namespace(b"b1")),
109 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
110 /// ]);
111 ///
112 /// reader.read_resolved_event()?; // <d/>
113 /// // </b> got closed so back to the prefixes declared on <a>
114 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
115 /// (PrefixDeclaration::Default, Namespace(b"a1")),
116 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
117 /// ]);
118 ///
119 /// reader.read_resolved_event()?; // </a>
120 /// // Still the same
121 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
122 /// (PrefixDeclaration::Default, Namespace(b"a1")),
123 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
124 /// ]);
125 ///
126 /// reader.read_resolved_event()?; // </root>
127 /// // <a> got closed
128 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
129 /// # quick_xml::Result::Ok(())
130 /// ```
131 #[inline]
132 pub fn prefixes(&self) -> PrefixIter {
133 self.ns_resolver.iter()
134 }
135}
136
137/// Private methods
138impl<R> NsReader<R> {
139 #[inline]
140 fn new(reader: Reader<R>) -> Self {
141 Self {
142 reader,
143 ns_resolver: NamespaceResolver::default(),
144 pending_pop: false,
145 }
146 }
147
148 fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
149 where
150 R: XmlSource<'i, B>,
151 {
152 self.pop();
153 let event = self.reader.read_event_impl(buf);
154 self.process_event(event)
155 }
156
157 pub(super) fn pop(&mut self) {
158 if self.pending_pop {
159 self.ns_resolver.pop();
160 self.pending_pop = false;
161 }
162 }
163
164 pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
165 match event {
166 Ok(Event::Start(e)) => {
167 self.ns_resolver.push(&e)?;
168 Ok(Event::Start(e))
169 }
170 Ok(Event::Empty(e)) => {
171 self.ns_resolver.push(&e)?;
172 // notify next `read_event_impl()` invocation that it needs to pop this
173 // namespace scope
174 self.pending_pop = true;
175 Ok(Event::Empty(e))
176 }
177 Ok(Event::End(e)) => {
178 // notify next `read_event_impl()` invocation that it needs to pop this
179 // namespace scope
180 self.pending_pop = true;
181 Ok(Event::End(e))
182 }
183 e => e,
184 }
185 }
186
187 pub(super) fn resolve_event<'i>(
188 &mut self,
189 event: Result<Event<'i>>,
190 ) -> Result<(ResolveResult, Event<'i>)> {
191 match event {
192 Ok(Event::Start(e)) => Ok((self.ns_resolver.find(e.name()), Event::Start(e))),
193 Ok(Event::Empty(e)) => Ok((self.ns_resolver.find(e.name()), Event::Empty(e))),
194 Ok(Event::End(e)) => Ok((self.ns_resolver.find(e.name()), Event::End(e))),
195 Ok(e) => Ok((ResolveResult::Unbound, e)),
196 Err(e) => Err(e),
197 }
198 }
199}
200
201/// Getters
202impl<R> NsReader<R> {
203 /// Consumes `NsReader` returning the underlying reader
204 ///
205 /// See the [`Reader::into_inner`] for examples
206 #[inline]
207 pub fn into_inner(self) -> R {
208 self.reader.into_inner()
209 }
210
211 /// Gets a mutable reference to the underlying reader.
212 pub fn get_mut(&mut self) -> &mut R {
213 self.reader.get_mut()
214 }
215
216 /// Resolves a potentially qualified **element name** or **attribute name**
217 /// into _(namespace name, local name)_.
218 ///
219 /// _Qualified_ names have the form `prefix:local-name` where the `prefix`
220 /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
221 /// The namespace prefix can be defined on the same element as the name in question.
222 ///
223 /// The method returns following results depending on the `name` shape,
224 /// `attribute` flag and the presence of the default namespace:
225 ///
226 /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName
227 /// |---------|-------------|-------------------|-----------------------|------------
228 /// |`true` |Not defined |`local-name` |[`Unbound`] |`local-name`
229 /// |`true` |Defined |`local-name` |[`Unbound`] |`local-name`
230 /// |`true` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
231 /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name`
232 /// |`false` |Defined |`local-name` |[`Bound`] (default) |`local-name`
233 /// |`false` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
234 ///
235 /// If you want to clearly indicate that name that you resolve is an element
236 /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`]
237 /// methods.
238 ///
239 /// # Lifetimes
240 ///
241 /// - `'n`: lifetime of a name. Returned local name will be bound to the same
242 /// lifetime as the name in question.
243 /// - returned namespace name will be bound to the reader itself
244 ///
245 /// [`Bound`]: ResolveResult::Bound
246 /// [`Unbound`]: ResolveResult::Unbound
247 /// [`Unknown`]: ResolveResult::Unknown
248 /// [`resolve_attribute()`]: Self::resolve_attribute()
249 /// [`resolve_element()`]: Self::resolve_element()
250 #[inline]
251 pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) {
252 self.ns_resolver.resolve(name, !attribute)
253 }
254
255 /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
256 ///
257 /// _Qualified_ element names have the form `prefix:local-name` where the
258 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
259 /// The namespace prefix can be defined on the same element as the element
260 /// in question.
261 ///
    /// _Unqualified_ elements inherit the current _default namespace_.
263 ///
264 /// The method returns following results depending on the `name` shape and
265 /// the presence of the default namespace:
266 ///
267 /// |`xmlns="..."`|QName |ResolveResult |LocalName
268 /// |-------------|-------------------|-----------------------|------------
269 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
270 /// |Defined |`local-name` |[`Bound`] (default) |`local-name`
271 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
272 ///
273 /// # Lifetimes
274 ///
275 /// - `'n`: lifetime of an element name. Returned local name will be bound
276 /// to the same lifetime as the name in question.
277 /// - returned namespace name will be bound to the reader itself
278 ///
279 /// # Examples
280 ///
281 /// This example shows how you can resolve qualified name into a namespace.
    /// Note that in code like this you do not need to do that manually,
    /// because the namespace resolution result is returned by [`read_resolved_event()`].
284 ///
285 /// ```
286 /// # use pretty_assertions::assert_eq;
287 /// use quick_xml::events::Event;
288 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
289 /// use quick_xml::reader::NsReader;
290 ///
291 /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
292 ///
293 /// match reader.read_event().unwrap() {
294 /// Event::Empty(e) => assert_eq!(
295 /// reader.resolve_element(e.name()),
296 /// (Bound(Namespace(b"root namespace")), QName(b"tag").into())
297 /// ),
298 /// _ => unreachable!(),
299 /// }
300 /// ```
301 ///
302 /// [`Bound`]: ResolveResult::Bound
303 /// [`Unbound`]: ResolveResult::Unbound
304 /// [`Unknown`]: ResolveResult::Unknown
305 /// [`read_resolved_event()`]: Self::read_resolved_event
306 #[inline]
307 pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
308 self.ns_resolver.resolve(name, true)
309 }
310
311 /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
312 ///
313 /// _Qualified_ attribute names have the form `prefix:local-name` where the
314 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
315 /// The namespace prefix can be defined on the same element as the attribute
316 /// in question.
317 ///
318 /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
319 ///
320 /// The method returns following results depending on the `name` shape and
321 /// the presence of the default namespace:
322 ///
323 /// |`xmlns="..."`|QName |ResolveResult |LocalName
324 /// |-------------|-------------------|-----------------------|------------
325 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
326 /// |Defined |`local-name` |[`Unbound`] |`local-name`
327 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
328 ///
329 /// # Lifetimes
330 ///
331 /// - `'n`: lifetime of an attribute name. Returned local name will be bound
332 /// to the same lifetime as the name in question.
333 /// - returned namespace name will be bound to the reader itself
334 ///
335 /// # Examples
336 ///
337 /// ```
338 /// # use pretty_assertions::assert_eq;
339 /// use quick_xml::events::Event;
340 /// use quick_xml::events::attributes::Attribute;
341 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
342 /// use quick_xml::reader::NsReader;
343 ///
344 /// let mut reader = NsReader::from_str("
345 /// <tag one='1'
346 /// p:two='2'
347 /// xmlns='root namespace'
348 /// xmlns:p='other namespace'/>
349 /// ");
350 /// reader.config_mut().trim_text(true);
351 ///
352 /// match reader.read_event().unwrap() {
353 /// Event::Empty(e) => {
354 /// let mut iter = e.attributes();
355 ///
356 /// // Unlike elements, attributes without explicit namespace
357 /// // not bound to any namespace
358 /// let one = iter.next().unwrap().unwrap();
359 /// assert_eq!(
360 /// reader.resolve_attribute(one.key),
361 /// (Unbound, QName(b"one").into())
362 /// );
363 ///
364 /// let two = iter.next().unwrap().unwrap();
365 /// assert_eq!(
366 /// reader.resolve_attribute(two.key),
367 /// (Bound(Namespace(b"other namespace")), QName(b"two").into())
368 /// );
369 /// }
370 /// _ => unreachable!(),
371 /// }
372 /// ```
373 ///
374 /// [`Bound`]: ResolveResult::Bound
375 /// [`Unbound`]: ResolveResult::Unbound
376 /// [`Unknown`]: ResolveResult::Unknown
377 #[inline]
378 pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
379 self.ns_resolver.resolve(name, false)
380 }
381}
382
383impl<R: BufRead> NsReader<R> {
384 /// Reads the next event into given buffer.
385 ///
386 /// This method manages namespaces but doesn't resolve them automatically.
387 /// You should call [`resolve_element()`] if you want to get a namespace.
388 ///
389 /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
390 /// namespace as soon as you get an event.
391 ///
392 /// # Examples
393 ///
394 /// ```
395 /// # use pretty_assertions::assert_eq;
396 /// use quick_xml::events::Event;
397 /// use quick_xml::name::{Namespace, ResolveResult::*};
398 /// use quick_xml::reader::NsReader;
399 ///
400 /// let mut reader = NsReader::from_str(r#"
401 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
402 /// <y:tag2><!--Test comment-->Test</y:tag2>
403 /// <y:tag2>Test 2</y:tag2>
404 /// </x:tag1>
405 /// "#);
406 /// reader.config_mut().trim_text(true);
407 ///
408 /// let mut count = 0;
409 /// let mut buf = Vec::new();
410 /// let mut txt = Vec::new();
411 /// loop {
412 /// match reader.read_event_into(&mut buf).unwrap() {
413 /// Event::Start(e) => {
414 /// count += 1;
415 /// let (ns, local) = reader.resolve_element(e.name());
416 /// match local.as_ref() {
417 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
418 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
419 /// _ => unreachable!(),
420 /// }
421 /// }
422 /// Event::Text(e) => {
423 /// txt.push(e.unescape().unwrap().into_owned())
424 /// }
425 /// Event::Eof => break,
426 /// _ => (),
427 /// }
428 /// buf.clear();
429 /// }
430 /// assert_eq!(count, 3);
431 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
432 /// ```
433 ///
434 /// [`resolve_element()`]: Self::resolve_element
435 /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
436 #[inline]
437 pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
438 self.read_event_impl(buf)
439 }
440
441 /// Reads the next event into given buffer and resolves its namespace (if applicable).
442 ///
443 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
444 /// For all other events the concept of namespace is not defined, so
445 /// a [`ResolveResult::Unbound`] is returned.
446 ///
447 /// If you are not interested in namespaces, you can use [`read_event_into()`]
448 /// which will not automatically resolve namespaces for you.
449 ///
450 /// # Examples
451 ///
452 /// ```
453 /// # use pretty_assertions::assert_eq;
454 /// use quick_xml::events::Event;
455 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
456 /// use quick_xml::reader::NsReader;
457 ///
458 /// let mut reader = NsReader::from_str(r#"
459 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
460 /// <y:tag2><!--Test comment-->Test</y:tag2>
461 /// <y:tag2>Test 2</y:tag2>
462 /// </x:tag1>
463 /// "#);
464 /// reader.config_mut().trim_text(true);
465 ///
466 /// let mut count = 0;
467 /// let mut buf = Vec::new();
468 /// let mut txt = Vec::new();
469 /// loop {
470 /// match reader.read_resolved_event_into(&mut buf).unwrap() {
471 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
472 /// count += 1;
473 /// assert_eq!(e.local_name(), QName(b"tag1").into());
474 /// }
475 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
476 /// count += 1;
477 /// assert_eq!(e.local_name(), QName(b"tag2").into());
478 /// }
479 /// (_, Event::Start(_)) => unreachable!(),
480 ///
481 /// (_, Event::Text(e)) => {
482 /// txt.push(e.unescape().unwrap().into_owned())
483 /// }
484 /// (_, Event::Eof) => break,
485 /// _ => (),
486 /// }
487 /// buf.clear();
488 /// }
489 /// assert_eq!(count, 3);
490 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
491 /// ```
492 ///
493 /// [`Start`]: Event::Start
494 /// [`Empty`]: Event::Empty
495 /// [`End`]: Event::End
496 /// [`read_event_into()`]: Self::read_event_into
497 #[inline]
498 pub fn read_resolved_event_into<'b>(
499 &mut self,
500 buf: &'b mut Vec<u8>,
501 ) -> Result<(ResolveResult, Event<'b>)> {
502 let event = self.read_event_impl(buf);
503 self.resolve_event(event)
504 }
505
506 /// Reads until end element is found using provided buffer as intermediate
507 /// storage for events content. This function is supposed to be called after
508 /// you already read a [`Start`] event.
509 ///
510 /// Returns a span that cover content between `>` of an opening tag and `<` of
511 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
512 /// this method was called after reading expanded [`Start`] event.
513 ///
514 /// Manages nested cases where parent and child elements have the _literally_
515 /// same name.
516 ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
520 ///
521 /// If your reader created from a string slice or byte array slice, it is
522 /// better to use [`read_to_end()`] method, because it will not copy bytes
523 /// into intermediate buffer.
524 ///
525 /// The provided `buf` buffer will be filled only by one event content at time.
526 /// Before reading of each event the buffer will be cleared. If you know an
527 /// appropriate size of each event, you can preallocate the buffer to reduce
528 /// number of reallocations.
529 ///
530 /// The `end` parameter should contain name of the end element _in the reader
531 /// encoding_. It is good practice to always get that parameter using
532 /// [`BytesStart::to_end()`] method.
533 ///
534 /// # Namespaces
535 ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
541 ///
542 /// > The end of every element that begins with a **start-tag** MUST be marked
543 /// > by an **end-tag** containing a name that echoes the element's type as
544 /// > given in the **start-tag**
545 ///
546 /// # Examples
547 ///
548 /// This example shows, how you can skip XML content after you read the
549 /// start event.
550 ///
551 /// ```
552 /// # use pretty_assertions::assert_eq;
553 /// use quick_xml::events::{BytesStart, Event};
554 /// use quick_xml::name::{Namespace, ResolveResult};
555 /// use quick_xml::reader::NsReader;
556 ///
557 /// let mut reader = NsReader::from_str(r#"
558 /// <outer xmlns="namespace 1">
559 /// <inner xmlns="namespace 2">
560 /// <outer></outer>
561 /// </inner>
562 /// <inner>
563 /// <inner></inner>
564 /// <inner/>
565 /// <outer></outer>
566 /// <p:outer xmlns:p="ns"></p:outer>
567 /// <outer/>
568 /// </inner>
569 /// </outer>
570 /// "#);
571 /// reader.config_mut().trim_text(true);
572 /// let mut buf = Vec::new();
573 ///
574 /// let ns = Namespace(b"namespace 1");
575 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
576 /// let end = start.to_end().into_owned();
577 ///
578 /// // First, we read a start event...
579 /// assert_eq!(
580 /// reader.read_resolved_event_into(&mut buf).unwrap(),
581 /// (ResolveResult::Bound(ns), Event::Start(start))
582 /// );
583 ///
584 /// // ...then, we could skip all events to the corresponding end event.
585 /// // This call will correctly handle nested <outer> elements.
586 /// // Note, however, that this method does not handle namespaces.
587 /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
588 ///
589 /// // At the end we should get an Eof event, because we ate the whole XML
590 /// assert_eq!(
591 /// reader.read_resolved_event_into(&mut buf).unwrap(),
592 /// (ResolveResult::Unbound, Event::Eof)
593 /// );
594 /// ```
595 ///
596 /// [`Start`]: Event::Start
597 /// [`End`]: Event::End
598 /// [`IllFormed`]: crate::errors::Error::IllFormed
599 /// [`read_to_end()`]: Self::read_to_end
600 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
601 /// [`expand_empty_elements`]: Config::expand_empty_elements
602 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
603 #[inline]
604 pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
605 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
606 // match literally the start name. See `Config::check_end_names` documentation
607 self.reader.read_to_end_into(end, buf)
608 }
609}
610
611impl NsReader<BufReader<File>> {
612 /// Creates an XML reader from a file path.
613 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
614 Ok(Self::new(Reader::from_file(path)?))
615 }
616}
617
618impl<'i> NsReader<&'i [u8]> {
619 /// Creates an XML reader from a string slice.
620 #[inline]
621 #[allow(clippy::should_implement_trait)]
622 pub fn from_str(s: &'i str) -> Self {
623 Self::new(Reader::from_str(s))
624 }
625
626 /// Reads the next event, borrow its content from the input buffer.
627 ///
628 /// This method manages namespaces but doesn't resolve them automatically.
629 /// You should call [`resolve_element()`] if you want to get a namespace.
630 ///
631 /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
632 /// as soon as you get an event.
633 ///
634 /// There is no asynchronous `read_event_async()` version of this function,
635 /// because it is not necessary -- the contents are already in memory and no IO
636 /// is needed, therefore there is no potential for blocking.
637 ///
638 /// # Examples
639 ///
640 /// ```
641 /// # use pretty_assertions::assert_eq;
642 /// use quick_xml::events::Event;
643 /// use quick_xml::name::{Namespace, ResolveResult::*};
644 /// use quick_xml::reader::NsReader;
645 ///
646 /// let mut reader = NsReader::from_str(r#"
647 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
648 /// <y:tag2><!--Test comment-->Test</y:tag2>
649 /// <y:tag2>Test 2</y:tag2>
650 /// </x:tag1>
651 /// "#);
652 /// reader.config_mut().trim_text(true);
653 ///
654 /// let mut count = 0;
655 /// let mut txt = Vec::new();
656 /// loop {
657 /// match reader.read_event().unwrap() {
658 /// Event::Start(e) => {
659 /// count += 1;
660 /// let (ns, local) = reader.resolve_element(e.name());
661 /// match local.as_ref() {
662 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
663 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
664 /// _ => unreachable!(),
665 /// }
666 /// }
667 /// Event::Text(e) => {
668 /// txt.push(e.unescape().unwrap().into_owned())
669 /// }
670 /// Event::Eof => break,
671 /// _ => (),
672 /// }
673 /// }
674 /// assert_eq!(count, 3);
675 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
676 /// ```
677 ///
678 /// [`resolve_element()`]: Self::resolve_element
679 /// [`read_resolved_event()`]: Self::read_resolved_event
680 #[inline]
681 pub fn read_event(&mut self) -> Result<Event<'i>> {
682 self.read_event_impl(())
683 }
684
685 /// Reads the next event, borrow its content from the input buffer, and resolves
686 /// its namespace (if applicable).
687 ///
688 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
689 /// For all other events the concept of namespace is not defined, so
690 /// a [`ResolveResult::Unbound`] is returned.
691 ///
692 /// If you are not interested in namespaces, you can use [`read_event()`]
693 /// which will not automatically resolve namespaces for you.
694 ///
695 /// There is no asynchronous `read_resolved_event_async()` version of this function,
696 /// because it is not necessary -- the contents are already in memory and no IO
697 /// is needed, therefore there is no potential for blocking.
698 ///
699 /// # Examples
700 ///
701 /// ```
702 /// # use pretty_assertions::assert_eq;
703 /// use quick_xml::events::Event;
704 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
705 /// use quick_xml::reader::NsReader;
706 ///
707 /// let mut reader = NsReader::from_str(r#"
708 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
709 /// <y:tag2><!--Test comment-->Test</y:tag2>
710 /// <y:tag2>Test 2</y:tag2>
711 /// </x:tag1>
712 /// "#);
713 /// reader.config_mut().trim_text(true);
714 ///
715 /// let mut count = 0;
716 /// let mut txt = Vec::new();
717 /// loop {
718 /// match reader.read_resolved_event().unwrap() {
719 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
720 /// count += 1;
721 /// assert_eq!(e.local_name(), QName(b"tag1").into());
722 /// }
723 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
724 /// count += 1;
725 /// assert_eq!(e.local_name(), QName(b"tag2").into());
726 /// }
727 /// (_, Event::Start(_)) => unreachable!(),
728 ///
729 /// (_, Event::Text(e)) => {
730 /// txt.push(e.unescape().unwrap().into_owned())
731 /// }
732 /// (_, Event::Eof) => break,
733 /// _ => (),
734 /// }
735 /// }
736 /// assert_eq!(count, 3);
737 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
738 /// ```
739 ///
740 /// [`Start`]: Event::Start
741 /// [`Empty`]: Event::Empty
742 /// [`End`]: Event::End
743 /// [`read_event()`]: Self::read_event
744 #[inline]
745 pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> {
746 let event = self.read_event_impl(());
747 self.resolve_event(event)
748 }
749
750 /// Reads until end element is found. This function is supposed to be called
751 /// after you already read a [`Start`] event.
752 ///
753 /// Returns a span that cover content between `>` of an opening tag and `<` of
754 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
755 /// this method was called after reading expanded [`Start`] event.
756 ///
757 /// Manages nested cases where parent and child elements have the _literally_
758 /// same name.
759 ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
763 ///
764 /// The `end` parameter should contain name of the end element _in the reader
765 /// encoding_. It is good practice to always get that parameter using
766 /// [`BytesStart::to_end()`] method.
767 ///
768 /// There is no asynchronous `read_to_end_async()` version of this function,
769 /// because it is not necessary -- the contents are already in memory and no IO
770 /// is needed, therefore there is no potential for blocking.
771 ///
772 /// # Namespaces
773 ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
779 ///
780 /// > The end of every element that begins with a **start-tag** MUST be marked
781 /// > by an **end-tag** containing a name that echoes the element's type as
782 /// > given in the **start-tag**
783 ///
784 /// # Examples
785 ///
786 /// This example shows, how you can skip XML content after you read the
787 /// start event.
788 ///
789 /// ```
790 /// # use pretty_assertions::assert_eq;
791 /// use quick_xml::events::{BytesStart, Event};
792 /// use quick_xml::name::{Namespace, ResolveResult};
793 /// use quick_xml::reader::NsReader;
794 ///
795 /// let mut reader = NsReader::from_str(r#"
796 /// <outer xmlns="namespace 1">
797 /// <inner xmlns="namespace 2">
798 /// <outer></outer>
799 /// </inner>
800 /// <inner>
801 /// <inner></inner>
802 /// <inner/>
803 /// <outer></outer>
804 /// <p:outer xmlns:p="ns"></p:outer>
805 /// <outer/>
806 /// </inner>
807 /// </outer>
808 /// "#);
809 /// reader.config_mut().trim_text(true);
810 ///
811 /// let ns = Namespace(b"namespace 1");
812 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
813 /// let end = start.to_end().into_owned();
814 ///
815 /// // First, we read a start event...
816 /// assert_eq!(
817 /// reader.read_resolved_event().unwrap(),
818 /// (ResolveResult::Bound(ns), Event::Start(start))
819 /// );
820 ///
821 /// // ...then, we could skip all events to the corresponding end event.
822 /// // This call will correctly handle nested <outer> elements.
823 /// // Note, however, that this method does not handle namespaces.
824 /// reader.read_to_end(end.name()).unwrap();
825 ///
826 /// // At the end we should get an Eof event, because we ate the whole XML
827 /// assert_eq!(
828 /// reader.read_resolved_event().unwrap(),
829 /// (ResolveResult::Unbound, Event::Eof)
830 /// );
831 /// ```
832 ///
833 /// [`Start`]: Event::Start
834 /// [`End`]: Event::End
835 /// [`IllFormed`]: crate::errors::Error::IllFormed
836 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
837 /// [`expand_empty_elements`]: Config::expand_empty_elements
838 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
839 #[inline]
840 pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
841 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
842 // match literally the start name. See `Config::check_end_names` documentation
843 self.reader.read_to_end(end)
844 }
845
846 /// Reads content between start and end tags, including any markup. This
847 /// function is supposed to be called after you already read a [`Start`] event.
848 ///
849 /// Manages nested cases where parent and child elements have the _literally_
850 /// same name.
851 ///
    /// This method does not unescape the data it reads; instead it returns the
    /// content of the XML document "as is". This is because it has no idea what
    /// text it reads, and if, for example, it contains a CDATA section, an attempt
    /// to unescape its content will spoil the data.
856 ///
857 /// Any text will be decoded using the XML current [`decoder()`].
858 ///
    /// In effect, this method performs the following code:
860 ///
861 /// ```ignore
862 /// let span = reader.read_to_end(end)?;
863 /// let text = reader.decoder().decode(&reader.inner_slice[span]);
864 /// ```
865 ///
866 /// # Examples
867 ///
868 /// This example shows, how you can read a HTML content from your XML document.
869 ///
870 /// ```
871 /// # use pretty_assertions::assert_eq;
872 /// # use std::borrow::Cow;
873 /// use quick_xml::events::{BytesStart, Event};
874 /// use quick_xml::reader::NsReader;
875 ///
876 /// let mut reader = NsReader::from_str(r#"
877 /// <html>
878 /// <title>This is a HTML text</title>
879 /// <p>Usual XML rules does not apply inside it
880 /// <p>For example, elements not needed to be "closed"
881 /// </html>
882 /// "#);
883 /// reader.config_mut().trim_text(true);
884 ///
885 /// let start = BytesStart::new("html");
886 /// let end = start.to_end().into_owned();
887 ///
888 /// // First, we read a start event...
889 /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
890 /// // ...and disable checking of end names because we expect HTML further...
891 /// reader.config_mut().check_end_names = false;
892 ///
893 /// // ...then, we could read text content until close tag.
894 /// // This call will correctly handle nested <html> elements.
895 /// let text = reader.read_text(end.name()).unwrap();
896 /// assert_eq!(text, Cow::Borrowed(r#"
897 /// <title>This is a HTML text</title>
898 /// <p>Usual XML rules does not apply inside it
899 /// <p>For example, elements not needed to be "closed"
900 /// "#));
901 ///
902 /// // Now we can enable checks again
903 /// reader.config_mut().check_end_names = true;
904 ///
905 /// // At the end we should get an Eof event, because we ate the whole XML
906 /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
907 /// ```
908 ///
909 /// [`Start`]: Event::Start
910 /// [`decoder()`]: Reader::decoder()
911 #[inline]
912 pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
913 self.reader.read_text(end)
914 }
915}
916
917impl<R> Deref for NsReader<R> {
918 type Target = Reader<R>;
919
920 #[inline]
921 fn deref(&self) -> &Self::Target {
922 &self.reader
923 }
924}