quick_xml/reader/
ns_reader.rs

1//! A reader that manages namespace declarations found in the input and able
2//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::ops::Deref;
11use std::path::Path;
12
13use crate::errors::Result;
14use crate::events::Event;
15use crate::name::{LocalName, NamespaceBindingsIter, NamespaceResolver, QName, ResolveResult};
16use crate::reader::{Config, Reader, Span, XmlSource};
17
18/// A low level encoding-agnostic XML event reader that performs namespace resolution.
19///
20/// Consumes a [`BufRead`] and streams XML `Event`s.
#[derive(Debug, Clone)]
pub struct NsReader<R> {
    /// An XML reader
    pub(super) reader: Reader<R>,
    /// A buffer to manage namespaces
    ns_resolver: NamespaceResolver,
    /// We cannot pop data from the namespace stack until the returned `Empty` or `End`
    /// event has been processed by the user, so we only mark that we should do that
    /// in the next [`Self::read_event_impl()`] call.
    pending_pop: bool,
}
32
33/// Builder methods
34impl<R> NsReader<R> {
35    /// Creates a `NsReader` that reads from a reader.
36    #[inline]
37    pub fn from_reader(reader: R) -> Self {
38        Self::new(Reader::from_reader(reader))
39    }
40
41    /// Returns reference to the parser configuration
42    #[inline]
43    pub const fn config(&self) -> &Config {
44        self.reader.config()
45    }
46
47    /// Returns mutable reference to the parser configuration
48    #[inline]
49    pub fn config_mut(&mut self) -> &mut Config {
50        self.reader.config_mut()
51    }
52
53    /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces.
54    ///
55    /// # Examples
56    ///
57    /// This example shows what results the returned iterator would return after
58    /// reading each event of a simple XML.
59    ///
60    /// ```
61    /// # use pretty_assertions::assert_eq;
62    /// use quick_xml::name::{Namespace, PrefixDeclaration};
63    /// use quick_xml::NsReader;
64    ///
65    /// let src = "<root>
66    ///   <a xmlns=\"a1\" xmlns:a=\"a2\">
67    ///     <b xmlns=\"b1\" xmlns:b=\"b2\">
68    ///       <c/>
69    ///     </b>
70    ///     <d/>
71    ///   </a>
72    /// </root>";
73    /// let mut reader = NsReader::from_str(src);
74    /// reader.config_mut().trim_text(true);
75    /// // No prefixes at the beginning
76    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
77    ///
78    /// reader.read_resolved_event()?; // <root>
79    /// // No prefixes declared on root
80    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
81    ///
82    /// reader.read_resolved_event()?; // <a>
83    /// // Two prefixes declared on "a"
84    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
85    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
86    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
87    /// ]);
88    ///
89    /// reader.read_resolved_event()?; // <b>
90    /// // The default prefix got overridden and new "b" prefix
91    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
92    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
93    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
94    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
95    /// ]);
96    ///
97    /// reader.read_resolved_event()?; // <c/>
98    /// // Still the same
99    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
100    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
101    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
102    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
103    /// ]);
104    ///
105    /// reader.read_resolved_event()?; // </b>
106    /// // Still the same
107    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
108    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
109    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
110    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
111    /// ]);
112    ///
113    /// reader.read_resolved_event()?; // <d/>
114    /// // </b> got closed so back to the prefixes declared on <a>
115    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
116    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
117    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
118    /// ]);
119    ///
120    /// reader.read_resolved_event()?; // </a>
121    /// // Still the same
122    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
123    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
124    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
125    /// ]);
126    ///
127    /// reader.read_resolved_event()?; // </root>
128    /// // <a> got closed
129    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
130    /// # quick_xml::Result::Ok(())
131    /// ```
132    #[inline]
133    pub const fn prefixes(&self) -> NamespaceBindingsIter<'_> {
134        self.ns_resolver.bindings()
135    }
136}
137
138/// Private methods
139impl<R> NsReader<R> {
140    #[inline]
141    fn new(reader: Reader<R>) -> Self {
142        Self {
143            reader,
144            ns_resolver: NamespaceResolver::default(),
145            pending_pop: false,
146        }
147    }
148
149    fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
150    where
151        R: XmlSource<'i, B>,
152    {
153        self.pop();
154        let event = self.reader.read_event_impl(buf);
155        self.process_event(event)
156    }
157
158    pub(super) fn pop(&mut self) {
159        if self.pending_pop {
160            self.ns_resolver.pop();
161            self.pending_pop = false;
162        }
163    }
164
165    pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
166        match event {
167            Ok(Event::Start(e)) => {
168                self.ns_resolver.push(&e)?;
169                Ok(Event::Start(e))
170            }
171            Ok(Event::Empty(e)) => {
172                self.ns_resolver.push(&e)?;
173                // notify next `read_event_impl()` invocation that it needs to pop this
174                // namespace scope
175                self.pending_pop = true;
176                Ok(Event::Empty(e))
177            }
178            Ok(Event::End(e)) => {
179                // notify next `read_event_impl()` invocation that it needs to pop this
180                // namespace scope
181                self.pending_pop = true;
182                Ok(Event::End(e))
183            }
184            e => e,
185        }
186    }
187}
188
189/// Getters
190impl<R> NsReader<R> {
191    /// Consumes `NsReader` returning the underlying reader
192    ///
193    /// See the [`Reader::into_inner`] for examples
194    #[inline]
195    pub fn into_inner(self) -> R {
196        self.reader.into_inner()
197    }
198
199    /// Gets a mutable reference to the underlying reader.
200    pub fn get_mut(&mut self) -> &mut R {
201        self.reader.get_mut()
202    }
203
204    /// Returns a storage of namespace bindings associated with this reader.
205    #[inline]
206    pub const fn resolver(&self) -> &NamespaceResolver {
207        &self.ns_resolver
208    }
209
210    /// Resolves a potentially qualified **element name** or **attribute name**
211    /// into _(namespace name, local name)_.
212    ///
213    /// _Qualified_ names have the form `local-name` or `prefix:local-name` where the `prefix`
214    /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
215    /// The namespace prefix can be defined on the same element as the name in question.
216    ///
217    /// The method returns following results depending on the `name` shape, `attribute` flag
218    /// and the presence of the default namespace on element or any of its parents:
219    ///
220    /// |attribute|`xmlns="..."`|QName              |ResolveResult          |LocalName
221    /// |---------|-------------|-------------------|-----------------------|------------
222    /// |`true`   |_(any)_      |`local-name`       |[`Unbound`]            |`local-name`
223    /// |`true`   |_(any)_      |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
224    /// |`false`  |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
225    /// |`false`  |Defined      |`local-name`       |[`Bound`] (to `xmlns`) |`local-name`
226    /// |`false`  |_(any)_      |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
227    ///
228    /// If you want to clearly indicate that name that you resolve is an element
229    /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`]
230    /// methods.
231    ///
232    /// # Lifetimes
233    ///
234    /// - `'n`: lifetime of a name. Returned local name will be bound to the same
235    ///   lifetime as the name in question.
236    /// - returned namespace name will be bound to the reader itself
237    ///
238    /// [`Bound`]: ResolveResult::Bound
239    /// [`Unbound`]: ResolveResult::Unbound
240    /// [`Unknown`]: ResolveResult::Unknown
241    /// [`resolve_attribute()`]: Self::resolve_attribute()
242    /// [`resolve_element()`]: Self::resolve_element()
243    #[inline]
244    pub fn resolve<'n>(
245        &self,
246        name: QName<'n>,
247        attribute: bool,
248    ) -> (ResolveResult<'_>, LocalName<'n>) {
249        self.ns_resolver.resolve(name, !attribute)
250    }
251
252    /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
253    ///
254    /// _Qualified_ element names have the form `prefix:local-name` where the
255    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
256    /// The namespace prefix can be defined on the same element as the element
257    /// in question.
258    ///
    /// _Unqualified_ elements inherit the current _default namespace_.
260    ///
261    /// The method returns following results depending on the `name` shape and
262    /// the presence of the default namespace:
263    ///
264    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
265    /// |-------------|-------------------|-----------------------|------------
266    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
267    /// |Defined      |`local-name`       |[`Bound`] (default)    |`local-name`
268    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
269    ///
270    /// # Lifetimes
271    ///
272    /// - `'n`: lifetime of an element name. Returned local name will be bound
273    ///   to the same lifetime as the name in question.
274    /// - returned namespace name will be bound to the reader itself
275    ///
276    /// # Examples
277    ///
    /// This example shows how you can resolve a qualified name into a namespace.
    /// Note that in code like this you do not need to do that manually,
    /// because the namespace resolution result is returned by [`read_resolved_event()`].
281    ///
282    /// ```
283    /// # use pretty_assertions::assert_eq;
284    /// use quick_xml::events::Event;
285    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
286    /// use quick_xml::reader::NsReader;
287    ///
288    /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
289    ///
290    /// match reader.read_event().unwrap() {
291    ///     Event::Empty(e) => assert_eq!(
292    ///         reader.resolve_element(e.name()),
293    ///         (Bound(Namespace(b"root namespace")), QName(b"tag").into())
294    ///     ),
295    ///     _ => unreachable!(),
296    /// }
297    /// ```
298    ///
299    /// [`Bound`]: ResolveResult::Bound
300    /// [`Unbound`]: ResolveResult::Unbound
301    /// [`Unknown`]: ResolveResult::Unknown
302    /// [`read_resolved_event()`]: Self::read_resolved_event
303    #[inline]
304    pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
305        self.ns_resolver.resolve_element(name)
306    }
307
308    /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
309    ///
310    /// _Qualified_ attribute names have the form `prefix:local-name` where the
311    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
312    /// The namespace prefix can be defined on the same element as the attribute
313    /// in question.
314    ///
315    /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
316    ///
317    /// The method returns following results depending on the `name` shape and
318    /// the presence of the default namespace:
319    ///
320    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
321    /// |-------------|-------------------|-----------------------|------------
322    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
323    /// |Defined      |`local-name`       |[`Unbound`]            |`local-name`
324    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
325    ///
326    /// # Lifetimes
327    ///
328    /// - `'n`: lifetime of an attribute name. Returned local name will be bound
329    ///   to the same lifetime as the name in question.
330    /// - returned namespace name will be bound to the reader itself
331    ///
332    /// # Examples
333    ///
334    /// ```
335    /// # use pretty_assertions::assert_eq;
336    /// use quick_xml::events::Event;
337    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
338    /// use quick_xml::reader::NsReader;
339    ///
340    /// let mut reader = NsReader::from_str("
341    ///     <tag one='1'
342    ///          p:two='2'
343    ///          xmlns='root namespace'
344    ///          xmlns:p='other namespace'/>
345    /// ");
346    /// reader.config_mut().trim_text(true);
347    ///
348    /// match reader.read_event().unwrap() {
349    ///     Event::Empty(e) => {
350    ///         let mut iter = e.attributes();
351    ///
352    ///         // Unlike elements, attributes without explicit namespace
353    ///         // not bound to any namespace
354    ///         let one = iter.next().unwrap().unwrap();
355    ///         assert_eq!(
356    ///             reader.resolve_attribute(one.key),
357    ///             (Unbound, QName(b"one").into())
358    ///         );
359    ///
360    ///         let two = iter.next().unwrap().unwrap();
361    ///         assert_eq!(
362    ///             reader.resolve_attribute(two.key),
363    ///             (Bound(Namespace(b"other namespace")), QName(b"two").into())
364    ///         );
365    ///     }
366    ///     _ => unreachable!(),
367    /// }
368    /// ```
369    ///
370    /// [`Bound`]: ResolveResult::Bound
371    /// [`Unbound`]: ResolveResult::Unbound
372    /// [`Unknown`]: ResolveResult::Unknown
373    #[inline]
374    pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
375        self.ns_resolver.resolve_attribute(name)
376    }
377}
378
379impl<R: BufRead> NsReader<R> {
380    /// Reads the next event into given buffer.
381    ///
382    /// This method manages namespaces but doesn't resolve them automatically.
383    /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
384    ///
385    /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
386    /// namespace as soon as you get an event.
387    ///
388    /// # Examples
389    ///
390    /// ```
391    /// # use pretty_assertions::assert_eq;
392    /// use quick_xml::events::Event;
393    /// use quick_xml::name::{Namespace, ResolveResult::*};
394    /// use quick_xml::reader::NsReader;
395    ///
396    /// let mut reader = NsReader::from_str(r#"
397    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
398    ///        <y:tag2><!--Test comment-->Test</y:tag2>
399    ///        <y:tag2>Test 2</y:tag2>
400    ///     </x:tag1>
401    /// "#);
402    /// reader.config_mut().trim_text(true);
403    ///
404    /// let mut count = 0;
405    /// let mut buf = Vec::new();
406    /// let mut txt = Vec::new();
407    /// loop {
408    ///     match reader.read_event_into(&mut buf).unwrap() {
409    ///         Event::Start(e) => {
410    ///             count += 1;
411    ///             let (ns, local) = reader.resolver().resolve_element(e.name());
412    ///             match local.as_ref() {
413    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
414    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
415    ///                 _ => unreachable!(),
416    ///             }
417    ///         }
418    ///         Event::Text(e) => {
419    ///             txt.push(e.decode().unwrap().into_owned())
420    ///         }
421    ///         Event::Eof => break,
422    ///         _ => (),
423    ///     }
424    ///     buf.clear();
425    /// }
426    /// assert_eq!(count, 3);
427    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
428    /// ```
429    ///
430    /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
431    /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
432    #[inline]
433    pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
434        self.read_event_impl(buf)
435    }
436
437    /// Reads the next event into given buffer and resolves its namespace (if applicable).
438    ///
439    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
440    /// For all other events the concept of namespace is not defined, so
441    /// a [`ResolveResult::Unbound`] is returned.
442    ///
443    /// If you are not interested in namespaces, you can use [`read_event_into()`]
444    /// which will not automatically resolve namespaces for you.
445    ///
446    /// # Examples
447    ///
448    /// ```
449    /// # use pretty_assertions::assert_eq;
450    /// use quick_xml::events::Event;
451    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
452    /// use quick_xml::reader::NsReader;
453    ///
454    /// let mut reader = NsReader::from_str(r#"
455    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
456    ///        <y:tag2><!--Test comment-->Test</y:tag2>
457    ///        <y:tag2>Test 2</y:tag2>
458    ///     </x:tag1>
459    /// "#);
460    /// reader.config_mut().trim_text(true);
461    ///
462    /// let mut count = 0;
463    /// let mut buf = Vec::new();
464    /// let mut txt = Vec::new();
465    /// loop {
466    ///     match reader.read_resolved_event_into(&mut buf).unwrap() {
467    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
468    ///             count += 1;
469    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
470    ///         }
471    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
472    ///             count += 1;
473    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
474    ///         }
475    ///         (_, Event::Start(_)) => unreachable!(),
476    ///
477    ///         (_, Event::Text(e)) => {
478    ///             txt.push(e.decode().unwrap().into_owned())
479    ///         }
480    ///         (_, Event::Eof) => break,
481    ///         _ => (),
482    ///     }
483    ///     buf.clear();
484    /// }
485    /// assert_eq!(count, 3);
486    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
487    /// ```
488    ///
489    /// [`Start`]: Event::Start
490    /// [`Empty`]: Event::Empty
491    /// [`End`]: Event::End
492    /// [`read_event_into()`]: Self::read_event_into
493    #[inline]
494    pub fn read_resolved_event_into<'b>(
495        &mut self,
496        buf: &'b mut Vec<u8>,
497    ) -> Result<(ResolveResult<'_>, Event<'b>)> {
498        let event = self.read_event_impl(buf)?;
499        Ok(self.ns_resolver.resolve_event(event))
500    }
501
502    /// Reads until end element is found using provided buffer as intermediate
503    /// storage for events content. This function is supposed to be called after
504    /// you already read a [`Start`] event.
505    ///
    /// Returns a span that covers content between `>` of an opening tag and `<` of
507    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
508    /// this method was called after reading expanded [`Start`] event.
509    ///
510    /// Manages nested cases where parent and child elements have the _literally_
511    /// same name.
512    ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
516    ///
    /// If your reader is created from a string slice or byte array slice, it is
    /// better to use the [`read_to_end()`] method, because it will not copy bytes
    /// into an intermediate buffer.
520    ///
521    /// The provided `buf` buffer will be filled only by one event content at time.
522    /// Before reading of each event the buffer will be cleared. If you know an
523    /// appropriate size of each event, you can preallocate the buffer to reduce
524    /// number of reallocations.
525    ///
526    /// The `end` parameter should contain name of the end element _in the reader
527    /// encoding_. It is good practice to always get that parameter using
528    /// [`BytesStart::to_end()`] method.
529    ///
530    /// # Namespaces
531    ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
537    ///
538    /// > The end of every element that begins with a **start-tag** MUST be marked
539    /// > by an **end-tag** containing a name that echoes the element's type as
540    /// > given in the **start-tag**
541    ///
542    /// # Examples
543    ///
    /// This example shows how you can skip XML content after you read the
    /// start event.
546    ///
547    /// ```
548    /// # use pretty_assertions::assert_eq;
549    /// use quick_xml::events::{BytesStart, Event};
550    /// use quick_xml::name::{Namespace, ResolveResult};
551    /// use quick_xml::reader::NsReader;
552    ///
553    /// let mut reader = NsReader::from_str(r#"
554    ///     <outer xmlns="namespace 1">
555    ///         <inner xmlns="namespace 2">
556    ///             <outer></outer>
557    ///         </inner>
558    ///         <inner>
559    ///             <inner></inner>
560    ///             <inner/>
561    ///             <outer></outer>
562    ///             <p:outer xmlns:p="ns"></p:outer>
563    ///             <outer/>
564    ///         </inner>
565    ///     </outer>
566    /// "#);
567    /// reader.config_mut().trim_text(true);
568    /// let mut buf = Vec::new();
569    ///
570    /// let ns = Namespace(b"namespace 1");
571    /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
572    /// let end   = start.to_end().into_owned();
573    ///
574    /// // First, we read a start event...
575    /// assert_eq!(
576    ///     reader.read_resolved_event_into(&mut buf).unwrap(),
577    ///     (ResolveResult::Bound(ns), Event::Start(start))
578    /// );
579    ///
580    /// // ...then, we could skip all events to the corresponding end event.
581    /// // This call will correctly handle nested <outer> elements.
582    /// // Note, however, that this method does not handle namespaces.
583    /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
584    ///
585    /// // At the end we should get an Eof event, because we ate the whole XML
586    /// assert_eq!(
587    ///     reader.read_resolved_event_into(&mut buf).unwrap(),
588    ///     (ResolveResult::Unbound, Event::Eof)
589    /// );
590    /// ```
591    ///
592    /// [`Start`]: Event::Start
593    /// [`End`]: Event::End
594    /// [`IllFormed`]: crate::errors::Error::IllFormed
595    /// [`read_to_end()`]: Self::read_to_end
596    /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
597    /// [`expand_empty_elements`]: Config::expand_empty_elements
598    /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
599    #[inline]
600    pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
601        // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
602        // match literally the start name. See `Config::check_end_names` documentation
603        self.reader.read_to_end_into(end, buf)
604    }
605}
606
607impl NsReader<BufReader<File>> {
608    /// Creates an XML reader from a file path.
609    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
610        Ok(Self::new(Reader::from_file(path)?))
611    }
612}
613
614impl<'i> NsReader<&'i [u8]> {
615    /// Creates an XML reader from a string slice.
616    #[inline]
617    #[allow(clippy::should_implement_trait)]
618    pub fn from_str(s: &'i str) -> Self {
619        Self::new(Reader::from_str(s))
620    }
621
622    /// Reads the next event, borrow its content from the input buffer.
623    ///
624    /// This method manages namespaces but doesn't resolve them automatically.
625    /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
626    ///
627    /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
628    /// as soon as you get an event.
629    ///
630    /// There is no asynchronous `read_event_async()` version of this function,
631    /// because it is not necessary -- the contents are already in memory and no IO
632    /// is needed, therefore there is no potential for blocking.
633    ///
634    /// # Examples
635    ///
636    /// ```
637    /// # use pretty_assertions::assert_eq;
638    /// use quick_xml::events::Event;
639    /// use quick_xml::name::{Namespace, ResolveResult::*};
640    /// use quick_xml::reader::NsReader;
641    ///
642    /// let mut reader = NsReader::from_str(r#"
643    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
644    ///        <y:tag2><!--Test comment-->Test</y:tag2>
645    ///        <y:tag2>Test 2</y:tag2>
646    ///     </x:tag1>
647    /// "#);
648    /// reader.config_mut().trim_text(true);
649    ///
650    /// let mut count = 0;
651    /// let mut txt = Vec::new();
652    /// loop {
653    ///     match reader.read_event().unwrap() {
654    ///         Event::Start(e) => {
655    ///             count += 1;
656    ///             let (ns, local) = reader.resolver().resolve_element(e.name());
657    ///             match local.as_ref() {
658    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
659    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
660    ///                 _ => unreachable!(),
661    ///             }
662    ///         }
663    ///         Event::Text(e) => {
664    ///             txt.push(e.decode().unwrap().into_owned())
665    ///         }
666    ///         Event::Eof => break,
667    ///         _ => (),
668    ///     }
669    /// }
670    /// assert_eq!(count, 3);
671    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
672    /// ```
673    ///
674    /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
675    /// [`read_resolved_event()`]: Self::read_resolved_event
676    #[inline]
677    pub fn read_event(&mut self) -> Result<Event<'i>> {
678        self.read_event_impl(())
679    }
680
681    /// Reads the next event, borrow its content from the input buffer, and resolves
682    /// its namespace (if applicable).
683    ///
684    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
685    /// For all other events the concept of namespace is not defined, so
686    /// a [`ResolveResult::Unbound`] is returned.
687    ///
688    /// If you are not interested in namespaces, you can use [`read_event()`]
689    /// which will not automatically resolve namespaces for you.
690    ///
691    /// There is no asynchronous `read_resolved_event_async()` version of this function,
692    /// because it is not necessary -- the contents are already in memory and no IO
693    /// is needed, therefore there is no potential for blocking.
694    ///
695    /// # Examples
696    ///
697    /// ```
698    /// # use pretty_assertions::assert_eq;
699    /// use quick_xml::events::Event;
700    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
701    /// use quick_xml::reader::NsReader;
702    ///
703    /// let mut reader = NsReader::from_str(r#"
704    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
705    ///        <y:tag2><!--Test comment-->Test</y:tag2>
706    ///        <y:tag2>Test 2</y:tag2>
707    ///     </x:tag1>
708    /// "#);
709    /// reader.config_mut().trim_text(true);
710    ///
711    /// let mut count = 0;
712    /// let mut txt = Vec::new();
713    /// loop {
714    ///     match reader.read_resolved_event().unwrap() {
715    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
716    ///             count += 1;
717    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
718    ///         }
719    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
720    ///             count += 1;
721    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
722    ///         }
723    ///         (_, Event::Start(_)) => unreachable!(),
724    ///
725    ///         (_, Event::Text(e)) => {
726    ///             txt.push(e.decode().unwrap().into_owned())
727    ///         }
728    ///         (_, Event::Eof) => break,
729    ///         _ => (),
730    ///     }
731    /// }
732    /// assert_eq!(count, 3);
733    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
734    /// ```
735    ///
736    /// [`Start`]: Event::Start
737    /// [`Empty`]: Event::Empty
738    /// [`End`]: Event::End
739    /// [`read_event()`]: Self::read_event
740    #[inline]
741    pub fn read_resolved_event(&mut self) -> Result<(ResolveResult<'_>, Event<'i>)> {
742        let event = self.read_event_impl(())?;
743        Ok(self.ns_resolver.resolve_event(event))
744    }
745
746    /// Reads until end element is found. This function is supposed to be called
747    /// after you already read a [`Start`] event.
748    ///
749    /// Returns a span that cover content between `>` of an opening tag and `<` of
750    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
751    /// this method was called after reading expanded [`Start`] event.
752    ///
753    /// Manages nested cases where parent and child elements have the _literally_
754    /// same name.
755    ///
756    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
757    /// will be returned. In particularly, that error will be returned if you call
758    /// this method without consuming the corresponding [`Start`] event first.
759    ///
760    /// The `end` parameter should contain name of the end element _in the reader
761    /// encoding_. It is good practice to always get that parameter using
762    /// [`BytesStart::to_end()`] method.
763    ///
764    /// There is no asynchronous `read_to_end_async()` version of this function,
765    /// because it is not necessary -- the contents are already in memory and no IO
766    /// is needed, therefore there is no potential for blocking.
767    ///
768    /// # Namespaces
769    ///
770    /// While the `NsReader` does namespace resolution, namespaces does not
771    /// change the algorithm for comparing names. Although the names `a:name`
772    /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace,
773    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
774    /// according to [the specification]
775    ///
776    /// > The end of every element that begins with a **start-tag** MUST be marked
777    /// > by an **end-tag** containing a name that echoes the element's type as
778    /// > given in the **start-tag**
779    ///
780    /// # Examples
781    ///
782    /// This example shows, how you can skip XML content after you read the
783    /// start event.
784    ///
785    /// ```
786    /// # use pretty_assertions::assert_eq;
787    /// use quick_xml::events::{BytesStart, Event};
788    /// use quick_xml::name::{Namespace, ResolveResult};
789    /// use quick_xml::reader::NsReader;
790    ///
791    /// let mut reader = NsReader::from_str(r#"
792    ///     <outer xmlns="namespace 1">
793    ///         <inner xmlns="namespace 2">
794    ///             <outer></outer>
795    ///         </inner>
796    ///         <inner>
797    ///             <inner></inner>
798    ///             <inner/>
799    ///             <outer></outer>
800    ///             <p:outer xmlns:p="ns"></p:outer>
801    ///             <outer/>
802    ///         </inner>
803    ///     </outer>
804    /// "#);
805    /// reader.config_mut().trim_text(true);
806    ///
807    /// let ns = Namespace(b"namespace 1");
808    /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
809    /// let end   = start.to_end().into_owned();
810    ///
811    /// // First, we read a start event...
812    /// assert_eq!(
813    ///     reader.read_resolved_event().unwrap(),
814    ///     (ResolveResult::Bound(ns), Event::Start(start))
815    /// );
816    ///
817    /// // ...then, we could skip all events to the corresponding end event.
818    /// // This call will correctly handle nested <outer> elements.
819    /// // Note, however, that this method does not handle namespaces.
820    /// reader.read_to_end(end.name()).unwrap();
821    ///
822    /// // At the end we should get an Eof event, because we ate the whole XML
823    /// assert_eq!(
824    ///     reader.read_resolved_event().unwrap(),
825    ///     (ResolveResult::Unbound, Event::Eof)
826    /// );
827    /// ```
828    ///
829    /// [`Start`]: Event::Start
830    /// [`End`]: Event::End
831    /// [`IllFormed`]: crate::errors::Error::IllFormed
832    /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
833    /// [`expand_empty_elements`]: Config::expand_empty_elements
834    /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
835    #[inline]
836    pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
837        // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
838        // match literally the start name. See `Config::check_end_names` documentation
839        self.reader.read_to_end(end)
840    }
841
842    /// Reads content between start and end tags, including any markup. This
843    /// function is supposed to be called after you already read a [`Start`] event.
844    ///
845    /// Manages nested cases where parent and child elements have the _literally_
846    /// same name.
847    ///
848    /// This method does not unescape read data, instead it returns content
849    /// "as is" of the XML document. This is because it has no idea what text
850    /// it reads, and if, for example, it contains CDATA section, attempt to
851    /// unescape it content will spoil data.
852    ///
853    /// Any text will be decoded using the XML current [`decoder()`].
854    ///
855    /// Actually, this method perform the following code:
856    ///
857    /// ```ignore
858    /// let span = reader.read_to_end(end)?;
859    /// let text = reader.decoder().decode(&reader.inner_slice[span]);
860    /// ```
861    ///
862    /// # Examples
863    ///
864    /// This example shows, how you can read a HTML content from your XML document.
865    ///
866    /// ```
867    /// # use pretty_assertions::assert_eq;
868    /// # use std::borrow::Cow;
869    /// use quick_xml::events::{BytesStart, Event};
870    /// use quick_xml::reader::NsReader;
871    ///
872    /// let mut reader = NsReader::from_str(r#"
873    ///     <html>
874    ///         <title>This is a HTML text</title>
875    ///         <p>Usual XML rules does not apply inside it
876    ///         <p>For example, elements not needed to be &quot;closed&quot;
877    ///     </html>
878    /// "#);
879    /// reader.config_mut().trim_text(true);
880    ///
881    /// let start = BytesStart::new("html");
882    /// let end   = start.to_end().into_owned();
883    ///
884    /// // First, we read a start event...
885    /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
886    /// // ...and disable checking of end names because we expect HTML further...
887    /// reader.config_mut().check_end_names = false;
888    ///
889    /// // ...then, we could read text content until close tag.
890    /// // This call will correctly handle nested <html> elements.
891    /// let text = reader.read_text(end.name()).unwrap();
892    /// assert_eq!(text, Cow::Borrowed(r#"
893    ///         <title>This is a HTML text</title>
894    ///         <p>Usual XML rules does not apply inside it
895    ///         <p>For example, elements not needed to be &quot;closed&quot;
896    ///     "#));
897    ///
898    /// // Now we can enable checks again
899    /// reader.config_mut().check_end_names = true;
900    ///
901    /// // At the end we should get an Eof event, because we ate the whole XML
902    /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
903    /// ```
904    ///
905    /// [`Start`]: Event::Start
906    /// [`decoder()`]: Reader::decoder()
907    #[inline]
908    pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
909        self.reader.read_text(end)
910    }
911}
912
913impl<R> Deref for NsReader<R> {
914    type Target = Reader<R>;
915
916    #[inline]
917    fn deref(&self) -> &Self::Target {
918        &self.reader
919    }
920}