quick_xml/reader/ns_reader.rs
//! A reader that manages namespace declarations found in the input and is able
//! to resolve [qualified names] to [expanded names].
3//!
4//! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname
5//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
6
7use std::borrow::Cow;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::ops::Deref;
11use std::path::Path;
12
13use crate::errors::Result;
14use crate::events::Event;
15use crate::name::{LocalName, NamespaceBindingsIter, NamespaceResolver, QName, ResolveResult};
16use crate::reader::{Config, Reader, Span, XmlSource};
17
18/// A low level encoding-agnostic XML event reader that performs namespace resolution.
19///
20/// Consumes a [`BufRead`] and streams XML `Event`s.
21#[derive(Debug, Clone)]
22pub struct NsReader<R> {
23 /// An XML reader
24 pub(super) reader: Reader<R>,
25 /// A buffer to manage namespaces
26 ns_resolver: NamespaceResolver,
27 /// We cannot pop data from the namespace stack until returned `Empty` or `End`
28 /// event will be processed by the user, so we only mark that we should that
29 /// in the next [`Self::read_event_impl()`] call.
30 pending_pop: bool,
31}
32
33/// Builder methods
34impl<R> NsReader<R> {
35 /// Creates a `NsReader` that reads from a reader.
36 #[inline]
37 pub fn from_reader(reader: R) -> Self {
38 Self::new(Reader::from_reader(reader))
39 }
40
41 /// Returns reference to the parser configuration
42 #[inline]
43 pub const fn config(&self) -> &Config {
44 self.reader.config()
45 }
46
47 /// Returns mutable reference to the parser configuration
48 #[inline]
49 pub fn config_mut(&mut self) -> &mut Config {
50 self.reader.config_mut()
51 }
52
53 /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces.
54 ///
55 /// # Examples
56 ///
57 /// This example shows what results the returned iterator would return after
58 /// reading each event of a simple XML.
59 ///
60 /// ```
61 /// # use pretty_assertions::assert_eq;
62 /// use quick_xml::name::{Namespace, PrefixDeclaration};
63 /// use quick_xml::NsReader;
64 ///
65 /// let src = "<root>
66 /// <a xmlns=\"a1\" xmlns:a=\"a2\">
67 /// <b xmlns=\"b1\" xmlns:b=\"b2\">
68 /// <c/>
69 /// </b>
70 /// <d/>
71 /// </a>
72 /// </root>";
73 /// let mut reader = NsReader::from_str(src);
74 /// reader.config_mut().trim_text(true);
75 /// // No prefixes at the beginning
76 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
77 ///
78 /// reader.read_resolved_event()?; // <root>
79 /// // No prefixes declared on root
80 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
81 ///
82 /// reader.read_resolved_event()?; // <a>
83 /// // Two prefixes declared on "a"
84 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
85 /// (PrefixDeclaration::Default, Namespace(b"a1")),
86 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
87 /// ]);
88 ///
89 /// reader.read_resolved_event()?; // <b>
90 /// // The default prefix got overridden and new "b" prefix
91 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
92 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
93 /// (PrefixDeclaration::Default, Namespace(b"b1")),
94 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
95 /// ]);
96 ///
97 /// reader.read_resolved_event()?; // <c/>
98 /// // Still the same
99 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
100 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
101 /// (PrefixDeclaration::Default, Namespace(b"b1")),
102 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
103 /// ]);
104 ///
105 /// reader.read_resolved_event()?; // </b>
106 /// // Still the same
107 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
108 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
109 /// (PrefixDeclaration::Default, Namespace(b"b1")),
110 /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
111 /// ]);
112 ///
113 /// reader.read_resolved_event()?; // <d/>
114 /// // </b> got closed so back to the prefixes declared on <a>
115 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
116 /// (PrefixDeclaration::Default, Namespace(b"a1")),
117 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
118 /// ]);
119 ///
120 /// reader.read_resolved_event()?; // </a>
121 /// // Still the same
122 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
123 /// (PrefixDeclaration::Default, Namespace(b"a1")),
124 /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
125 /// ]);
126 ///
127 /// reader.read_resolved_event()?; // </root>
128 /// // <a> got closed
129 /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
130 /// # quick_xml::Result::Ok(())
131 /// ```
132 #[inline]
133 pub const fn prefixes(&self) -> NamespaceBindingsIter<'_> {
134 self.ns_resolver.bindings()
135 }
136}
137
138/// Private methods
139impl<R> NsReader<R> {
140 #[inline]
141 fn new(reader: Reader<R>) -> Self {
142 Self {
143 reader,
144 ns_resolver: NamespaceResolver::default(),
145 pending_pop: false,
146 }
147 }
148
149 fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
150 where
151 R: XmlSource<'i, B>,
152 {
153 self.pop();
154 let event = self.reader.read_event_impl(buf);
155 self.process_event(event)
156 }
157
158 pub(super) fn pop(&mut self) {
159 if self.pending_pop {
160 self.ns_resolver.pop();
161 self.pending_pop = false;
162 }
163 }
164
165 pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
166 match event {
167 Ok(Event::Start(e)) => {
168 self.ns_resolver.push(&e)?;
169 Ok(Event::Start(e))
170 }
171 Ok(Event::Empty(e)) => {
172 self.ns_resolver.push(&e)?;
173 // notify next `read_event_impl()` invocation that it needs to pop this
174 // namespace scope
175 self.pending_pop = true;
176 Ok(Event::Empty(e))
177 }
178 Ok(Event::End(e)) => {
179 // notify next `read_event_impl()` invocation that it needs to pop this
180 // namespace scope
181 self.pending_pop = true;
182 Ok(Event::End(e))
183 }
184 e => e,
185 }
186 }
187}
188
189/// Getters
190impl<R> NsReader<R> {
191 /// Consumes `NsReader` returning the underlying reader
192 ///
193 /// See the [`Reader::into_inner`] for examples
194 #[inline]
195 pub fn into_inner(self) -> R {
196 self.reader.into_inner()
197 }
198
199 /// Gets a mutable reference to the underlying reader.
200 pub fn get_mut(&mut self) -> &mut R {
201 self.reader.get_mut()
202 }
203
204 /// Returns a storage of namespace bindings associated with this reader.
205 #[inline]
206 pub const fn resolver(&self) -> &NamespaceResolver {
207 &self.ns_resolver
208 }
209
210 /// Resolves a potentially qualified **element name** or **attribute name**
211 /// into _(namespace name, local name)_.
212 ///
213 /// _Qualified_ names have the form `local-name` or `prefix:local-name` where the `prefix`
214 /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
215 /// The namespace prefix can be defined on the same element as the name in question.
216 ///
217 /// The method returns following results depending on the `name` shape, `attribute` flag
218 /// and the presence of the default namespace on element or any of its parents:
219 ///
220 /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName
221 /// |---------|-------------|-------------------|-----------------------|------------
222 /// |`true` |_(any)_ |`local-name` |[`Unbound`] |`local-name`
223 /// |`true` |_(any)_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
224 /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name`
225 /// |`false` |Defined |`local-name` |[`Bound`] (to `xmlns`) |`local-name`
226 /// |`false` |_(any)_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
227 ///
228 /// If you want to clearly indicate that name that you resolve is an element
229 /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`]
230 /// methods.
231 ///
232 /// # Lifetimes
233 ///
234 /// - `'n`: lifetime of a name. Returned local name will be bound to the same
235 /// lifetime as the name in question.
236 /// - returned namespace name will be bound to the reader itself
237 ///
238 /// [`Bound`]: ResolveResult::Bound
239 /// [`Unbound`]: ResolveResult::Unbound
240 /// [`Unknown`]: ResolveResult::Unknown
241 /// [`resolve_attribute()`]: Self::resolve_attribute()
242 /// [`resolve_element()`]: Self::resolve_element()
243 #[inline]
244 pub fn resolve<'n>(
245 &self,
246 name: QName<'n>,
247 attribute: bool,
248 ) -> (ResolveResult<'_>, LocalName<'n>) {
249 self.ns_resolver.resolve(name, !attribute)
250 }
251
252 /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
253 ///
254 /// _Qualified_ element names have the form `prefix:local-name` where the
255 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
256 /// The namespace prefix can be defined on the same element as the element
257 /// in question.
258 ///
    /// _Unqualified_ elements inherit the current _default namespace_.
260 ///
261 /// The method returns following results depending on the `name` shape and
262 /// the presence of the default namespace:
263 ///
264 /// |`xmlns="..."`|QName |ResolveResult |LocalName
265 /// |-------------|-------------------|-----------------------|------------
266 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
267 /// |Defined |`local-name` |[`Bound`] (default) |`local-name`
268 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
269 ///
270 /// # Lifetimes
271 ///
272 /// - `'n`: lifetime of an element name. Returned local name will be bound
273 /// to the same lifetime as the name in question.
274 /// - returned namespace name will be bound to the reader itself
275 ///
276 /// # Examples
277 ///
    /// This example shows how you can resolve a qualified name into a namespace.
    /// Note that in code like this you do not need to do that manually,
    /// because the namespace resolution result is returned by [`read_resolved_event()`].
281 ///
282 /// ```
283 /// # use pretty_assertions::assert_eq;
284 /// use quick_xml::events::Event;
285 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
286 /// use quick_xml::reader::NsReader;
287 ///
288 /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
289 ///
290 /// match reader.read_event().unwrap() {
291 /// Event::Empty(e) => assert_eq!(
292 /// reader.resolve_element(e.name()),
293 /// (Bound(Namespace(b"root namespace")), QName(b"tag").into())
294 /// ),
295 /// _ => unreachable!(),
296 /// }
297 /// ```
298 ///
299 /// [`Bound`]: ResolveResult::Bound
300 /// [`Unbound`]: ResolveResult::Unbound
301 /// [`Unknown`]: ResolveResult::Unknown
302 /// [`read_resolved_event()`]: Self::read_resolved_event
303 #[inline]
304 pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
305 self.ns_resolver.resolve_element(name)
306 }
307
308 /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
309 ///
310 /// _Qualified_ attribute names have the form `prefix:local-name` where the
311 /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
312 /// The namespace prefix can be defined on the same element as the attribute
313 /// in question.
314 ///
315 /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
316 ///
317 /// The method returns following results depending on the `name` shape and
318 /// the presence of the default namespace:
319 ///
320 /// |`xmlns="..."`|QName |ResolveResult |LocalName
321 /// |-------------|-------------------|-----------------------|------------
322 /// |Not defined |`local-name` |[`Unbound`] |`local-name`
323 /// |Defined |`local-name` |[`Unbound`] |`local-name`
324 /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
325 ///
326 /// # Lifetimes
327 ///
328 /// - `'n`: lifetime of an attribute name. Returned local name will be bound
329 /// to the same lifetime as the name in question.
330 /// - returned namespace name will be bound to the reader itself
331 ///
332 /// # Examples
333 ///
334 /// ```
335 /// # use pretty_assertions::assert_eq;
336 /// use quick_xml::events::Event;
337 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
338 /// use quick_xml::reader::NsReader;
339 ///
340 /// let mut reader = NsReader::from_str("
341 /// <tag one='1'
342 /// p:two='2'
343 /// xmlns='root namespace'
344 /// xmlns:p='other namespace'/>
345 /// ");
346 /// reader.config_mut().trim_text(true);
347 ///
348 /// match reader.read_event().unwrap() {
349 /// Event::Empty(e) => {
350 /// let mut iter = e.attributes();
351 ///
352 /// // Unlike elements, attributes without explicit namespace
353 /// // not bound to any namespace
354 /// let one = iter.next().unwrap().unwrap();
355 /// assert_eq!(
356 /// reader.resolve_attribute(one.key),
357 /// (Unbound, QName(b"one").into())
358 /// );
359 ///
360 /// let two = iter.next().unwrap().unwrap();
361 /// assert_eq!(
362 /// reader.resolve_attribute(two.key),
363 /// (Bound(Namespace(b"other namespace")), QName(b"two").into())
364 /// );
365 /// }
366 /// _ => unreachable!(),
367 /// }
368 /// ```
369 ///
370 /// [`Bound`]: ResolveResult::Bound
371 /// [`Unbound`]: ResolveResult::Unbound
372 /// [`Unknown`]: ResolveResult::Unknown
373 #[inline]
374 pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
375 self.ns_resolver.resolve_attribute(name)
376 }
377}
378
379impl<R: BufRead> NsReader<R> {
380 /// Reads the next event into given buffer.
381 ///
382 /// This method manages namespaces but doesn't resolve them automatically.
383 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
384 ///
385 /// You also can use [`read_resolved_event_into()`] instead if you want to resolve
386 /// namespace as soon as you get an event.
387 ///
388 /// # Examples
389 ///
390 /// ```
391 /// # use pretty_assertions::assert_eq;
392 /// use quick_xml::events::Event;
393 /// use quick_xml::name::{Namespace, ResolveResult::*};
394 /// use quick_xml::reader::NsReader;
395 ///
396 /// let mut reader = NsReader::from_str(r#"
397 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
398 /// <y:tag2><!--Test comment-->Test</y:tag2>
399 /// <y:tag2>Test 2</y:tag2>
400 /// </x:tag1>
401 /// "#);
402 /// reader.config_mut().trim_text(true);
403 ///
404 /// let mut count = 0;
405 /// let mut buf = Vec::new();
406 /// let mut txt = Vec::new();
407 /// loop {
408 /// match reader.read_event_into(&mut buf).unwrap() {
409 /// Event::Start(e) => {
410 /// count += 1;
411 /// let (ns, local) = reader.resolver().resolve_element(e.name());
412 /// match local.as_ref() {
413 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
414 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
415 /// _ => unreachable!(),
416 /// }
417 /// }
418 /// Event::Text(e) => {
419 /// txt.push(e.decode().unwrap().into_owned())
420 /// }
421 /// Event::Eof => break,
422 /// _ => (),
423 /// }
424 /// buf.clear();
425 /// }
426 /// assert_eq!(count, 3);
427 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
428 /// ```
429 ///
430 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
431 /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
432 #[inline]
433 pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
434 self.read_event_impl(buf)
435 }
436
437 /// Reads the next event into given buffer and resolves its namespace (if applicable).
438 ///
439 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
440 /// For all other events the concept of namespace is not defined, so
441 /// a [`ResolveResult::Unbound`] is returned.
442 ///
443 /// If you are not interested in namespaces, you can use [`read_event_into()`]
444 /// which will not automatically resolve namespaces for you.
445 ///
446 /// # Examples
447 ///
448 /// ```
449 /// # use pretty_assertions::assert_eq;
450 /// use quick_xml::events::Event;
451 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
452 /// use quick_xml::reader::NsReader;
453 ///
454 /// let mut reader = NsReader::from_str(r#"
455 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
456 /// <y:tag2><!--Test comment-->Test</y:tag2>
457 /// <y:tag2>Test 2</y:tag2>
458 /// </x:tag1>
459 /// "#);
460 /// reader.config_mut().trim_text(true);
461 ///
462 /// let mut count = 0;
463 /// let mut buf = Vec::new();
464 /// let mut txt = Vec::new();
465 /// loop {
466 /// match reader.read_resolved_event_into(&mut buf).unwrap() {
467 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
468 /// count += 1;
469 /// assert_eq!(e.local_name(), QName(b"tag1").into());
470 /// }
471 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
472 /// count += 1;
473 /// assert_eq!(e.local_name(), QName(b"tag2").into());
474 /// }
475 /// (_, Event::Start(_)) => unreachable!(),
476 ///
477 /// (_, Event::Text(e)) => {
478 /// txt.push(e.decode().unwrap().into_owned())
479 /// }
480 /// (_, Event::Eof) => break,
481 /// _ => (),
482 /// }
483 /// buf.clear();
484 /// }
485 /// assert_eq!(count, 3);
486 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
487 /// ```
488 ///
489 /// [`Start`]: Event::Start
490 /// [`Empty`]: Event::Empty
491 /// [`End`]: Event::End
492 /// [`read_event_into()`]: Self::read_event_into
493 #[inline]
494 pub fn read_resolved_event_into<'b>(
495 &mut self,
496 buf: &'b mut Vec<u8>,
497 ) -> Result<(ResolveResult<'_>, Event<'b>)> {
498 let event = self.read_event_impl(buf)?;
499 Ok(self.ns_resolver.resolve_event(event))
500 }
501
502 /// Reads until end element is found using provided buffer as intermediate
503 /// storage for events content. This function is supposed to be called after
504 /// you already read a [`Start`] event.
505 ///
506 /// Returns a span that cover content between `>` of an opening tag and `<` of
507 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
508 /// this method was called after reading expanded [`Start`] event.
509 ///
510 /// Manages nested cases where parent and child elements have the _literally_
511 /// same name.
512 ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
    ///
    /// If your reader is created from a string slice or byte array slice, it is
    /// better to use the [`read_to_end()`] method, because it will not copy bytes
    /// into an intermediate buffer.
520 ///
521 /// The provided `buf` buffer will be filled only by one event content at time.
522 /// Before reading of each event the buffer will be cleared. If you know an
523 /// appropriate size of each event, you can preallocate the buffer to reduce
524 /// number of reallocations.
525 ///
526 /// The `end` parameter should contain name of the end element _in the reader
527 /// encoding_. It is good practice to always get that parameter using
528 /// [`BytesStart::to_end()`] method.
529 ///
530 /// # Namespaces
531 ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
537 ///
538 /// > The end of every element that begins with a **start-tag** MUST be marked
539 /// > by an **end-tag** containing a name that echoes the element's type as
540 /// > given in the **start-tag**
541 ///
542 /// # Examples
543 ///
544 /// This example shows, how you can skip XML content after you read the
545 /// start event.
546 ///
547 /// ```
548 /// # use pretty_assertions::assert_eq;
549 /// use quick_xml::events::{BytesStart, Event};
550 /// use quick_xml::name::{Namespace, ResolveResult};
551 /// use quick_xml::reader::NsReader;
552 ///
553 /// let mut reader = NsReader::from_str(r#"
554 /// <outer xmlns="namespace 1">
555 /// <inner xmlns="namespace 2">
556 /// <outer></outer>
557 /// </inner>
558 /// <inner>
559 /// <inner></inner>
560 /// <inner/>
561 /// <outer></outer>
562 /// <p:outer xmlns:p="ns"></p:outer>
563 /// <outer/>
564 /// </inner>
565 /// </outer>
566 /// "#);
567 /// reader.config_mut().trim_text(true);
568 /// let mut buf = Vec::new();
569 ///
570 /// let ns = Namespace(b"namespace 1");
571 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
572 /// let end = start.to_end().into_owned();
573 ///
574 /// // First, we read a start event...
575 /// assert_eq!(
576 /// reader.read_resolved_event_into(&mut buf).unwrap(),
577 /// (ResolveResult::Bound(ns), Event::Start(start))
578 /// );
579 ///
580 /// // ...then, we could skip all events to the corresponding end event.
581 /// // This call will correctly handle nested <outer> elements.
582 /// // Note, however, that this method does not handle namespaces.
583 /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
584 ///
585 /// // At the end we should get an Eof event, because we ate the whole XML
586 /// assert_eq!(
587 /// reader.read_resolved_event_into(&mut buf).unwrap(),
588 /// (ResolveResult::Unbound, Event::Eof)
589 /// );
590 /// ```
591 ///
592 /// [`Start`]: Event::Start
593 /// [`End`]: Event::End
594 /// [`IllFormed`]: crate::errors::Error::IllFormed
595 /// [`read_to_end()`]: Self::read_to_end
596 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
597 /// [`expand_empty_elements`]: Config::expand_empty_elements
598 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
599 #[inline]
600 pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<Span> {
601 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
602 // match literally the start name. See `Config::check_end_names` documentation
603 self.reader.read_to_end_into(end, buf)
604 }
605}
606
607impl NsReader<BufReader<File>> {
608 /// Creates an XML reader from a file path.
609 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
610 Ok(Self::new(Reader::from_file(path)?))
611 }
612}
613
614impl<'i> NsReader<&'i [u8]> {
615 /// Creates an XML reader from a string slice.
616 #[inline]
617 #[allow(clippy::should_implement_trait)]
618 pub fn from_str(s: &'i str) -> Self {
619 Self::new(Reader::from_str(s))
620 }
621
622 /// Reads the next event, borrow its content from the input buffer.
623 ///
624 /// This method manages namespaces but doesn't resolve them automatically.
625 /// You should call [`resolver().resolve_element()`] if you want to get a namespace.
626 ///
627 /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
628 /// as soon as you get an event.
629 ///
630 /// There is no asynchronous `read_event_async()` version of this function,
631 /// because it is not necessary -- the contents are already in memory and no IO
632 /// is needed, therefore there is no potential for blocking.
633 ///
634 /// # Examples
635 ///
636 /// ```
637 /// # use pretty_assertions::assert_eq;
638 /// use quick_xml::events::Event;
639 /// use quick_xml::name::{Namespace, ResolveResult::*};
640 /// use quick_xml::reader::NsReader;
641 ///
642 /// let mut reader = NsReader::from_str(r#"
643 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
644 /// <y:tag2><!--Test comment-->Test</y:tag2>
645 /// <y:tag2>Test 2</y:tag2>
646 /// </x:tag1>
647 /// "#);
648 /// reader.config_mut().trim_text(true);
649 ///
650 /// let mut count = 0;
651 /// let mut txt = Vec::new();
652 /// loop {
653 /// match reader.read_event().unwrap() {
654 /// Event::Start(e) => {
655 /// count += 1;
656 /// let (ns, local) = reader.resolver().resolve_element(e.name());
657 /// match local.as_ref() {
658 /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
659 /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
660 /// _ => unreachable!(),
661 /// }
662 /// }
663 /// Event::Text(e) => {
664 /// txt.push(e.decode().unwrap().into_owned())
665 /// }
666 /// Event::Eof => break,
667 /// _ => (),
668 /// }
669 /// }
670 /// assert_eq!(count, 3);
671 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
672 /// ```
673 ///
674 /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
675 /// [`read_resolved_event()`]: Self::read_resolved_event
676 #[inline]
677 pub fn read_event(&mut self) -> Result<Event<'i>> {
678 self.read_event_impl(())
679 }
680
681 /// Reads the next event, borrow its content from the input buffer, and resolves
682 /// its namespace (if applicable).
683 ///
684 /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
685 /// For all other events the concept of namespace is not defined, so
686 /// a [`ResolveResult::Unbound`] is returned.
687 ///
688 /// If you are not interested in namespaces, you can use [`read_event()`]
689 /// which will not automatically resolve namespaces for you.
690 ///
691 /// There is no asynchronous `read_resolved_event_async()` version of this function,
692 /// because it is not necessary -- the contents are already in memory and no IO
693 /// is needed, therefore there is no potential for blocking.
694 ///
695 /// # Examples
696 ///
697 /// ```
698 /// # use pretty_assertions::assert_eq;
699 /// use quick_xml::events::Event;
700 /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
701 /// use quick_xml::reader::NsReader;
702 ///
703 /// let mut reader = NsReader::from_str(r#"
704 /// <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
705 /// <y:tag2><!--Test comment-->Test</y:tag2>
706 /// <y:tag2>Test 2</y:tag2>
707 /// </x:tag1>
708 /// "#);
709 /// reader.config_mut().trim_text(true);
710 ///
711 /// let mut count = 0;
712 /// let mut txt = Vec::new();
713 /// loop {
714 /// match reader.read_resolved_event().unwrap() {
715 /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
716 /// count += 1;
717 /// assert_eq!(e.local_name(), QName(b"tag1").into());
718 /// }
719 /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
720 /// count += 1;
721 /// assert_eq!(e.local_name(), QName(b"tag2").into());
722 /// }
723 /// (_, Event::Start(_)) => unreachable!(),
724 ///
725 /// (_, Event::Text(e)) => {
726 /// txt.push(e.decode().unwrap().into_owned())
727 /// }
728 /// (_, Event::Eof) => break,
729 /// _ => (),
730 /// }
731 /// }
732 /// assert_eq!(count, 3);
733 /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
734 /// ```
735 ///
736 /// [`Start`]: Event::Start
737 /// [`Empty`]: Event::Empty
738 /// [`End`]: Event::End
739 /// [`read_event()`]: Self::read_event
740 #[inline]
741 pub fn read_resolved_event(&mut self) -> Result<(ResolveResult<'_>, Event<'i>)> {
742 let event = self.read_event_impl(())?;
743 Ok(self.ns_resolver.resolve_event(event))
744 }
745
746 /// Reads until end element is found. This function is supposed to be called
747 /// after you already read a [`Start`] event.
748 ///
749 /// Returns a span that cover content between `>` of an opening tag and `<` of
750 /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
751 /// this method was called after reading expanded [`Start`] event.
752 ///
753 /// Manages nested cases where parent and child elements have the _literally_
754 /// same name.
755 ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
759 ///
760 /// The `end` parameter should contain name of the end element _in the reader
761 /// encoding_. It is good practice to always get that parameter using
762 /// [`BytesStart::to_end()`] method.
763 ///
764 /// There is no asynchronous `read_to_end_async()` version of this function,
765 /// because it is not necessary -- the contents are already in memory and no IO
766 /// is needed, therefore there is no potential for blocking.
767 ///
768 /// # Namespaces
769 ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
775 ///
776 /// > The end of every element that begins with a **start-tag** MUST be marked
777 /// > by an **end-tag** containing a name that echoes the element's type as
778 /// > given in the **start-tag**
779 ///
780 /// # Examples
781 ///
782 /// This example shows, how you can skip XML content after you read the
783 /// start event.
784 ///
785 /// ```
786 /// # use pretty_assertions::assert_eq;
787 /// use quick_xml::events::{BytesStart, Event};
788 /// use quick_xml::name::{Namespace, ResolveResult};
789 /// use quick_xml::reader::NsReader;
790 ///
791 /// let mut reader = NsReader::from_str(r#"
792 /// <outer xmlns="namespace 1">
793 /// <inner xmlns="namespace 2">
794 /// <outer></outer>
795 /// </inner>
796 /// <inner>
797 /// <inner></inner>
798 /// <inner/>
799 /// <outer></outer>
800 /// <p:outer xmlns:p="ns"></p:outer>
801 /// <outer/>
802 /// </inner>
803 /// </outer>
804 /// "#);
805 /// reader.config_mut().trim_text(true);
806 ///
807 /// let ns = Namespace(b"namespace 1");
808 /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
809 /// let end = start.to_end().into_owned();
810 ///
811 /// // First, we read a start event...
812 /// assert_eq!(
813 /// reader.read_resolved_event().unwrap(),
814 /// (ResolveResult::Bound(ns), Event::Start(start))
815 /// );
816 ///
817 /// // ...then, we could skip all events to the corresponding end event.
818 /// // This call will correctly handle nested <outer> elements.
819 /// // Note, however, that this method does not handle namespaces.
820 /// reader.read_to_end(end.name()).unwrap();
821 ///
822 /// // At the end we should get an Eof event, because we ate the whole XML
823 /// assert_eq!(
824 /// reader.read_resolved_event().unwrap(),
825 /// (ResolveResult::Unbound, Event::Eof)
826 /// );
827 /// ```
828 ///
829 /// [`Start`]: Event::Start
830 /// [`End`]: Event::End
831 /// [`IllFormed`]: crate::errors::Error::IllFormed
832 /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
833 /// [`expand_empty_elements`]: Config::expand_empty_elements
834 /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
835 #[inline]
836 pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
837 // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
838 // match literally the start name. See `Config::check_end_names` documentation
839 self.reader.read_to_end(end)
840 }
841
842 /// Reads content between start and end tags, including any markup. This
843 /// function is supposed to be called after you already read a [`Start`] event.
844 ///
845 /// Manages nested cases where parent and child elements have the _literally_
846 /// same name.
847 ///
    /// This method does not unescape read data, instead it returns content
    /// "as is" of the XML document. This is because it has no idea what text
    /// it reads, and if, for example, it contains a CDATA section, an attempt to
    /// unescape its content will spoil the data.
    ///
    /// Any text will be decoded using the current XML [`decoder()`].
    ///
    /// Actually, this method performs the following code:
856 ///
857 /// ```ignore
858 /// let span = reader.read_to_end(end)?;
859 /// let text = reader.decoder().decode(&reader.inner_slice[span]);
860 /// ```
861 ///
862 /// # Examples
863 ///
864 /// This example shows, how you can read a HTML content from your XML document.
865 ///
866 /// ```
867 /// # use pretty_assertions::assert_eq;
868 /// # use std::borrow::Cow;
869 /// use quick_xml::events::{BytesStart, Event};
870 /// use quick_xml::reader::NsReader;
871 ///
872 /// let mut reader = NsReader::from_str(r#"
873 /// <html>
874 /// <title>This is a HTML text</title>
875 /// <p>Usual XML rules does not apply inside it
876 /// <p>For example, elements not needed to be "closed"
877 /// </html>
878 /// "#);
879 /// reader.config_mut().trim_text(true);
880 ///
881 /// let start = BytesStart::new("html");
882 /// let end = start.to_end().into_owned();
883 ///
884 /// // First, we read a start event...
885 /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
886 /// // ...and disable checking of end names because we expect HTML further...
887 /// reader.config_mut().check_end_names = false;
888 ///
889 /// // ...then, we could read text content until close tag.
890 /// // This call will correctly handle nested <html> elements.
891 /// let text = reader.read_text(end.name()).unwrap();
892 /// assert_eq!(text, Cow::Borrowed(r#"
893 /// <title>This is a HTML text</title>
894 /// <p>Usual XML rules does not apply inside it
895 /// <p>For example, elements not needed to be "closed"
896 /// "#));
897 ///
898 /// // Now we can enable checks again
899 /// reader.config_mut().check_end_names = true;
900 ///
901 /// // At the end we should get an Eof event, because we ate the whole XML
902 /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
903 /// ```
904 ///
905 /// [`Start`]: Event::Start
906 /// [`decoder()`]: Reader::decoder()
907 #[inline]
908 pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
909 self.reader.read_text(end)
910 }
911}
912
913impl<R> Deref for NsReader<R> {
914 type Target = Reader<R>;
915
916 #[inline]
917 fn deref(&self) -> &Self::Target {
918 &self.reader
919 }
920}