quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52 escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
53 partial_escape, EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
57use attributes::{AttrError, Attribute, Attributes};
58
59/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
60///
61/// The name can be accessed using the [`name`] or [`local_name`] methods.
62/// An iterator over the attributes is returned by the [`attributes`] method.
63///
64/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
65/// returns the content of this event between `<` and `>` or `/>`:
66///
67/// ```
68/// # use quick_xml::events::{BytesStart, Event};
69/// # use quick_xml::reader::Reader;
70/// # use pretty_assertions::assert_eq;
71/// // Remember, that \ at the end of string literal strips
72/// // all space characters to the first non-space character
73/// let mut reader = Reader::from_str("\
74/// <element a1 = 'val1' a2=\"val2\" />\
75/// <element a1 = 'val1' a2=\"val2\" >"
76/// );
77/// let content = "element a1 = 'val1' a2=\"val2\" ";
78/// let event = BytesStart::from_content(content, 7);
79///
80/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
81/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
82/// // deref coercion of &BytesStart to &[u8]
83/// assert_eq!(&event as &[u8], content.as_bytes());
84/// // AsRef<[u8]> for &T + deref coercion
85/// assert_eq!(event.as_ref(), content.as_bytes());
86/// ```
87///
88/// [`name`]: Self::name
89/// [`local_name`]: Self::local_name
90/// [`attributes`]: Self::attributes
91#[derive(Clone, Eq, PartialEq)]
92pub struct BytesStart<'a> {
93 /// content of the element, before any utf8 conversion
94 pub(crate) buf: Cow<'a, [u8]>,
95 /// end of the element name, the name starts at that the start of `buf`
96 pub(crate) name_len: usize,
97 /// Encoding used for `buf`
98 decoder: Decoder,
99}
100
101impl<'a> BytesStart<'a> {
102 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103 #[inline]
104 pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105 BytesStart {
106 buf: Cow::Borrowed(content),
107 name_len,
108 decoder,
109 }
110 }
111
112 /// Creates a new `BytesStart` from the given name.
113 ///
114 /// # Warning
115 ///
116 /// `name` must be a valid name.
117 #[inline]
118 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119 let buf = str_cow_to_bytes(name);
120 BytesStart {
121 name_len: buf.len(),
122 buf,
123 decoder: Decoder::utf8(),
124 }
125 }
126
127 /// Creates a new `BytesStart` from the given content (name + attributes).
128 ///
129 /// # Warning
130 ///
131 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132 /// must be correctly-formed attributes. Neither are checked, it is possible
133 /// to generate invalid XML if `content` or `name_len` are incorrect.
134 #[inline]
135 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136 BytesStart {
137 buf: str_cow_to_bytes(content),
138 name_len,
139 decoder: Decoder::utf8(),
140 }
141 }
142
143 /// Converts the event into an owned event.
144 pub fn into_owned(self) -> BytesStart<'static> {
145 BytesStart {
146 buf: Cow::Owned(self.buf.into_owned()),
147 name_len: self.name_len,
148 decoder: self.decoder,
149 }
150 }
151
152 /// Converts the event into an owned event without taking ownership of Event
153 pub fn to_owned(&self) -> BytesStart<'static> {
154 BytesStart {
155 buf: Cow::Owned(self.buf.clone().into_owned()),
156 name_len: self.name_len,
157 decoder: self.decoder,
158 }
159 }
160
161 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162 ///
163 /// # Example
164 ///
165 /// ```
166 /// use quick_xml::events::{BytesStart, Event};
167 /// # use quick_xml::writer::Writer;
168 /// # use quick_xml::Error;
169 ///
170 /// struct SomeStruct<'a> {
171 /// attrs: BytesStart<'a>,
172 /// // ...
173 /// }
174 /// # impl<'a> SomeStruct<'a> {
175 /// # fn example(&self) -> Result<(), Error> {
176 /// # let mut writer = Writer::new(Vec::new());
177 ///
178 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179 /// // ...
180 /// writer.write_event(Event::End(self.attrs.to_end()))?;
181 /// # Ok(())
182 /// # }}
183 /// ```
184 ///
185 /// [`to_end`]: Self::to_end
186 pub fn borrow(&self) -> BytesStart<'_> {
187 BytesStart {
188 buf: Cow::Borrowed(&self.buf),
189 name_len: self.name_len,
190 decoder: self.decoder,
191 }
192 }
193
194 /// Creates new paired close tag
195 #[inline]
196 pub fn to_end(&self) -> BytesEnd<'_> {
197 BytesEnd::from(self.name())
198 }
199
200 /// Get the decoder, used to decode bytes, read by the reader which produces
201 /// this event, to the strings.
202 ///
203 /// When event was created manually, encoding is UTF-8.
204 ///
205 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206 /// defaults to UTF-8.
207 ///
208 /// [`encoding`]: ../index.html#encoding
209 #[inline]
210 pub const fn decoder(&self) -> Decoder {
211 self.decoder
212 }
213
214 /// Gets the undecoded raw tag name, as present in the input stream.
215 #[inline]
216 pub fn name(&self) -> QName<'_> {
217 QName(&self.buf[..self.name_len])
218 }
219
220 /// Gets the undecoded raw local tag name (excluding namespace) as present
221 /// in the input stream.
222 ///
223 /// All content up to and including the first `:` character is removed from the tag name.
224 #[inline]
225 pub fn local_name(&self) -> LocalName<'_> {
226 self.name().into()
227 }
228
229 /// Edit the name of the BytesStart in-place
230 ///
231 /// # Warning
232 ///
233 /// `name` must be a valid name.
234 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235 let bytes = self.buf.to_mut();
236 bytes.splice(..self.name_len, name.iter().cloned());
237 self.name_len = name.len();
238 self
239 }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245 ///
246 /// The yielded items must be convertible to [`Attribute`] using `Into`.
247 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248 where
249 I: IntoIterator,
250 I::Item: Into<Attribute<'b>>,
251 {
252 self.extend_attributes(attributes);
253 self
254 }
255
256 /// Add additional attributes to this tag using an iterator.
257 ///
258 /// The yielded items must be convertible to [`Attribute`] using `Into`.
259 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260 where
261 I: IntoIterator,
262 I::Item: Into<Attribute<'b>>,
263 {
264 for attr in attributes {
265 self.push_attribute(attr);
266 }
267 self
268 }
269
270 /// Adds an attribute to this element.
271 pub fn push_attribute<'b, A>(&mut self, attr: A)
272 where
273 A: Into<Attribute<'b>>,
274 {
275 self.buf.to_mut().push(b' ');
276 self.push_attr(attr.into());
277 }
278
279 /// Remove all attributes from the ByteStart
280 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281 self.buf.to_mut().truncate(self.name_len);
282 self
283 }
284
285 /// Returns an iterator over the attributes of this tag.
286 pub fn attributes(&self) -> Attributes<'_> {
287 Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288 }
289
290 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291 pub fn html_attributes(&self) -> Attributes<'_> {
292 Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293 }
294
295 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296 /// including the whitespace after the tag name if there is any.
297 #[inline]
298 pub fn attributes_raw(&self) -> &[u8] {
299 &self.buf[self.name_len..]
300 }
301
302 /// Try to get an attribute
303 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304 &'a self,
305 attr_name: N,
306 ) -> Result<Option<Attribute<'a>>, AttrError> {
307 for a in self.attributes().with_checks(false) {
308 let a = a?;
309 if a.key.as_ref() == attr_name.as_ref() {
310 return Ok(Some(a));
311 }
312 }
313 Ok(None)
314 }
315
316 /// Adds an attribute to this element.
317 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318 let bytes = self.buf.to_mut();
319 bytes.extend_from_slice(attr.key.as_ref());
320 bytes.extend_from_slice(b"=\"");
321 // FIXME: need to escape attribute content
322 bytes.extend_from_slice(attr.value.as_ref());
323 bytes.push(b'"');
324 }
325
326 /// Adds new line in existing element
327 pub(crate) fn push_newline(&mut self) {
328 self.buf.to_mut().push(b'\n');
329 }
330
331 /// Adds indentation bytes in existing element
332 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333 self.buf.to_mut().extend_from_slice(indent);
334 }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339 write!(f, "BytesStart {{ buf: ")?;
340 write_cow_string(f, &self.buf)?;
341 write!(f, ", name_len: {} }}", self.name_len)
342 }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346 type Target = [u8];
347
348 fn deref(&self) -> &[u8] {
349 &self.buf
350 }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty alphanumeric name and an
    /// arbitrary list of attributes.
    ///
    /// # Errors
    ///
    /// Returns `IncorrectFormat` when the generated name is empty or contains
    /// a non-alphanumeric character.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `Vec` is already `IntoIterator`, no explicit `.into_iter()` is required
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Reuses the size hint of `&str`, the type the tag name is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
/// Data of a closing tag (`Event::End`): `</name>`.
///
/// Use [`name`] or [`local_name`] to read the tag name.
///
/// The event implements `Deref<Target = [u8]>`. Dereferencing yields the raw
/// content of the event, i.e. everything between `</` and `>`.
///
/// Note, that inner text will not contain `>` character inside:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded content of the closing tag
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411 #[inline]
412 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413 BytesEnd { name }
414 }
415
416 /// Creates a new `BytesEnd` borrowing a slice.
417 ///
418 /// # Warning
419 ///
420 /// `name` must be a valid name.
421 #[inline]
422 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423 Self::wrap(str_cow_to_bytes(name))
424 }
425
426 /// Converts the event into an owned event.
427 pub fn into_owned(self) -> BytesEnd<'static> {
428 BytesEnd {
429 name: Cow::Owned(self.name.into_owned()),
430 }
431 }
432
433 /// Converts the event into a borrowed event.
434 #[inline]
435 pub fn borrow(&self) -> BytesEnd<'_> {
436 BytesEnd {
437 name: Cow::Borrowed(&self.name),
438 }
439 }
440
441 /// Gets the undecoded raw tag name, as present in the input stream.
442 #[inline]
443 pub fn name(&self) -> QName<'_> {
444 QName(&self.name)
445 }
446
447 /// Gets the undecoded raw local tag name (excluding namespace) as present
448 /// in the input stream.
449 ///
450 /// All content up to and including the first `:` character is removed from the tag name.
451 #[inline]
452 pub fn local_name(&self) -> LocalName<'_> {
453 self.name().into()
454 }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459 write!(f, "BytesEnd {{ name: ")?;
460 write_cow_string(f, &self.name)?;
461 write!(f, " }}")
462 }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466 type Target = [u8];
467
468 fn deref(&self) -> &[u8] {
469 &self.name
470 }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474 #[inline]
475 fn from(name: QName<'a>) -> Self {
476 Self::wrap(name.into_inner().into())
477 }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Reuses the size hint of `&str`, the type the name is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
492/// Data from various events (most notably, `Event::Text`) that stored in XML
493/// in escaped form. Internally data is stored in escaped form.
494///
495/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
496/// returns the content of this event. In case of comment this is everything
497/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
498/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
499/// (i.e. in case of DTD the first character is never space):
500///
501/// ```
502/// # use quick_xml::events::{BytesText, Event};
503/// # use quick_xml::reader::Reader;
504/// # use pretty_assertions::assert_eq;
505/// // Remember, that \ at the end of string literal strips
506/// // all space characters to the first non-space character
507/// let mut reader = Reader::from_str("\
508/// <!DOCTYPE comment or text >\
509/// comment or text \
510/// <!--comment or text -->"
511/// );
512/// let content = "comment or text ";
513/// let event = BytesText::new(content);
514///
515/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
516/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
517/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
518/// // deref coercion of &BytesText to &[u8]
519/// assert_eq!(&event as &[u8], content.as_bytes());
520/// // AsRef<[u8]> for &T + deref coercion
521/// assert_eq!(event.as_ref(), content.as_bytes());
522/// ```
523#[derive(Clone, Eq, PartialEq)]
524pub struct BytesText<'a> {
525 /// Escaped then encoded content of the event. Content is encoded in the XML
526 /// document encoding when event comes from the reader and should be in the
527 /// document encoding when event passed to the writer
528 content: Cow<'a, [u8]>,
529 /// Encoding in which the `content` is stored inside the event
530 decoder: Decoder,
531}
532
533impl<'a> BytesText<'a> {
534 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
535 #[inline]
536 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
537 Self {
538 content: content.into(),
539 decoder,
540 }
541 }
542
543 /// Creates a new `BytesText` from an escaped string.
544 #[inline]
545 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
546 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
547 }
548
549 /// Creates a new `BytesText` from a string. The string is expected not to
550 /// be escaped.
551 #[inline]
552 pub fn new(content: &'a str) -> Self {
553 Self::from_escaped(escape(content))
554 }
555
556 /// Ensures that all data is owned to extend the object's lifetime if
557 /// necessary.
558 #[inline]
559 pub fn into_owned(self) -> BytesText<'static> {
560 BytesText {
561 content: self.content.into_owned().into(),
562 decoder: self.decoder,
563 }
564 }
565
566 /// Extracts the inner `Cow` from the `BytesText` event container.
567 #[inline]
568 pub fn into_inner(self) -> Cow<'a, [u8]> {
569 self.content
570 }
571
572 /// Converts the event into a borrowed event.
573 #[inline]
574 pub fn borrow(&self) -> BytesText<'_> {
575 BytesText {
576 content: Cow::Borrowed(&self.content),
577 decoder: self.decoder,
578 }
579 }
580
581 /// Decodes the content of the event.
582 ///
583 /// This will allocate if the value contains any escape sequences or in
584 /// non-UTF-8 encoding.
585 ///
586 /// This method does not normalizes end-of-line characters as required by [specification].
587 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
588 ///
589 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
590 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
591 self.decoder.decode_cow(&self.content)
592 }
593
594 /// Decodes the content of the XML 1.0 or HTML event.
595 ///
596 /// When this event produced by the reader, it uses the encoding information
597 /// associated with that reader to interpret the raw bytes contained within
598 /// this text event.
599 ///
600 /// This will allocate if the value contains any escape sequences or in non-UTF-8
601 /// encoding, or EOL normalization is required.
602 ///
603 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
604 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
605 ///
606 /// This method also can be used to get HTML content, because rules the same.
607 ///
608 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
609 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
610 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
611 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
612 self.decoder.content(&self.content, normalize_xml10_eols)
613 }
614
615 /// Decodes the content of the XML 1.1 event.
616 ///
617 /// When this event produced by the reader, it uses the encoding information
618 /// associated with that reader to interpret the raw bytes contained within
619 /// this text event.
620 ///
621 /// This will allocate if the value contains any escape sequences or in non-UTF-8
622 /// encoding, or EOL normalization is required.
623 ///
624 /// Note, that this method should be used only if event represents XML 1.1 content,
625 /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
626 ///
627 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
628 ///
629 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
630 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
631 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
632 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
633 self.decoder.content(&self.content, normalize_xml11_eols)
634 }
635
636 /// Alias for [`xml11_content()`](Self::xml11_content).
637 #[inline]
638 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
639 self.xml11_content()
640 }
641
642 /// Alias for [`xml10_content()`](Self::xml10_content).
643 #[inline]
644 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
645 self.xml10_content()
646 }
647
648 /// Removes leading XML whitespace bytes from text content.
649 ///
650 /// Returns `true` if content is empty after that
651 pub fn inplace_trim_start(&mut self) -> bool {
652 self.content = trim_cow(
653 replace(&mut self.content, Cow::Borrowed(b"")),
654 trim_xml_start,
655 );
656 self.content.is_empty()
657 }
658
659 /// Removes trailing XML whitespace bytes from text content.
660 ///
661 /// Returns `true` if content is empty after that
662 pub fn inplace_trim_end(&mut self) -> bool {
663 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
664 self.content.is_empty()
665 }
666}
667
668impl<'a> Debug for BytesText<'a> {
669 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
670 write!(f, "BytesText {{ content: ")?;
671 write_cow_string(f, &self.content)?;
672 write!(f, " }}")
673 }
674}
675
676impl<'a> Deref for BytesText<'a> {
677 type Target = [u8];
678
679 fn deref(&self) -> &[u8] {
680 &self.content
681 }
682}
683
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary alphanumeric string.
    ///
    /// # Errors
    ///
    /// Returns `IncorrectFormat` when the generated string contains
    /// a non-alphanumeric character.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Reuses the size hint of `&str`, the type the content is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
698
699////////////////////////////////////////////////////////////////////////////////////////////////////
700
701/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
702/// [convert](Self::escape) it to [`BytesText`].
703///
704/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
705/// returns the content of this event between `<![CDATA[` and `]]>`.
706///
707/// Note, that inner text will not contain `]]>` sequence inside:
708///
709/// ```
710/// # use quick_xml::events::{BytesCData, Event};
711/// # use quick_xml::reader::Reader;
712/// # use pretty_assertions::assert_eq;
713/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
714/// let content = " CDATA section ";
715/// let event = BytesCData::new(content);
716///
717/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
718/// // deref coercion of &BytesCData to &[u8]
719/// assert_eq!(&event as &[u8], content.as_bytes());
720/// // AsRef<[u8]> for &T + deref coercion
721/// assert_eq!(event.as_ref(), content.as_bytes());
722/// ```
723#[derive(Clone, Eq, PartialEq)]
724pub struct BytesCData<'a> {
725 content: Cow<'a, [u8]>,
726 /// Encoding in which the `content` is stored inside the event
727 decoder: Decoder,
728}
729
730impl<'a> BytesCData<'a> {
731 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
732 #[inline]
733 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
734 Self {
735 content: content.into(),
736 decoder,
737 }
738 }
739
740 /// Creates a new `BytesCData` from a string.
741 ///
742 /// # Warning
743 ///
744 /// `content` must not contain the `]]>` sequence. You can use
745 /// [`BytesCData::escaped`] to escape the content instead.
746 #[inline]
747 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
748 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
749 }
750
751 /// Creates an iterator of `BytesCData` from a string.
752 ///
753 /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
754 /// sections, splitting the `]]` and `>` characters, because the CDATA closing
755 /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
756 /// for each of those sections.
757 ///
758 /// # Examples
759 ///
760 /// ```
761 /// # use quick_xml::events::BytesCData;
762 /// # use pretty_assertions::assert_eq;
763 /// let content = "";
764 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
765 /// assert_eq!(cdata, &[BytesCData::new("")]);
766 ///
767 /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
768 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
769 /// assert_eq!(cdata, &[
770 /// BytesCData::new("Certain tokens like ]]"),
771 /// BytesCData::new("> can be difficult and <invalid>"),
772 /// ]);
773 ///
774 /// let content = "foo]]>bar]]>baz]]>quux";
775 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
776 /// assert_eq!(cdata, &[
777 /// BytesCData::new("foo]]"),
778 /// BytesCData::new(">bar]]"),
779 /// BytesCData::new(">baz]]"),
780 /// BytesCData::new(">quux"),
781 /// ]);
782 /// ```
783 #[inline]
784 pub fn escaped(content: &'a str) -> CDataIterator<'a> {
785 CDataIterator {
786 unprocessed: content.as_bytes(),
787 finished: false,
788 }
789 }
790
791 /// Ensures that all data is owned to extend the object's lifetime if
792 /// necessary.
793 #[inline]
794 pub fn into_owned(self) -> BytesCData<'static> {
795 BytesCData {
796 content: self.content.into_owned().into(),
797 decoder: self.decoder,
798 }
799 }
800
801 /// Extracts the inner `Cow` from the `BytesCData` event container.
802 #[inline]
803 pub fn into_inner(self) -> Cow<'a, [u8]> {
804 self.content
805 }
806
807 /// Converts the event into a borrowed event.
808 #[inline]
809 pub fn borrow(&self) -> BytesCData<'_> {
810 BytesCData {
811 content: Cow::Borrowed(&self.content),
812 decoder: self.decoder,
813 }
814 }
815
816 /// Converts this CDATA content to an escaped version, that can be written
817 /// as an usual text in XML.
818 ///
819 /// This function performs following replacements:
820 ///
821 /// | Character | Replacement
822 /// |-----------|------------
823 /// | `<` | `<`
824 /// | `>` | `>`
825 /// | `&` | `&`
826 /// | `'` | `'`
827 /// | `"` | `"`
828 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
829 let decoded = self.decode()?;
830 Ok(BytesText::wrap(
831 match escape(decoded) {
832 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
833 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
834 },
835 Decoder::utf8(),
836 ))
837 }
838
839 /// Converts this CDATA content to an escaped version, that can be written
840 /// as an usual text in XML.
841 ///
842 /// In XML text content, it is allowed (though not recommended) to leave
843 /// the quote special characters `"` and `'` unescaped.
844 ///
845 /// This function performs following replacements:
846 ///
847 /// | Character | Replacement
848 /// |-----------|------------
849 /// | `<` | `<`
850 /// | `>` | `>`
851 /// | `&` | `&`
852 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
853 let decoded = self.decode()?;
854 Ok(BytesText::wrap(
855 match partial_escape(decoded) {
856 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
857 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
858 },
859 Decoder::utf8(),
860 ))
861 }
862
863 /// Converts this CDATA content to an escaped version, that can be written
864 /// as an usual text in XML. This method escapes only those characters that
865 /// must be escaped according to the [specification].
866 ///
867 /// This function performs following replacements:
868 ///
869 /// | Character | Replacement
870 /// |-----------|------------
871 /// | `<` | `<`
872 /// | `&` | `&`
873 ///
874 /// [specification]: https://www.w3.org/TR/xml11/#syntax
875 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
876 let decoded = self.decode()?;
877 Ok(BytesText::wrap(
878 match minimal_escape(decoded) {
879 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
880 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
881 },
882 Decoder::utf8(),
883 ))
884 }
885
886 /// Decodes the raw input byte content of the CDATA section into a string,
887 /// without performing XML entity escaping.
888 ///
889 /// When this event produced by the XML reader, it uses the encoding information
890 /// associated with that reader to interpret the raw bytes contained within this
891 /// CDATA event.
892 ///
893 /// This method does not normalizes end-of-line characters as required by [specification].
894 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
895 ///
896 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
897 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
898 self.decoder.decode_cow(&self.content)
899 }
900
901 /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or
902 /// HTML event into a string.
903 ///
904 /// When this event produced by the reader, it uses the encoding information
905 /// associated with that reader to interpret the raw bytes contained within
906 /// this CDATA event.
907 ///
908 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
909 /// is required.
910 ///
911 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
912 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
913 ///
914 /// This method also can be used to get HTML content, because rules the same.
915 ///
916 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
917 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
918 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
919 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
920 self.decoder.content(&self.content, normalize_xml10_eols)
921 }
922
923 /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event
924 /// into a string.
925 ///
926 /// When this event produced by the reader, it uses the encoding information
927 /// associated with that reader to interpret the raw bytes contained within
928 /// this CDATA event.
929 ///
930 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
931 /// is required.
932 ///
933 /// Note, that this method should be used only if event represents XML 1.1 content,
934 /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
935 ///
936 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
937 ///
938 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
939 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
940 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
941 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
942 self.decoder.content(&self.content, normalize_xml11_eols)
943 }
944
945 /// Alias for [`xml11_content()`](Self::xml11_content).
946 #[inline]
947 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
948 self.xml11_content()
949 }
950
951 /// Alias for [`xml10_content()`](Self::xml10_content).
952 #[inline]
953 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
954 self.xml10_content()
955 }
956}
957
958impl<'a> Debug for BytesCData<'a> {
959 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
960 write!(f, "BytesCData {{ content: ")?;
961 write_cow_string(f, &self.content)?;
962 write!(f, " }}")
963 }
964}
965
966impl<'a> Deref for BytesCData<'a> {
967 type Target = [u8];
968
969 fn deref(&self) -> &[u8] {
970 &self.content
971 }
972}
973
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds the event from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// The only input consumed is a single `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
983
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set once the final chunk (the data left after the last split point)
    /// has been emitted; every later call to `next()` then returns `None`.
    finished: bool,
}
994
995impl<'a> Debug for CDataIterator<'a> {
996 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
997 f.debug_struct("CDataIterator")
998 .field("unprocessed", &Bytes(self.unprocessed))
999 .field("finished", &self.finished)
1000 .finish()
1001 }
1002}
1003
1004impl<'a> Iterator for CDataIterator<'a> {
1005 type Item = BytesCData<'a>;
1006
1007 fn next(&mut self) -> Option<BytesCData<'a>> {
1008 if self.finished {
1009 return None;
1010 }
1011
1012 for gt in memchr::memchr_iter(b'>', self.unprocessed) {
1013 if self.unprocessed[..gt].ends_with(b"]]") {
1014 let (slice, rest) = self.unprocessed.split_at(gt);
1015 self.unprocessed = rest;
1016 return Some(BytesCData::wrap(slice, Decoder::utf8()));
1017 }
1018 }
1019
1020 self.finished = true;
1021 Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
1022 }
1023}
1024
1025impl FusedIterator for CDataIterator<'_> {}
1026
1027////////////////////////////////////////////////////////////////////////////////////////////////////
1028
1029/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
1030///
1031/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1032/// returns the content of this event between `<?` and `?>`.
1033///
1034/// Note, that inner text will not contain `?>` sequence inside:
1035///
1036/// ```
1037/// # use quick_xml::events::{BytesPI, Event};
1038/// # use quick_xml::reader::Reader;
1039/// # use pretty_assertions::assert_eq;
1040/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
1041/// let content = "processing instruction >:-<~ ";
1042/// let event = BytesPI::new(content);
1043///
1044/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
1045/// // deref coercion of &BytesPI to &[u8]
1046/// assert_eq!(&event as &[u8], content.as_bytes());
1047/// // AsRef<[u8]> for &T + deref coercion
1048/// assert_eq!(event.as_ref(), content.as_bytes());
1049/// ```
1050///
1051/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// Everything between `<?` and `?>`, stored as a [`BytesStart`] so that the
    /// target can be read as its name and the remainder as its raw attributes.
    content: BytesStart<'a>,
}
1056
1057impl<'a> BytesPI<'a> {
1058 /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1059 #[inline]
1060 pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
1061 Self {
1062 content: BytesStart::wrap(content, target_len, decoder),
1063 }
1064 }
1065
1066 /// Creates a new `BytesPI` from a string.
1067 ///
1068 /// # Warning
1069 ///
1070 /// `content` must not contain the `?>` sequence.
1071 #[inline]
1072 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1073 let buf = str_cow_to_bytes(content);
1074 let name_len = name_len(&buf);
1075 Self {
1076 content: BytesStart {
1077 buf,
1078 name_len,
1079 decoder: Decoder::utf8(),
1080 },
1081 }
1082 }
1083
1084 /// Ensures that all data is owned to extend the object's lifetime if
1085 /// necessary.
1086 #[inline]
1087 pub fn into_owned(self) -> BytesPI<'static> {
1088 BytesPI {
1089 content: self.content.into_owned().into(),
1090 }
1091 }
1092
1093 /// Extracts the inner `Cow` from the `BytesPI` event container.
1094 #[inline]
1095 pub fn into_inner(self) -> Cow<'a, [u8]> {
1096 self.content.buf
1097 }
1098
1099 /// Converts the event into a borrowed event.
1100 #[inline]
1101 pub fn borrow(&self) -> BytesPI<'_> {
1102 BytesPI {
1103 content: self.content.borrow(),
1104 }
1105 }
1106
1107 /// A target used to identify the application to which the instruction is directed.
1108 ///
1109 /// # Example
1110 ///
1111 /// ```
1112 /// # use pretty_assertions::assert_eq;
1113 /// use quick_xml::events::BytesPI;
1114 ///
1115 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1116 /// assert_eq!(instruction.target(), b"xml-stylesheet");
1117 /// ```
1118 #[inline]
1119 pub fn target(&self) -> &[u8] {
1120 self.content.name().0
1121 }
1122
1123 /// Content of the processing instruction. Contains everything between target
1124 /// name and the end of the instruction. A direct consequence is that the first
1125 /// character is always a space character.
1126 ///
1127 /// # Example
1128 ///
1129 /// ```
1130 /// # use pretty_assertions::assert_eq;
1131 /// use quick_xml::events::BytesPI;
1132 ///
1133 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1134 /// assert_eq!(instruction.content(), br#" href="style.css""#);
1135 /// ```
1136 #[inline]
1137 pub fn content(&self) -> &[u8] {
1138 self.content.attributes_raw()
1139 }
1140
1141 /// A view of the processing instructions' content as a list of key-value pairs.
1142 ///
1143 /// Key-value pairs are used in some processing instructions, for example in
1144 /// `<?xml-stylesheet?>`.
1145 ///
1146 /// Returned iterator does not validate attribute values as may required by
1147 /// target's rules. For example, it doesn't check that substring `?>` is not
1148 /// present in the attribute value. That shouldn't be the problem when event
1149 /// is produced by the reader, because reader detects end of processing instruction
1150 /// by the first `?>` sequence, as required by the specification, and therefore
1151 /// this sequence cannot appear inside it.
1152 ///
1153 /// # Example
1154 ///
1155 /// ```
1156 /// # use pretty_assertions::assert_eq;
1157 /// use std::borrow::Cow;
1158 /// use quick_xml::events::attributes::Attribute;
1159 /// use quick_xml::events::BytesPI;
1160 /// use quick_xml::name::QName;
1161 ///
1162 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1163 /// for attr in instruction.attributes() {
1164 /// assert_eq!(attr, Ok(Attribute {
1165 /// key: QName(b"href"),
1166 /// value: Cow::Borrowed(b"style.css"),
1167 /// }));
1168 /// }
1169 /// ```
1170 #[inline]
1171 pub fn attributes(&self) -> Attributes<'_> {
1172 self.content.attributes()
1173 }
1174}
1175
1176impl<'a> Debug for BytesPI<'a> {
1177 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1178 write!(f, "BytesPI {{ content: ")?;
1179 write_cow_string(f, &self.content.buf)?;
1180 write!(f, " }}")
1181 }
1182}
1183
1184impl<'a> Deref for BytesPI<'a> {
1185 type Target = [u8];
1186
1187 fn deref(&self) -> &[u8] {
1188 &self.content
1189 }
1190}
1191
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds the event from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// The only input consumed is a single `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1201
1202////////////////////////////////////////////////////////////////////////////////////////////////////
1203
1204/// An XML declaration (`Event::Decl`).
1205///
1206/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1207///
1208/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1209/// returns the content of this event between `<?` and `?>`.
1210///
1211/// Note, that inner text will not contain `?>` sequence inside:
1212///
1213/// ```
1214/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1215/// # use quick_xml::reader::Reader;
1216/// # use pretty_assertions::assert_eq;
1217/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1218/// let content = "xml version = '1.0' ";
1219/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1220///
1221/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1222/// // deref coercion of &BytesDecl to &[u8]
1223/// assert_eq!(&event as &[u8], content.as_bytes());
1224/// // AsRef<[u8]> for &T + deref coercion
1225/// assert_eq!(event.as_ref(), content.as_bytes());
1226/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// Declaration content between `<?` and `?>`, kept as a [`BytesStart`] so that
    /// `version`, `encoding` and `standalone` can be looked up as attributes.
    content: BytesStart<'a>,
}
1231
1232impl<'a> BytesDecl<'a> {
1233 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1234 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1235 /// attribute.
1236 ///
1237 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1238 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1239 /// the double quote character is not allowed in any of the attribute values.
1240 pub fn new(
1241 version: &str,
1242 encoding: Option<&str>,
1243 standalone: Option<&str>,
1244 ) -> BytesDecl<'static> {
1245 // Compute length of the buffer based on supplied attributes
1246 // ' encoding=""' => 12
1247 let encoding_attr_len = if let Some(xs) = encoding {
1248 12 + xs.len()
1249 } else {
1250 0
1251 };
1252 // ' standalone=""' => 14
1253 let standalone_attr_len = if let Some(xs) = standalone {
1254 14 + xs.len()
1255 } else {
1256 0
1257 };
1258 // 'xml version=""' => 14
1259 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1260
1261 buf.push_str("xml version=\"");
1262 buf.push_str(version);
1263
1264 if let Some(encoding_val) = encoding {
1265 buf.push_str("\" encoding=\"");
1266 buf.push_str(encoding_val);
1267 }
1268
1269 if let Some(standalone_val) = standalone {
1270 buf.push_str("\" standalone=\"");
1271 buf.push_str(standalone_val);
1272 }
1273 buf.push('"');
1274
1275 BytesDecl {
1276 content: BytesStart::from_content(buf, 3),
1277 }
1278 }
1279
1280 /// Creates a `BytesDecl` from a `BytesStart`
1281 pub const fn from_start(start: BytesStart<'a>) -> Self {
1282 Self { content: start }
1283 }
1284
1285 /// Gets xml version, excluding quotes (`'` or `"`).
1286 ///
1287 /// According to the [grammar], the version *must* be the first thing in the declaration.
1288 /// This method tries to extract the first thing in the declaration and return it.
1289 /// In case of multiple attributes value of the first one is returned.
1290 ///
1291 /// If version is missed in the declaration, or the first thing is not a version,
1292 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1293 ///
1294 /// # Examples
1295 ///
1296 /// ```
1297 /// use quick_xml::errors::{Error, IllFormedError};
1298 /// use quick_xml::events::{BytesDecl, BytesStart};
1299 ///
1300 /// // <?xml version='1.1'?>
1301 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1302 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1303 ///
1304 /// // <?xml version='1.0' version='1.1'?>
1305 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1306 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1307 ///
1308 /// // <?xml encoding='utf-8'?>
1309 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1310 /// match decl.version() {
1311 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1312 /// _ => assert!(false),
1313 /// }
1314 ///
1315 /// // <?xml encoding='utf-8' version='1.1'?>
1316 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1317 /// match decl.version() {
1318 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1319 /// _ => assert!(false),
1320 /// }
1321 ///
1322 /// // <?xml?>
1323 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1324 /// match decl.version() {
1325 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1326 /// _ => assert!(false),
1327 /// }
1328 /// ```
1329 ///
1330 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1331 pub fn version(&self) -> Result<Cow<'_, [u8]>, Error> {
1332 // The version *must* be the first thing in the declaration.
1333 match self.content.attributes().with_checks(false).next() {
1334 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1335 // first attribute was not "version"
1336 Some(Ok(a)) => {
1337 let found = from_utf8(a.key.as_ref())
1338 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1339 .to_string();
1340 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1341 found,
1342 ))))
1343 }
1344 // error parsing attributes
1345 Some(Err(e)) => Err(e.into()),
1346 // no attributes
1347 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1348 }
1349 }
1350
1351 /// Gets xml encoding, excluding quotes (`'` or `"`).
1352 ///
1353 /// Although according to the [grammar] encoding must appear before `"standalone"`
1354 /// and after `"version"`, this method does not check that. The first occurrence
1355 /// of the attribute will be returned even if there are several. Also, method does
1356 /// not restrict symbols that can forming the encoding, so the returned encoding
1357 /// name may not correspond to the grammar.
1358 ///
1359 /// # Examples
1360 ///
1361 /// ```
1362 /// use std::borrow::Cow;
1363 /// use quick_xml::Error;
1364 /// use quick_xml::events::{BytesDecl, BytesStart};
1365 ///
1366 /// // <?xml version='1.1'?>
1367 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1368 /// assert!(decl.encoding().is_none());
1369 ///
1370 /// // <?xml encoding='utf-8'?>
1371 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1372 /// match decl.encoding() {
1373 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1374 /// _ => assert!(false),
1375 /// }
1376 ///
1377 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1378 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1379 /// match decl.encoding() {
1380 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1381 /// _ => assert!(false),
1382 /// }
1383 /// ```
1384 ///
1385 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1386 pub fn encoding(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1387 self.content
1388 .try_get_attribute("encoding")
1389 .map(|a| a.map(|a| a.value))
1390 .transpose()
1391 }
1392
1393 /// Gets xml standalone, excluding quotes (`'` or `"`).
1394 ///
1395 /// Although according to the [grammar] standalone flag must appear after `"version"`
1396 /// and `"encoding"`, this method does not check that. The first occurrence of the
1397 /// attribute will be returned even if there are several. Also, method does not
1398 /// restrict symbols that can forming the value, so the returned flag name may not
1399 /// correspond to the grammar.
1400 ///
1401 /// # Examples
1402 ///
1403 /// ```
1404 /// use std::borrow::Cow;
1405 /// use quick_xml::Error;
1406 /// use quick_xml::events::{BytesDecl, BytesStart};
1407 ///
1408 /// // <?xml version='1.1'?>
1409 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1410 /// assert!(decl.standalone().is_none());
1411 ///
1412 /// // <?xml standalone='yes'?>
1413 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1414 /// match decl.standalone() {
1415 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1416 /// _ => assert!(false),
1417 /// }
1418 ///
1419 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1420 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1421 /// match decl.standalone() {
1422 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1423 /// _ => assert!(false),
1424 /// }
1425 /// ```
1426 ///
1427 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1428 pub fn standalone(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1429 self.content
1430 .try_get_attribute("standalone")
1431 .map(|a| a.map(|a| a.value))
1432 .transpose()
1433 }
1434
1435 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1436 /// algorithm.
1437 ///
1438 /// If encoding in not known, or `encoding` key was not found, returns `None`.
1439 /// In case of duplicated `encoding` key, encoding, corresponding to the first
1440 /// one, is returned.
1441 #[cfg(feature = "encoding")]
1442 pub fn encoder(&self) -> Option<&'static Encoding> {
1443 self.encoding()
1444 .and_then(|e| e.ok())
1445 .and_then(|e| Encoding::for_label(&e))
1446 }
1447
1448 /// Converts the event into an owned event.
1449 pub fn into_owned(self) -> BytesDecl<'static> {
1450 BytesDecl {
1451 content: self.content.into_owned(),
1452 }
1453 }
1454
1455 /// Converts the event into a borrowed event.
1456 #[inline]
1457 pub fn borrow(&self) -> BytesDecl<'_> {
1458 BytesDecl {
1459 content: self.content.borrow(),
1460 }
1461 }
1462}
1463
1464impl<'a> Deref for BytesDecl<'a> {
1465 type Target = [u8];
1466
1467 fn deref(&self) -> &[u8] {
1468 &self.content
1469 }
1470}
1471
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string and two optional
    /// strings used as the encoding and the standalone flag.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    /// Combines the hints of all three values consumed by `arbitrary()`,
    /// instead of reporting the hint of the version string alone.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        arbitrary::size_hint::and_all(&[
            <&str as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
        ])
    }
}
1486
1487////////////////////////////////////////////////////////////////////////////////////////////////////
1488
1489/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
1490///
1491/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1492/// returns the content of this event between `&` and `;`:
1493///
1494/// ```
1495/// # use quick_xml::events::{BytesRef, Event};
1496/// # use quick_xml::reader::Reader;
1497/// # use pretty_assertions::assert_eq;
1498/// let mut reader = Reader::from_str(r#"&entity;"#);
1499/// let content = "entity";
1500/// let event = BytesRef::new(content);
1501///
1502/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
1503/// // deref coercion of &BytesRef to &[u8]
1504/// assert_eq!(&event as &[u8], content.as_bytes());
1505/// // AsRef<[u8]> for &T + deref coercion
1506/// assert_eq!(event.as_ref(), content.as_bytes());
1507/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Content of the reference between `&` and `;`.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}
1514
1515impl<'a> BytesRef<'a> {
1516 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1517 #[inline]
1518 pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1519 Self {
1520 content: Cow::Borrowed(content),
1521 decoder,
1522 }
1523 }
1524
1525 /// Creates a new `BytesRef` borrowing a slice.
1526 ///
1527 /// # Warning
1528 ///
1529 /// `name` must be a valid name.
1530 #[inline]
1531 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1532 Self {
1533 content: str_cow_to_bytes(name),
1534 decoder: Decoder::utf8(),
1535 }
1536 }
1537
1538 /// Converts the event into an owned event.
1539 pub fn into_owned(self) -> BytesRef<'static> {
1540 BytesRef {
1541 content: Cow::Owned(self.content.into_owned()),
1542 decoder: self.decoder,
1543 }
1544 }
1545
1546 /// Extracts the inner `Cow` from the `BytesRef` event container.
1547 #[inline]
1548 pub fn into_inner(self) -> Cow<'a, [u8]> {
1549 self.content
1550 }
1551
1552 /// Converts the event into a borrowed event.
1553 #[inline]
1554 pub fn borrow(&self) -> BytesRef<'_> {
1555 BytesRef {
1556 content: Cow::Borrowed(&self.content),
1557 decoder: self.decoder,
1558 }
1559 }
1560
1561 /// Decodes the content of the event.
1562 ///
1563 /// This will allocate if the value contains any escape sequences or in
1564 /// non-UTF-8 encoding.
1565 ///
1566 /// This method does not normalizes end-of-line characters as required by [specification].
1567 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
1568 ///
1569 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
1570 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1571 self.decoder.decode_cow(&self.content)
1572 }
1573
1574 /// Decodes the content of the XML 1.0 or HTML event.
1575 ///
1576 /// When this event produced by the reader, it uses the encoding information
1577 /// associated with that reader to interpret the raw bytes contained within
1578 /// this general reference event.
1579 ///
1580 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1581 /// is required.
1582 ///
1583 /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
1584 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1585 ///
1586 /// This method also can be used to get HTML content, because rules the same.
1587 ///
1588 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1589 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1590 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1591 pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1592 self.decoder.content(&self.content, normalize_xml10_eols)
1593 }
1594
1595 /// Decodes the content of the XML 1.1 event.
1596 ///
1597 /// When this event produced by the reader, it uses the encoding information
1598 /// associated with that reader to interpret the raw bytes contained within
1599 /// this general reference event.
1600 ///
1601 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1602 /// is required.
1603 ///
1604 /// Note, that this method should be used only if event represents XML 1.1 content,
1605 /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
1606 ///
1607 /// To get HTML content use [`xml10_content()`](Self::xml10_content).
1608 ///
1609 /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
1610 /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
1611 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1612 pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1613 self.decoder.content(&self.content, normalize_xml11_eols)
1614 }
1615
1616 /// Alias for [`xml11_content()`](Self::xml11_content).
1617 #[inline]
1618 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1619 self.xml11_content()
1620 }
1621
1622 /// Alias for [`xml10_content()`](Self::xml10_content).
1623 #[inline]
1624 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1625 self.xml10_content()
1626 }
1627
1628 /// Returns `true` if the specified reference represents the character reference
1629 /// (`&#<number>;`).
1630 ///
1631 /// ```
1632 /// # use quick_xml::events::BytesRef;
1633 /// # use pretty_assertions::assert_eq;
1634 /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1635 /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1636 /// assert_eq!(BytesRef::new("lt" ).is_char_ref(), false);
1637 /// ```
1638 pub fn is_char_ref(&self) -> bool {
1639 matches!(self.content.first(), Some(b'#'))
1640 }
1641
1642 /// If this reference represents character reference, then resolves it and
1643 /// returns the character, otherwise returns `None`.
1644 ///
1645 /// This method does not check if character is allowed for XML, in other words,
1646 /// well-formedness constraint [WFC: Legal Char] is not enforced.
1647 /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1648 ///
1649 /// ```
1650 /// # use quick_xml::events::BytesRef;
1651 /// # use pretty_assertions::assert_eq;
1652 /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1653 /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1654 /// assert_eq!(BytesRef::new("lt" ).resolve_char_ref().unwrap(), None);
1655 /// ```
1656 ///
1657 /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1658 pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1659 if let Some(num) = self.decode()?.strip_prefix('#') {
1660 let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1661 return Ok(Some(ch));
1662 }
1663 Ok(None)
1664 }
1665}
1666
1667impl<'a> Debug for BytesRef<'a> {
1668 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1669 write!(f, "BytesRef {{ content: ")?;
1670 write_cow_string(f, &self.content)?;
1671 write!(f, " }}")
1672 }
1673}
1674
1675impl<'a> Deref for BytesRef<'a> {
1676 type Target = [u8];
1677
1678 fn deref(&self) -> &[u8] {
1679 &self.content
1680 }
1681}
1682
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds the event from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// The only input consumed is a single `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        let hint = <&str as arbitrary::Arbitrary>::size_hint(depth);
        hint
    }
}
1693
1694////////////////////////////////////////////////////////////////////////////////////////////////////
1695
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Self::Eof) wraps the payload of the
/// corresponding piece of the document. Dereferencing an event gives the
/// bytes of that payload; for `Eof` it gives an empty slice.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// General reference `&entity;` in the textual data. Can be either an entity
    /// reference, or a character reference.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    Eof,
}
1726
1727impl<'a> Event<'a> {
1728 /// Converts the event to an owned version, untied to the lifetime of
1729 /// buffer used when reading but incurring a new, separate allocation.
1730 pub fn into_owned(self) -> Event<'static> {
1731 match self {
1732 Event::Start(e) => Event::Start(e.into_owned()),
1733 Event::End(e) => Event::End(e.into_owned()),
1734 Event::Empty(e) => Event::Empty(e.into_owned()),
1735 Event::Text(e) => Event::Text(e.into_owned()),
1736 Event::Comment(e) => Event::Comment(e.into_owned()),
1737 Event::CData(e) => Event::CData(e.into_owned()),
1738 Event::Decl(e) => Event::Decl(e.into_owned()),
1739 Event::PI(e) => Event::PI(e.into_owned()),
1740 Event::DocType(e) => Event::DocType(e.into_owned()),
1741 Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1742 Event::Eof => Event::Eof,
1743 }
1744 }
1745
1746 /// Converts the event into a borrowed event.
1747 #[inline]
1748 pub fn borrow(&self) -> Event<'_> {
1749 match self {
1750 Event::Start(e) => Event::Start(e.borrow()),
1751 Event::End(e) => Event::End(e.borrow()),
1752 Event::Empty(e) => Event::Empty(e.borrow()),
1753 Event::Text(e) => Event::Text(e.borrow()),
1754 Event::Comment(e) => Event::Comment(e.borrow()),
1755 Event::CData(e) => Event::CData(e.borrow()),
1756 Event::Decl(e) => Event::Decl(e.borrow()),
1757 Event::PI(e) => Event::PI(e.borrow()),
1758 Event::DocType(e) => Event::DocType(e.borrow()),
1759 Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1760 Event::Eof => Event::Eof,
1761 }
1762 }
1763}
1764
1765impl<'a> Deref for Event<'a> {
1766 type Target = [u8];
1767
1768 fn deref(&self) -> &[u8] {
1769 match *self {
1770 Event::Start(ref e) | Event::Empty(ref e) => e,
1771 Event::End(ref e) => e,
1772 Event::Text(ref e) => e,
1773 Event::Decl(ref e) => e,
1774 Event::PI(ref e) => e,
1775 Event::CData(ref e) => e,
1776 Event::Comment(ref e) => e,
1777 Event::DocType(ref e) => e,
1778 Event::GeneralRef(ref e) => e,
1779 Event::Eof => &[],
1780 }
1781 }
1782}
1783
impl<'a> AsRef<Event<'a>> for Event<'a> {
    /// Identity conversion: returns `self` unchanged.
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}
1789
1790////////////////////////////////////////////////////////////////////////////////////////////////////
1791
/// Converts a borrowed or owned string into a borrowed or owned byte buffer
/// without copying: a borrowed string stays borrowed, an owned one stays owned.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1799
/// Applies `trim` to the bytes of `value` and returns the result with the same
/// ownership as the input.
///
/// Borrowed data is simply re-sliced. For owned data the original buffer is
/// returned as-is when `trim` kept its full length; otherwise the trimmed
/// part is copied into a new buffer.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(bytes) => return Cow::Borrowed(trim(bytes)),
        Cow::Owned(owned) => owned,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1815
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        for _ in 0..2 {
            start.push_attribute(("x", "y\"z"));
        }
        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}