quick_xml/events/
mod.rs

1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
//! Events occur both during reading and writing and are
//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52    escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
53    partial_escape, EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
57use attributes::{AttrError, Attribute, Attributes};
58
59/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
60///
61/// The name can be accessed using the [`name`] or [`local_name`] methods.
62/// An iterator over the attributes is returned by the [`attributes`] method.
63///
64/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
65/// returns the content of this event between `<` and `>` or `/>`:
66///
67/// ```
68/// # use quick_xml::events::{BytesStart, Event};
69/// # use quick_xml::reader::Reader;
70/// # use pretty_assertions::assert_eq;
71/// // Remember, that \ at the end of string literal strips
72/// // all space characters to the first non-space character
73/// let mut reader = Reader::from_str("\
74///     <element a1 = 'val1' a2=\"val2\" />\
75///     <element a1 = 'val1' a2=\"val2\" >"
76/// );
77/// let content = "element a1 = 'val1' a2=\"val2\" ";
78/// let event = BytesStart::from_content(content, 7);
79///
80/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
81/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
82/// // deref coercion of &BytesStart to &[u8]
83/// assert_eq!(&event as &[u8], content.as_bytes());
84/// // AsRef<[u8]> for &T + deref coercion
85/// assert_eq!(event.as_ref(), content.as_bytes());
86/// ```
87///
88/// [`name`]: Self::name
89/// [`local_name`]: Self::local_name
90/// [`attributes`]: Self::attributes
91#[derive(Clone, Eq, PartialEq)]
92pub struct BytesStart<'a> {
93    /// content of the element, before any utf8 conversion
94    pub(crate) buf: Cow<'a, [u8]>,
95    /// end of the element name, the name starts at that the start of `buf`
96    pub(crate) name_len: usize,
97    /// Encoding used for `buf`
98    decoder: Decoder,
99}
100
101impl<'a> BytesStart<'a> {
102    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103    #[inline]
104    pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105        BytesStart {
106            buf: Cow::Borrowed(content),
107            name_len,
108            decoder,
109        }
110    }
111
112    /// Creates a new `BytesStart` from the given name.
113    ///
114    /// # Warning
115    ///
116    /// `name` must be a valid name.
117    #[inline]
118    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119        let buf = str_cow_to_bytes(name);
120        BytesStart {
121            name_len: buf.len(),
122            buf,
123            decoder: Decoder::utf8(),
124        }
125    }
126
127    /// Creates a new `BytesStart` from the given content (name + attributes).
128    ///
129    /// # Warning
130    ///
131    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132    /// must be correctly-formed attributes. Neither are checked, it is possible
133    /// to generate invalid XML if `content` or `name_len` are incorrect.
134    #[inline]
135    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136        BytesStart {
137            buf: str_cow_to_bytes(content),
138            name_len,
139            decoder: Decoder::utf8(),
140        }
141    }
142
143    /// Converts the event into an owned event.
144    pub fn into_owned(self) -> BytesStart<'static> {
145        BytesStart {
146            buf: Cow::Owned(self.buf.into_owned()),
147            name_len: self.name_len,
148            decoder: self.decoder,
149        }
150    }
151
152    /// Converts the event into an owned event without taking ownership of Event
153    pub fn to_owned(&self) -> BytesStart<'static> {
154        BytesStart {
155            buf: Cow::Owned(self.buf.clone().into_owned()),
156            name_len: self.name_len,
157            decoder: self.decoder,
158        }
159    }
160
161    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162    ///
163    /// # Example
164    ///
165    /// ```
166    /// use quick_xml::events::{BytesStart, Event};
167    /// # use quick_xml::writer::Writer;
168    /// # use quick_xml::Error;
169    ///
170    /// struct SomeStruct<'a> {
171    ///     attrs: BytesStart<'a>,
172    ///     // ...
173    /// }
174    /// # impl<'a> SomeStruct<'a> {
175    /// # fn example(&self) -> Result<(), Error> {
176    /// # let mut writer = Writer::new(Vec::new());
177    ///
178    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179    /// // ...
180    /// writer.write_event(Event::End(self.attrs.to_end()))?;
181    /// # Ok(())
182    /// # }}
183    /// ```
184    ///
185    /// [`to_end`]: Self::to_end
186    pub fn borrow(&self) -> BytesStart<'_> {
187        BytesStart {
188            buf: Cow::Borrowed(&self.buf),
189            name_len: self.name_len,
190            decoder: self.decoder,
191        }
192    }
193
194    /// Creates new paired close tag
195    #[inline]
196    pub fn to_end(&self) -> BytesEnd<'_> {
197        BytesEnd::from(self.name())
198    }
199
200    /// Get the decoder, used to decode bytes, read by the reader which produces
201    /// this event, to the strings.
202    ///
203    /// When event was created manually, encoding is UTF-8.
204    ///
205    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206    /// defaults to UTF-8.
207    ///
208    /// [`encoding`]: ../index.html#encoding
209    #[inline]
210    pub const fn decoder(&self) -> Decoder {
211        self.decoder
212    }
213
214    /// Gets the undecoded raw tag name, as present in the input stream.
215    #[inline]
216    pub fn name(&self) -> QName<'_> {
217        QName(&self.buf[..self.name_len])
218    }
219
220    /// Gets the undecoded raw local tag name (excluding namespace) as present
221    /// in the input stream.
222    ///
223    /// All content up to and including the first `:` character is removed from the tag name.
224    #[inline]
225    pub fn local_name(&self) -> LocalName<'_> {
226        self.name().into()
227    }
228
229    /// Edit the name of the BytesStart in-place
230    ///
231    /// # Warning
232    ///
233    /// `name` must be a valid name.
234    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235        let bytes = self.buf.to_mut();
236        bytes.splice(..self.name_len, name.iter().cloned());
237        self.name_len = name.len();
238        self
239    }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245    ///
246    /// The yielded items must be convertible to [`Attribute`] using `Into`.
247    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248    where
249        I: IntoIterator,
250        I::Item: Into<Attribute<'b>>,
251    {
252        self.extend_attributes(attributes);
253        self
254    }
255
256    /// Add additional attributes to this tag using an iterator.
257    ///
258    /// The yielded items must be convertible to [`Attribute`] using `Into`.
259    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260    where
261        I: IntoIterator,
262        I::Item: Into<Attribute<'b>>,
263    {
264        for attr in attributes {
265            self.push_attribute(attr);
266        }
267        self
268    }
269
270    /// Adds an attribute to this element.
271    pub fn push_attribute<'b, A>(&mut self, attr: A)
272    where
273        A: Into<Attribute<'b>>,
274    {
275        self.buf.to_mut().push(b' ');
276        self.push_attr(attr.into());
277    }
278
279    /// Remove all attributes from the ByteStart
280    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281        self.buf.to_mut().truncate(self.name_len);
282        self
283    }
284
285    /// Returns an iterator over the attributes of this tag.
286    pub fn attributes(&self) -> Attributes<'_> {
287        Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288    }
289
290    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291    pub fn html_attributes(&self) -> Attributes<'_> {
292        Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293    }
294
295    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296    /// including the whitespace after the tag name if there is any.
297    #[inline]
298    pub fn attributes_raw(&self) -> &[u8] {
299        &self.buf[self.name_len..]
300    }
301
302    /// Try to get an attribute
303    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304        &'a self,
305        attr_name: N,
306    ) -> Result<Option<Attribute<'a>>, AttrError> {
307        for a in self.attributes().with_checks(false) {
308            let a = a?;
309            if a.key.as_ref() == attr_name.as_ref() {
310                return Ok(Some(a));
311            }
312        }
313        Ok(None)
314    }
315
316    /// Adds an attribute to this element.
317    pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318        let bytes = self.buf.to_mut();
319        bytes.extend_from_slice(attr.key.as_ref());
320        bytes.extend_from_slice(b"=\"");
321        // FIXME: need to escape attribute content
322        bytes.extend_from_slice(attr.value.as_ref());
323        bytes.push(b'"');
324    }
325
326    /// Adds new line in existing element
327    pub(crate) fn push_newline(&mut self) {
328        self.buf.to_mut().push(b'\n');
329    }
330
331    /// Adds indentation bytes in existing element
332    pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333        self.buf.to_mut().extend_from_slice(indent);
334    }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339        write!(f, "BytesStart {{ buf: ")?;
340        write_cow_string(f, &self.buf)?;
341        write!(f, ", name_len: {} }}", self.name_len)
342    }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346    type Target = [u8];
347
348    fn deref(&self) -> &[u8] {
349        &self.buf
350    }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag from unstructured data.
    ///
    /// The name is taken from an arbitrary `&str` and must be non-empty and
    /// consist of alphanumeric characters only, otherwise
    /// `arbitrary::Error::IncorrectFormat` is returned. Attributes are taken
    /// from an arbitrary list of `(&str, &str)` pairs.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `Vec` is already `IntoIterator`, no explicit `.into_iter()` required
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
/// Data of a closing tag (`Event::End`): `</name>`.
///
/// The tag name is available through the [`name`] and [`local_name`] methods.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note that the inner text will never contain a `>` character:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw bytes of the closing tag content, exposed as the tag name
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411    #[inline]
412    pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413        BytesEnd { name }
414    }
415
416    /// Creates a new `BytesEnd` borrowing a slice.
417    ///
418    /// # Warning
419    ///
420    /// `name` must be a valid name.
421    #[inline]
422    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423        Self::wrap(str_cow_to_bytes(name))
424    }
425
426    /// Converts the event into an owned event.
427    pub fn into_owned(self) -> BytesEnd<'static> {
428        BytesEnd {
429            name: Cow::Owned(self.name.into_owned()),
430        }
431    }
432
433    /// Converts the event into a borrowed event.
434    #[inline]
435    pub fn borrow(&self) -> BytesEnd<'_> {
436        BytesEnd {
437            name: Cow::Borrowed(&self.name),
438        }
439    }
440
441    /// Gets the undecoded raw tag name, as present in the input stream.
442    #[inline]
443    pub fn name(&self) -> QName<'_> {
444        QName(&self.name)
445    }
446
447    /// Gets the undecoded raw local tag name (excluding namespace) as present
448    /// in the input stream.
449    ///
450    /// All content up to and including the first `:` character is removed from the tag name.
451    #[inline]
452    pub fn local_name(&self) -> LocalName<'_> {
453        self.name().into()
454    }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459        write!(f, "BytesEnd {{ name: ")?;
460        write_cow_string(f, &self.name)?;
461        write!(f, " }}")
462    }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466    type Target = [u8];
467
468    fn deref(&self) -> &[u8] {
469        &self.name
470    }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474    #[inline]
475    fn from(name: QName<'a>) -> Self {
476        Self::wrap(name.into_inner().into())
477    }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary `&str`.
    /// The name is not validated.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
492/// Data from various events (most notably, `Event::Text`) that stored in XML
493/// in escaped form. Internally data is stored in escaped form.
494///
495/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
496/// returns the content of this event. In case of comment this is everything
497/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
498/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
499/// (i.e. in case of DTD the first character is never space):
500///
501/// ```
502/// # use quick_xml::events::{BytesText, Event};
503/// # use quick_xml::reader::Reader;
504/// # use pretty_assertions::assert_eq;
505/// // Remember, that \ at the end of string literal strips
506/// // all space characters to the first non-space character
507/// let mut reader = Reader::from_str("\
508///     <!DOCTYPE comment or text >\
509///     comment or text \
510///     <!--comment or text -->"
511/// );
512/// let content = "comment or text ";
513/// let event = BytesText::new(content);
514///
515/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
516/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
517/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
518/// // deref coercion of &BytesText to &[u8]
519/// assert_eq!(&event as &[u8], content.as_bytes());
520/// // AsRef<[u8]> for &T + deref coercion
521/// assert_eq!(event.as_ref(), content.as_bytes());
522/// ```
523#[derive(Clone, Eq, PartialEq)]
524pub struct BytesText<'a> {
525    /// Escaped then encoded content of the event. Content is encoded in the XML
526    /// document encoding when event comes from the reader and should be in the
527    /// document encoding when event passed to the writer
528    content: Cow<'a, [u8]>,
529    /// Encoding in which the `content` is stored inside the event
530    decoder: Decoder,
531}
532
533impl<'a> BytesText<'a> {
534    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
535    #[inline]
536    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
537        Self {
538            content: content.into(),
539            decoder,
540        }
541    }
542
543    /// Creates a new `BytesText` from an escaped string.
544    #[inline]
545    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
546        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
547    }
548
549    /// Creates a new `BytesText` from a string. The string is expected not to
550    /// be escaped.
551    #[inline]
552    pub fn new(content: &'a str) -> Self {
553        Self::from_escaped(escape(content))
554    }
555
556    /// Ensures that all data is owned to extend the object's lifetime if
557    /// necessary.
558    #[inline]
559    pub fn into_owned(self) -> BytesText<'static> {
560        BytesText {
561            content: self.content.into_owned().into(),
562            decoder: self.decoder,
563        }
564    }
565
566    /// Extracts the inner `Cow` from the `BytesText` event container.
567    #[inline]
568    pub fn into_inner(self) -> Cow<'a, [u8]> {
569        self.content
570    }
571
572    /// Converts the event into a borrowed event.
573    #[inline]
574    pub fn borrow(&self) -> BytesText<'_> {
575        BytesText {
576            content: Cow::Borrowed(&self.content),
577            decoder: self.decoder,
578        }
579    }
580
581    /// Decodes the content of the event.
582    ///
583    /// This will allocate if the value contains any escape sequences or in
584    /// non-UTF-8 encoding.
585    ///
586    /// This method does not normalizes end-of-line characters as required by [specification].
587    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
588    ///
589    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
590    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
591        self.decoder.decode_cow(&self.content)
592    }
593
594    /// Decodes the content of the XML 1.0 or HTML event.
595    ///
596    /// When this event produced by the reader, it uses the encoding information
597    /// associated with that reader to interpret the raw bytes contained within
598    /// this text event.
599    ///
600    /// This will allocate if the value contains any escape sequences or in non-UTF-8
601    /// encoding, or EOL normalization is required.
602    ///
603    /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
604    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs.
605    ///
606    /// This method also can be used to get HTML content, because rules the same.
607    ///
608    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
609    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
610    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
611    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
612        self.decoder.content(&self.content, normalize_xml10_eols)
613    }
614
615    /// Decodes the content of the XML 1.1 event.
616    ///
617    /// When this event produced by the reader, it uses the encoding information
618    /// associated with that reader to interpret the raw bytes contained within
619    /// this text event.
620    ///
621    /// This will allocate if the value contains any escape sequences or in non-UTF-8
622    /// encoding, or EOL normalization is required.
623    ///
624    /// Note, that this method should be used only if event represents XML 1.1 content,
625    /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs.
626    ///
627    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
628    ///
629    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
630    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
631    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
632    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
633        self.decoder.content(&self.content, normalize_xml11_eols)
634    }
635
636    /// Alias for [`xml11_content()`](Self::xml11_content).
637    #[inline]
638    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
639        self.xml11_content()
640    }
641
642    /// Alias for [`xml10_content()`](Self::xml10_content).
643    #[inline]
644    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
645        self.xml10_content()
646    }
647
648    /// Removes leading XML whitespace bytes from text content.
649    ///
650    /// Returns `true` if content is empty after that
651    pub fn inplace_trim_start(&mut self) -> bool {
652        self.content = trim_cow(
653            replace(&mut self.content, Cow::Borrowed(b"")),
654            trim_xml_start,
655        );
656        self.content.is_empty()
657    }
658
659    /// Removes trailing XML whitespace bytes from text content.
660    ///
661    /// Returns `true` if content is empty after that
662    pub fn inplace_trim_end(&mut self) -> bool {
663        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
664        self.content.is_empty()
665    }
666}
667
668impl<'a> Debug for BytesText<'a> {
669    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
670        write!(f, "BytesText {{ content: ")?;
671        write_cow_string(f, &self.content)?;
672        write!(f, " }}")
673    }
674}
675
676impl<'a> Deref for BytesText<'a> {
677    type Target = [u8];
678
679    fn deref(&self) -> &[u8] {
680        &self.content
681    }
682}
683
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary `&str`.
    ///
    /// Strings containing any non-alphanumeric character are rejected with
    /// `arbitrary::Error::IncorrectFormat`; the empty string is accepted.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
698
699////////////////////////////////////////////////////////////////////////////////////////////////////
700
701/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
702/// [convert](Self::escape) it to [`BytesText`].
703///
704/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
705/// returns the content of this event between `<![CDATA[` and `]]>`.
706///
707/// Note, that inner text will not contain `]]>` sequence inside:
708///
709/// ```
710/// # use quick_xml::events::{BytesCData, Event};
711/// # use quick_xml::reader::Reader;
712/// # use pretty_assertions::assert_eq;
713/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
714/// let content = " CDATA section ";
715/// let event = BytesCData::new(content);
716///
717/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
718/// // deref coercion of &BytesCData to &[u8]
719/// assert_eq!(&event as &[u8], content.as_bytes());
720/// // AsRef<[u8]> for &T + deref coercion
721/// assert_eq!(event.as_ref(), content.as_bytes());
722/// ```
723#[derive(Clone, Eq, PartialEq)]
724pub struct BytesCData<'a> {
725    content: Cow<'a, [u8]>,
726    /// Encoding in which the `content` is stored inside the event
727    decoder: Decoder,
728}
729
730impl<'a> BytesCData<'a> {
731    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
732    #[inline]
733    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
734        Self {
735            content: content.into(),
736            decoder,
737        }
738    }
739
740    /// Creates a new `BytesCData` from a string.
741    ///
742    /// # Warning
743    ///
744    /// `content` must not contain the `]]>` sequence. You can use
745    /// [`BytesCData::escaped`] to escape the content instead.
746    #[inline]
747    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
748        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
749    }
750
751    /// Creates an iterator of `BytesCData` from a string.
752    ///
753    /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
754    /// sections, splitting the `]]` and `>` characters, because the CDATA closing
755    /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
756    /// for each of those sections.
757    ///
758    /// # Examples
759    ///
760    /// ```
761    /// # use quick_xml::events::BytesCData;
762    /// # use pretty_assertions::assert_eq;
763    /// let content = "";
764    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
765    /// assert_eq!(cdata, &[BytesCData::new("")]);
766    ///
767    /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
768    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
769    /// assert_eq!(cdata, &[
770    ///     BytesCData::new("Certain tokens like ]]"),
771    ///     BytesCData::new("> can be difficult and <invalid>"),
772    /// ]);
773    ///
774    /// let content = "foo]]>bar]]>baz]]>quux";
775    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
776    /// assert_eq!(cdata, &[
777    ///     BytesCData::new("foo]]"),
778    ///     BytesCData::new(">bar]]"),
779    ///     BytesCData::new(">baz]]"),
780    ///     BytesCData::new(">quux"),
781    /// ]);
782    /// ```
783    #[inline]
784    pub fn escaped(content: &'a str) -> CDataIterator<'a> {
785        CDataIterator {
786            unprocessed: content.as_bytes(),
787            finished: false,
788        }
789    }
790
791    /// Ensures that all data is owned to extend the object's lifetime if
792    /// necessary.
793    #[inline]
794    pub fn into_owned(self) -> BytesCData<'static> {
795        BytesCData {
796            content: self.content.into_owned().into(),
797            decoder: self.decoder,
798        }
799    }
800
801    /// Extracts the inner `Cow` from the `BytesCData` event container.
802    #[inline]
803    pub fn into_inner(self) -> Cow<'a, [u8]> {
804        self.content
805    }
806
807    /// Converts the event into a borrowed event.
808    #[inline]
809    pub fn borrow(&self) -> BytesCData<'_> {
810        BytesCData {
811            content: Cow::Borrowed(&self.content),
812            decoder: self.decoder,
813        }
814    }
815
816    /// Converts this CDATA content to an escaped version, that can be written
817    /// as an usual text in XML.
818    ///
819    /// This function performs following replacements:
820    ///
821    /// | Character | Replacement
822    /// |-----------|------------
823    /// | `<`       | `&lt;`
824    /// | `>`       | `&gt;`
825    /// | `&`       | `&amp;`
826    /// | `'`       | `&apos;`
827    /// | `"`       | `&quot;`
828    pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
829        let decoded = self.decode()?;
830        Ok(BytesText::wrap(
831            match escape(decoded) {
832                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
833                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
834            },
835            Decoder::utf8(),
836        ))
837    }
838
839    /// Converts this CDATA content to an escaped version, that can be written
840    /// as an usual text in XML.
841    ///
842    /// In XML text content, it is allowed (though not recommended) to leave
843    /// the quote special characters `"` and `'` unescaped.
844    ///
845    /// This function performs following replacements:
846    ///
847    /// | Character | Replacement
848    /// |-----------|------------
849    /// | `<`       | `&lt;`
850    /// | `>`       | `&gt;`
851    /// | `&`       | `&amp;`
852    pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
853        let decoded = self.decode()?;
854        Ok(BytesText::wrap(
855            match partial_escape(decoded) {
856                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
857                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
858            },
859            Decoder::utf8(),
860        ))
861    }
862
863    /// Converts this CDATA content to an escaped version, that can be written
864    /// as an usual text in XML. This method escapes only those characters that
865    /// must be escaped according to the [specification].
866    ///
867    /// This function performs following replacements:
868    ///
869    /// | Character | Replacement
870    /// |-----------|------------
871    /// | `<`       | `&lt;`
872    /// | `&`       | `&amp;`
873    ///
874    /// [specification]: https://www.w3.org/TR/xml11/#syntax
875    pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
876        let decoded = self.decode()?;
877        Ok(BytesText::wrap(
878            match minimal_escape(decoded) {
879                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
880                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
881            },
882            Decoder::utf8(),
883        ))
884    }
885
    /// Decodes the raw input byte content of the CDATA section into a string,
    /// without performing XML entity escaping.
    ///
    /// When this event is produced by the XML reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within this
    /// CDATA event.
    ///
    /// This method does not normalize end-of-line characters as required by the [specification].
    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
    ///
    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.decode_cow(&self.content)
    }
900
    /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or
    /// HTML event into a string.
    ///
    /// When this event is produced by the reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within
    /// this CDATA event.
    ///
    /// This will allocate if the value is in a non-UTF-8 encoding, or EOL normalization
    /// is required.
    ///
    /// Note that this method should be used only if the event represents XML 1.0 or HTML
    /// content, because the rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1]
    /// differ.
    ///
    /// This method can also be used to get HTML content, because the rules are the same.
    ///
    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.content(&self.content, normalize_xml10_eols)
    }
922
    /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event
    /// into a string.
    ///
    /// When this event is produced by the reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within
    /// this CDATA event.
    ///
    /// This will allocate if the value is in a non-UTF-8 encoding, or EOL normalization
    /// is required.
    ///
    /// Note that this method should be used only if the event represents XML 1.1 content,
    /// because the rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differ.
    ///
    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
    ///
    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.content(&self.content, normalize_xml11_eols)
    }
944
    /// Alias for [`xml11_content()`](Self::xml11_content): the content is decoded
    /// and its EOLs are normalized by the XML 1.1 rules.
    #[inline]
    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.xml11_content()
    }
950
    /// Alias for [`xml10_content()`](Self::xml10_content): the content is decoded
    /// and its EOLs are normalized by the XML 1.0 rules.
    #[inline]
    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.xml10_content()
    }
956}
957
958impl<'a> Debug for BytesCData<'a> {
959    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
960        write!(f, "BytesCData {{ content: ")?;
961        write_cow_string(f, &self.content)?;
962        write!(f, " }}")
963    }
964}
965
966impl<'a> Deref for BytesCData<'a> {
967    type Target = [u8];
968
969    fn deref(&self) -> &[u8] {
970        &self.content
971    }
972}
973
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds the event from a string taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// The only value consumed by `arbitrary()` is a `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
983
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set when the last chunk of the data has been emitted. Once it is set,
    /// `next()` returns `None`.
    finished: bool,
}
994
995impl<'a> Debug for CDataIterator<'a> {
996    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
997        f.debug_struct("CDataIterator")
998            .field("unprocessed", &Bytes(self.unprocessed))
999            .field("finished", &self.finished)
1000            .finish()
1001    }
1002}
1003
1004impl<'a> Iterator for CDataIterator<'a> {
1005    type Item = BytesCData<'a>;
1006
1007    fn next(&mut self) -> Option<BytesCData<'a>> {
1008        if self.finished {
1009            return None;
1010        }
1011
1012        for gt in memchr::memchr_iter(b'>', self.unprocessed) {
1013            if self.unprocessed[..gt].ends_with(b"]]") {
1014                let (slice, rest) = self.unprocessed.split_at(gt);
1015                self.unprocessed = rest;
1016                return Some(BytesCData::wrap(slice, Decoder::utf8()));
1017            }
1018        }
1019
1020        self.finished = true;
1021        Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
1022    }
1023}
1024
// `next()` returns `None` for as long as `finished` is set, and nothing visible here
// resets that flag, so the iterator keeps returning `None` after the first one.
impl FusedIterator for CDataIterator<'_> {}
1026
1027////////////////////////////////////////////////////////////////////////////////////////////////////
1028
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// The whole instruction, stored as a `BytesStart`: its name is exposed as the
    /// PI target, and its raw attributes as the PI content.
    content: BytesStart<'a>,
}
1056
1057impl<'a> BytesPI<'a> {
    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
    ///
    /// All arguments are forwarded unchanged to `BytesStart::wrap`.
    /// `target_len` is presumably the length of the PI target in bytes
    /// (TODO confirm against `BytesStart::wrap`).
    #[inline]
    pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
        Self {
            content: BytesStart::wrap(content, target_len, decoder),
        }
    }
1065
1066    /// Creates a new `BytesPI` from a string.
1067    ///
1068    /// # Warning
1069    ///
1070    /// `content` must not contain the `?>` sequence.
1071    #[inline]
1072    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1073        let buf = str_cow_to_bytes(content);
1074        let name_len = name_len(&buf);
1075        Self {
1076            content: BytesStart {
1077                buf,
1078                name_len,
1079                decoder: Decoder::utf8(),
1080            },
1081        }
1082    }
1083
1084    /// Ensures that all data is owned to extend the object's lifetime if
1085    /// necessary.
1086    #[inline]
1087    pub fn into_owned(self) -> BytesPI<'static> {
1088        BytesPI {
1089            content: self.content.into_owned().into(),
1090        }
1091    }
1092
    /// Extracts the inner `Cow` from the `BytesPI` event container.
    ///
    /// The returned buffer is the whole buffer of the event, i.e. it holds both
    /// the target and the content of the instruction.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content.buf
    }
1098
    /// Converts the event into a borrowed event.
    ///
    /// The returned event borrows from `self`, which is left untouched.
    #[inline]
    pub fn borrow(&self) -> BytesPI<'_> {
        BytesPI {
            content: self.content.borrow(),
        }
    }
1106
1107    /// A target used to identify the application to which the instruction is directed.
1108    ///
1109    /// # Example
1110    ///
1111    /// ```
1112    /// # use pretty_assertions::assert_eq;
1113    /// use quick_xml::events::BytesPI;
1114    ///
1115    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1116    /// assert_eq!(instruction.target(), b"xml-stylesheet");
1117    /// ```
1118    #[inline]
1119    pub fn target(&self) -> &[u8] {
1120        self.content.name().0
1121    }
1122
    /// Content of the processing instruction. Contains everything between target
    /// name and the end of the instruction. A direct consequence is that non-empty
    /// content starts with the whitespace that separates it from the target
    /// (a space character in the example below).
    ///
    /// NOTE(review): for an instruction that consists only of a target the returned
    /// slice is presumably empty, so callers should not rely on a leading space —
    /// confirm against `BytesStart::attributes_raw`.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::BytesPI;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// assert_eq!(instruction.content(), br#" href="style.css""#);
    /// ```
    #[inline]
    pub fn content(&self) -> &[u8] {
        self.content.attributes_raw()
    }
1140
    /// A view of the processing instruction's content as a list of key-value pairs.
    ///
    /// Key-value pairs are used in some processing instructions, for example in
    /// `<?xml-stylesheet?>`.
    ///
    /// The returned iterator does not validate attribute values as may be required by
    /// the target's rules. For example, it doesn't check that the substring `?>` is not
    /// present in the attribute value. That shouldn't be a problem when the event
    /// is produced by the reader, because the reader detects the end of a processing
    /// instruction by the first `?>` sequence, as required by the specification, and
    /// therefore this sequence cannot appear inside it.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use std::borrow::Cow;
    /// use quick_xml::events::attributes::Attribute;
    /// use quick_xml::events::BytesPI;
    /// use quick_xml::name::QName;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// for attr in instruction.attributes() {
    ///     assert_eq!(attr, Ok(Attribute {
    ///         key: QName(b"href"),
    ///         value: Cow::Borrowed(b"style.css"),
    ///     }));
    /// }
    /// ```
    #[inline]
    pub fn attributes(&self) -> Attributes<'_> {
        self.content.attributes()
    }
1174}
1175
1176impl<'a> Debug for BytesPI<'a> {
1177    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1178        write!(f, "BytesPI {{ content: ")?;
1179        write_cow_string(f, &self.content.buf)?;
1180        write!(f, " }}")
1181    }
1182}
1183
1184impl<'a> Deref for BytesPI<'a> {
1185    type Target = [u8];
1186
1187    fn deref(&self) -> &[u8] {
1188        &self.content
1189    }
1190}
1191
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds the event from a string taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// The only value consumed by `arbitrary()` is a `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1201
1202////////////////////////////////////////////////////////////////////////////////////////////////////
1203
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](https://www.w3.org/TR/xml11/#sec-prolog-dtd)
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
/// let content = "xml version = '1.0' ";
/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
///
/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
/// // deref coercion of &BytesDecl to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// The declaration, stored as a `BytesStart`, which gives access to the
    /// `version`, `encoding` and `standalone` values as to ordinary attributes.
    content: BytesStart<'a>,
}
1231
1232impl<'a> BytesDecl<'a> {
1233    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1234    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1235    /// attribute.
1236    ///
1237    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1238    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1239    /// the double quote character is not allowed in any of the attribute values.
1240    pub fn new(
1241        version: &str,
1242        encoding: Option<&str>,
1243        standalone: Option<&str>,
1244    ) -> BytesDecl<'static> {
1245        // Compute length of the buffer based on supplied attributes
1246        // ' encoding=""'   => 12
1247        let encoding_attr_len = if let Some(xs) = encoding {
1248            12 + xs.len()
1249        } else {
1250            0
1251        };
1252        // ' standalone=""' => 14
1253        let standalone_attr_len = if let Some(xs) = standalone {
1254            14 + xs.len()
1255        } else {
1256            0
1257        };
1258        // 'xml version=""' => 14
1259        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1260
1261        buf.push_str("xml version=\"");
1262        buf.push_str(version);
1263
1264        if let Some(encoding_val) = encoding {
1265            buf.push_str("\" encoding=\"");
1266            buf.push_str(encoding_val);
1267        }
1268
1269        if let Some(standalone_val) = standalone {
1270            buf.push_str("\" standalone=\"");
1271            buf.push_str(standalone_val);
1272        }
1273        buf.push('"');
1274
1275        BytesDecl {
1276            content: BytesStart::from_content(buf, 3),
1277        }
1278    }
1279
    /// Creates a `BytesDecl` from a `BytesStart`.
    ///
    /// The `start` event is stored as is, no validation of its content is performed here.
    pub const fn from_start(start: BytesStart<'a>) -> Self {
        Self { content: start }
    }
1284
1285    /// Gets xml version, excluding quotes (`'` or `"`).
1286    ///
1287    /// According to the [grammar], the version *must* be the first thing in the declaration.
1288    /// This method tries to extract the first thing in the declaration and return it.
1289    /// In case of multiple attributes value of the first one is returned.
1290    ///
1291    /// If version is missed in the declaration, or the first thing is not a version,
1292    /// [`IllFormedError::MissingDeclVersion`] will be returned.
1293    ///
1294    /// # Examples
1295    ///
1296    /// ```
1297    /// use quick_xml::errors::{Error, IllFormedError};
1298    /// use quick_xml::events::{BytesDecl, BytesStart};
1299    ///
1300    /// // <?xml version='1.1'?>
1301    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1302    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1303    ///
1304    /// // <?xml version='1.0' version='1.1'?>
1305    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1306    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1307    ///
1308    /// // <?xml encoding='utf-8'?>
1309    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1310    /// match decl.version() {
1311    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1312    ///     _ => assert!(false),
1313    /// }
1314    ///
1315    /// // <?xml encoding='utf-8' version='1.1'?>
1316    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1317    /// match decl.version() {
1318    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1319    ///     _ => assert!(false),
1320    /// }
1321    ///
1322    /// // <?xml?>
1323    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1324    /// match decl.version() {
1325    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1326    ///     _ => assert!(false),
1327    /// }
1328    /// ```
1329    ///
1330    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1331    pub fn version(&self) -> Result<Cow<'_, [u8]>, Error> {
1332        // The version *must* be the first thing in the declaration.
1333        match self.content.attributes().with_checks(false).next() {
1334            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1335            // first attribute was not "version"
1336            Some(Ok(a)) => {
1337                let found = from_utf8(a.key.as_ref())
1338                    .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1339                    .to_string();
1340                Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1341                    found,
1342                ))))
1343            }
1344            // error parsing attributes
1345            Some(Err(e)) => Err(e.into()),
1346            // no attributes
1347            None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1348        }
1349    }
1350
1351    /// Gets xml encoding, excluding quotes (`'` or `"`).
1352    ///
1353    /// Although according to the [grammar] encoding must appear before `"standalone"`
1354    /// and after `"version"`, this method does not check that. The first occurrence
1355    /// of the attribute will be returned even if there are several. Also, method does
1356    /// not restrict symbols that can forming the encoding, so the returned encoding
1357    /// name may not correspond to the grammar.
1358    ///
1359    /// # Examples
1360    ///
1361    /// ```
1362    /// use std::borrow::Cow;
1363    /// use quick_xml::Error;
1364    /// use quick_xml::events::{BytesDecl, BytesStart};
1365    ///
1366    /// // <?xml version='1.1'?>
1367    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1368    /// assert!(decl.encoding().is_none());
1369    ///
1370    /// // <?xml encoding='utf-8'?>
1371    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1372    /// match decl.encoding() {
1373    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1374    ///     _ => assert!(false),
1375    /// }
1376    ///
1377    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1378    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1379    /// match decl.encoding() {
1380    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1381    ///     _ => assert!(false),
1382    /// }
1383    /// ```
1384    ///
1385    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1386    pub fn encoding(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1387        self.content
1388            .try_get_attribute("encoding")
1389            .map(|a| a.map(|a| a.value))
1390            .transpose()
1391    }
1392
1393    /// Gets xml standalone, excluding quotes (`'` or `"`).
1394    ///
1395    /// Although according to the [grammar] standalone flag must appear after `"version"`
1396    /// and `"encoding"`, this method does not check that. The first occurrence of the
1397    /// attribute will be returned even if there are several. Also, method does not
1398    /// restrict symbols that can forming the value, so the returned flag name may not
1399    /// correspond to the grammar.
1400    ///
1401    /// # Examples
1402    ///
1403    /// ```
1404    /// use std::borrow::Cow;
1405    /// use quick_xml::Error;
1406    /// use quick_xml::events::{BytesDecl, BytesStart};
1407    ///
1408    /// // <?xml version='1.1'?>
1409    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1410    /// assert!(decl.standalone().is_none());
1411    ///
1412    /// // <?xml standalone='yes'?>
1413    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1414    /// match decl.standalone() {
1415    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1416    ///     _ => assert!(false),
1417    /// }
1418    ///
1419    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1420    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1421    /// match decl.standalone() {
1422    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1423    ///     _ => assert!(false),
1424    /// }
1425    /// ```
1426    ///
1427    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1428    pub fn standalone(&self) -> Option<Result<Cow<'_, [u8]>, AttrError>> {
1429        self.content
1430            .try_get_attribute("standalone")
1431            .map(|a| a.map(|a| a.value))
1432            .transpose()
1433    }
1434
1435    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1436    /// algorithm.
1437    ///
1438    /// If encoding in not known, or `encoding` key was not found, returns `None`.
1439    /// In case of duplicated `encoding` key, encoding, corresponding to the first
1440    /// one, is returned.
1441    #[cfg(feature = "encoding")]
1442    pub fn encoder(&self) -> Option<&'static Encoding> {
1443        self.encoding()
1444            .and_then(|e| e.ok())
1445            .and_then(|e| Encoding::for_label(&e))
1446    }
1447
    /// Converts the event into an owned event.
    ///
    /// The returned event no longer borrows from the buffer used when reading.
    pub fn into_owned(self) -> BytesDecl<'static> {
        BytesDecl {
            content: self.content.into_owned(),
        }
    }
1454
    /// Converts the event into a borrowed event.
    ///
    /// The returned event borrows from `self`, which is left untouched.
    #[inline]
    pub fn borrow(&self) -> BytesDecl<'_> {
        BytesDecl {
            content: self.content.borrow(),
        }
    }
1462}
1463
1464impl<'a> Deref for BytesDecl<'a> {
1465    type Target = [u8];
1466
1467    fn deref(&self) -> &[u8] {
1468        &self.content
1469    }
1470}
1471
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds the declaration from an arbitrary version and optional
    /// encoding and standalone values, taken from `u` in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    /// Combines the hints of all three values consumed by `arbitrary()`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        arbitrary::size_hint::and_all(&[
            <&str as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
        ])
    }
}
1486
1487////////////////////////////////////////////////////////////////////////////////////////////////////
1488
/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `&` and `;`:
///
/// ```
/// # use quick_xml::events::{BytesRef, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"&entity;"#);
/// let content = "entity";
/// let event = BytesRef::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
/// // deref coercion of &BytesRef to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Raw bytes of the reference between `&` and `;`.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}
1514
1515impl<'a> BytesRef<'a> {
    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding.
    ///
    /// The `content` is borrowed, not copied.
    #[inline]
    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
        Self {
            content: Cow::Borrowed(content),
            decoder,
        }
    }
1524
    /// Creates a new `BytesRef` from a string, which can be either borrowed or owned.
    ///
    /// The event is marked as UTF-8 encoded.
    ///
    /// # Warning
    ///
    /// `name` must be a valid name.
    #[inline]
    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
        Self {
            content: str_cow_to_bytes(name),
            decoder: Decoder::utf8(),
        }
    }
1537
1538    /// Converts the event into an owned event.
1539    pub fn into_owned(self) -> BytesRef<'static> {
1540        BytesRef {
1541            content: Cow::Owned(self.content.into_owned()),
1542            decoder: self.decoder,
1543        }
1544    }
1545
    /// Extracts the inner `Cow` from the `BytesRef` event container.
    ///
    /// The encoding information stored in the event is dropped.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content
    }
1551
    /// Converts the event into a borrowed event.
    ///
    /// The returned event borrows the content from `self` and uses the same decoder.
    #[inline]
    pub fn borrow(&self) -> BytesRef<'_> {
        BytesRef {
            content: Cow::Borrowed(&self.content),
            decoder: self.decoder,
        }
    }
1560
    /// Decodes the content of the event.
    ///
    /// This will allocate if the value contains any escape sequences or is in
    /// a non-UTF-8 encoding.
    ///
    /// This method does not normalize end-of-line characters as required by the [specification].
    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
    ///
    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
        // NOTE(review): the body only forwards to `Decoder::decode_cow`, so the remark
        // about escape sequences in the documentation looks unrelated to this
        // method — confirm against `Decoder::decode_cow`.
        self.decoder.decode_cow(&self.content)
    }
1573
    /// Decodes the content of the XML 1.0 or HTML event.
    ///
    /// When this event is produced by the reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within
    /// this general reference event.
    ///
    /// This will allocate if the value is in a non-UTF-8 encoding, or EOL normalization
    /// is required.
    ///
    /// Note that this method should be used only if the event represents XML 1.0 or HTML
    /// content, because the rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1]
    /// differ.
    ///
    /// This method can also be used to get HTML content, because the rules are the same.
    ///
    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.content(&self.content, normalize_xml10_eols)
    }
1594
    /// Decodes the content of the XML 1.1 event.
    ///
    /// When this event is produced by the reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within
    /// this general reference event.
    ///
    /// This will allocate if the value is in a non-UTF-8 encoding, or EOL normalization
    /// is required.
    ///
    /// Note that this method should be used only if the event represents XML 1.1 content,
    /// because the rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differ.
    ///
    /// To get HTML content use [`xml10_content()`](Self::xml10_content).
    ///
    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
    pub fn xml11_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.content(&self.content, normalize_xml11_eols)
    }
1615
    /// Alias for [`xml11_content()`](Self::xml11_content): the content is decoded
    /// and its EOLs are normalized by the XML 1.1 rules.
    #[inline]
    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.xml11_content()
    }
1621
    /// Alias for [`xml10_content()`](Self::xml10_content): the content is decoded
    /// and its EOLs are normalized by the XML 1.0 rules.
    #[inline]
    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.xml10_content()
    }
1627
1628    /// Returns `true` if the specified reference represents the character reference
1629    /// (`&#<number>;`).
1630    ///
1631    /// ```
1632    /// # use quick_xml::events::BytesRef;
1633    /// # use pretty_assertions::assert_eq;
1634    /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1635    /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1636    /// assert_eq!(BytesRef::new("lt"  ).is_char_ref(), false);
1637    /// ```
1638    pub fn is_char_ref(&self) -> bool {
1639        matches!(self.content.first(), Some(b'#'))
1640    }
1641
1642    /// If this reference represents character reference, then resolves it and
1643    /// returns the character, otherwise returns `None`.
1644    ///
1645    /// This method does not check if character is allowed for XML, in other words,
1646    /// well-formedness constraint [WFC: Legal Char] is not enforced.
1647    /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1648    ///
1649    /// ```
1650    /// # use quick_xml::events::BytesRef;
1651    /// # use pretty_assertions::assert_eq;
1652    /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1653    /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1654    /// assert_eq!(BytesRef::new("lt"  ).resolve_char_ref().unwrap(), None);
1655    /// ```
1656    ///
1657    /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1658    pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1659        if let Some(num) = self.decode()?.strip_prefix('#') {
1660            let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1661            return Ok(Some(ch));
1662        }
1663        Ok(None)
1664    }
1665}
1666
1667impl<'a> Debug for BytesRef<'a> {
1668    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1669        write!(f, "BytesRef {{ content: ")?;
1670        write_cow_string(f, &self.content)?;
1671        write!(f, " }}")
1672    }
1673}
1674
1675impl<'a> Deref for BytesRef<'a> {
1676    type Target = [u8];
1677
1678    fn deref(&self) -> &[u8] {
1679        &self.content
1680    }
1681}
1682
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Creates a reference from an arbitrary string slice taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// Reports the same size hint as `&str`, the only data consumed by
    /// [`Self::arbitrary`].
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1693
1694////////////////////////////////////////////////////////////////////////////////////////////////////
1695
1696/// Event emitted by [`Reader::read_event_into`].
1697///
1698/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
1699#[derive(Clone, Debug, Eq, PartialEq)]
1700#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1701pub enum Event<'a> {
1702    /// Start tag (with attributes) `<tag attr="value">`.
1703    Start(BytesStart<'a>),
1704    /// End tag `</tag>`.
1705    End(BytesEnd<'a>),
1706    /// Empty element tag (with attributes) `<tag attr="value" />`.
1707    Empty(BytesStart<'a>),
1708    /// Escaped character data between tags.
1709    Text(BytesText<'a>),
1710    /// Unescaped character data stored in `<![CDATA[...]]>`.
1711    CData(BytesCData<'a>),
1712    /// Comment `<!-- ... -->`.
1713    Comment(BytesText<'a>),
1714    /// XML declaration `<?xml ...?>`.
1715    Decl(BytesDecl<'a>),
1716    /// Processing instruction `<?...?>`.
1717    PI(BytesPI<'a>),
1718    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
1719    DocType(BytesText<'a>),
1720    /// General reference `&entity;` in the textual data. Can be either an entity
1721    /// reference, or a character reference.
1722    GeneralRef(BytesRef<'a>),
1723    /// End of XML document.
1724    Eof,
1725}
1726
1727impl<'a> Event<'a> {
1728    /// Converts the event to an owned version, untied to the lifetime of
1729    /// buffer used when reading but incurring a new, separate allocation.
1730    pub fn into_owned(self) -> Event<'static> {
1731        match self {
1732            Event::Start(e) => Event::Start(e.into_owned()),
1733            Event::End(e) => Event::End(e.into_owned()),
1734            Event::Empty(e) => Event::Empty(e.into_owned()),
1735            Event::Text(e) => Event::Text(e.into_owned()),
1736            Event::Comment(e) => Event::Comment(e.into_owned()),
1737            Event::CData(e) => Event::CData(e.into_owned()),
1738            Event::Decl(e) => Event::Decl(e.into_owned()),
1739            Event::PI(e) => Event::PI(e.into_owned()),
1740            Event::DocType(e) => Event::DocType(e.into_owned()),
1741            Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1742            Event::Eof => Event::Eof,
1743        }
1744    }
1745
1746    /// Converts the event into a borrowed event.
1747    #[inline]
1748    pub fn borrow(&self) -> Event<'_> {
1749        match self {
1750            Event::Start(e) => Event::Start(e.borrow()),
1751            Event::End(e) => Event::End(e.borrow()),
1752            Event::Empty(e) => Event::Empty(e.borrow()),
1753            Event::Text(e) => Event::Text(e.borrow()),
1754            Event::Comment(e) => Event::Comment(e.borrow()),
1755            Event::CData(e) => Event::CData(e.borrow()),
1756            Event::Decl(e) => Event::Decl(e.borrow()),
1757            Event::PI(e) => Event::PI(e.borrow()),
1758            Event::DocType(e) => Event::DocType(e.borrow()),
1759            Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1760            Event::Eof => Event::Eof,
1761        }
1762    }
1763}
1764
1765impl<'a> Deref for Event<'a> {
1766    type Target = [u8];
1767
1768    fn deref(&self) -> &[u8] {
1769        match *self {
1770            Event::Start(ref e) | Event::Empty(ref e) => e,
1771            Event::End(ref e) => e,
1772            Event::Text(ref e) => e,
1773            Event::Decl(ref e) => e,
1774            Event::PI(ref e) => e,
1775            Event::CData(ref e) => e,
1776            Event::Comment(ref e) => e,
1777            Event::DocType(ref e) => e,
1778            Event::GeneralRef(ref e) => e,
1779            Event::Eof => &[],
1780        }
1781    }
1782}
1783
1784impl<'a> AsRef<Event<'a>> for Event<'a> {
1785    fn as_ref(&self) -> &Event<'a> {
1786        self
1787    }
1788}
1789
1790////////////////////////////////////////////////////////////////////////////////////////////////////
1791
/// Turns string content into its UTF-8 bytes while keeping its ownership kind:
/// a borrowed string stays a borrowed slice, and an owned `String` is converted
/// into its byte vector.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1799
/// Applies `trim` to `value`, preserving whether the data is borrowed or owned.
///
/// - Borrowed data is simply replaced by the slice returned from `trim`.
/// - Owned data is returned untouched when `trim` yields a slice of the same
///   length; otherwise the trimmed slice is copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let buffer = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(buffer) => buffer,
    };

    let kept = trim(&buffer);
    if kept.len() == buffer.len() {
        // Nothing was cut off, so the existing allocation is reused.
        Cow::Owned(buffer)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1815
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A start tag created from a bare name reports that name and its length.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag changes the name part of the tag; the expected lengths
    /// are those of the whole tag content (name plus raw attributes).
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        // `test` + ` x="a"` is 10 bytes long.
        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        // `g` + ` x="a"` is 7 bytes long.
        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));

        start.clear_attributes();

        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}