pulldown_cmark/
html.rs

1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! HTML renderer that takes an iterator of events as input.
22
23use std::collections::HashMap;
24
25use crate::strings::CowStr;
26use crate::Event::*;
27use crate::{Alignment, BlockQuoteKind, CodeBlockKind, Event, LinkType, Tag, TagEnd};
28use pulldown_cmark_escape::{
29    escape_href, escape_html, escape_html_body_text, FmtWriter, IoWriter, StrWrite,
30};
31
/// Which part of a table is currently being rendered.
///
/// The renderer consults this when opening and closing a table cell to pick
/// between header cells (`<th>`) and body cells (`<td>`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TableState {
    /// Rendering the header row; cells are emitted as `<th>`.
    Head,
    /// Rendering body rows; cells are emitted as `<td>`.
    Body,
}
36
37struct HtmlWriter<'a, I, W> {
38    /// Iterator supplying events.
39    iter: I,
40
41    /// Writer to write to.
42    writer: W,
43
44    /// Whether or not the last write wrote a newline.
45    end_newline: bool,
46
47    /// Whether if inside a metadata block (text should not be written)
48    in_non_writing_block: bool,
49
50    table_state: TableState,
51    table_alignments: Vec<Alignment>,
52    table_cell_index: usize,
53    numbers: HashMap<CowStr<'a>, usize>,
54}
55
56impl<'a, I, W> HtmlWriter<'a, I, W>
57where
58    I: Iterator<Item = Event<'a>>,
59    W: StrWrite,
60{
61    fn new(iter: I, writer: W) -> Self {
62        Self {
63            iter,
64            writer,
65            end_newline: true,
66            in_non_writing_block: false,
67            table_state: TableState::Head,
68            table_alignments: vec![],
69            table_cell_index: 0,
70            numbers: HashMap::new(),
71        }
72    }
73
74    /// Writes a new line.
75    #[inline]
76    fn write_newline(&mut self) -> Result<(), W::Error> {
77        self.end_newline = true;
78        self.writer.write_str("\n")
79    }
80
81    /// Writes a buffer, and tracks whether or not a newline was written.
82    #[inline]
83    fn write(&mut self, s: &str) -> Result<(), W::Error> {
84        self.writer.write_str(s)?;
85
86        if !s.is_empty() {
87            self.end_newline = s.ends_with('\n');
88        }
89        Ok(())
90    }
91
92    fn run(mut self) -> Result<(), W::Error> {
93        while let Some(event) = self.iter.next() {
94            match event {
95                Start(tag) => {
96                    self.start_tag(tag)?;
97                }
98                End(tag) => {
99                    self.end_tag(tag)?;
100                }
101                Text(text) => {
102                    if !self.in_non_writing_block {
103                        escape_html_body_text(&mut self.writer, &text)?;
104                        self.end_newline = text.ends_with('\n');
105                    }
106                }
107                Code(text) => {
108                    self.write("<code>")?;
109                    escape_html_body_text(&mut self.writer, &text)?;
110                    self.write("</code>")?;
111                }
112                InlineMath(text) => {
113                    self.write(r#"<span class="math math-inline">"#)?;
114                    escape_html(&mut self.writer, &text)?;
115                    self.write("</span>")?;
116                }
117                DisplayMath(text) => {
118                    self.write(r#"<span class="math math-display">"#)?;
119                    escape_html(&mut self.writer, &text)?;
120                    self.write("</span>")?;
121                }
122                Html(html) | InlineHtml(html) => {
123                    self.write(&html)?;
124                }
125                SoftBreak => {
126                    self.write_newline()?;
127                }
128                HardBreak => {
129                    self.write("<br />\n")?;
130                }
131                Rule => {
132                    if self.end_newline {
133                        self.write("<hr />\n")?;
134                    } else {
135                        self.write("\n<hr />\n")?;
136                    }
137                }
138                FootnoteReference(name) => {
139                    let len = self.numbers.len() + 1;
140                    self.write("<sup class=\"footnote-reference\"><a href=\"#")?;
141                    escape_html(&mut self.writer, &name)?;
142                    self.write("\">")?;
143                    let number = *self.numbers.entry(name).or_insert(len);
144                    write!(&mut self.writer, "{}", number)?;
145                    self.write("</a></sup>")?;
146                }
147                TaskListMarker(true) => {
148                    self.write("<input disabled=\"\" type=\"checkbox\" checked=\"\"/>\n")?;
149                }
150                TaskListMarker(false) => {
151                    self.write("<input disabled=\"\" type=\"checkbox\"/>\n")?;
152                }
153            }
154        }
155        Ok(())
156    }
157
158    /// Writes the start of an HTML tag.
159    fn start_tag(&mut self, tag: Tag<'a>) -> Result<(), W::Error> {
160        match tag {
161            Tag::HtmlBlock => Ok(()),
162            Tag::Paragraph => {
163                if self.end_newline {
164                    self.write("<p>")
165                } else {
166                    self.write("\n<p>")
167                }
168            }
169            Tag::Heading {
170                level,
171                id,
172                classes,
173                attrs,
174            } => {
175                if self.end_newline {
176                    self.write("<")?;
177                } else {
178                    self.write("\n<")?;
179                }
180                write!(&mut self.writer, "{}", level)?;
181                if let Some(id) = id {
182                    self.write(" id=\"")?;
183                    escape_html(&mut self.writer, &id)?;
184                    self.write("\"")?;
185                }
186                let mut classes = classes.iter();
187                if let Some(class) = classes.next() {
188                    self.write(" class=\"")?;
189                    escape_html(&mut self.writer, class)?;
190                    for class in classes {
191                        self.write(" ")?;
192                        escape_html(&mut self.writer, class)?;
193                    }
194                    self.write("\"")?;
195                }
196                for (attr, value) in attrs {
197                    self.write(" ")?;
198                    escape_html(&mut self.writer, &attr)?;
199                    if let Some(val) = value {
200                        self.write("=\"")?;
201                        escape_html(&mut self.writer, &val)?;
202                        self.write("\"")?;
203                    } else {
204                        self.write("=\"\"")?;
205                    }
206                }
207                self.write(">")
208            }
209            Tag::Table(alignments) => {
210                self.table_alignments = alignments;
211                self.write("<table>")
212            }
213            Tag::TableHead => {
214                self.table_state = TableState::Head;
215                self.table_cell_index = 0;
216                self.write("<thead><tr>")
217            }
218            Tag::TableRow => {
219                self.table_cell_index = 0;
220                self.write("<tr>")
221            }
222            Tag::TableCell => {
223                match self.table_state {
224                    TableState::Head => {
225                        self.write("<th")?;
226                    }
227                    TableState::Body => {
228                        self.write("<td")?;
229                    }
230                }
231                match self.table_alignments.get(self.table_cell_index) {
232                    Some(&Alignment::Left) => self.write(" style=\"text-align: left\">"),
233                    Some(&Alignment::Center) => self.write(" style=\"text-align: center\">"),
234                    Some(&Alignment::Right) => self.write(" style=\"text-align: right\">"),
235                    _ => self.write(">"),
236                }
237            }
238            Tag::BlockQuote(kind) => {
239                let class_str = match kind {
240                    None => "",
241                    Some(kind) => match kind {
242                        BlockQuoteKind::Note => " class=\"markdown-alert-note\"",
243                        BlockQuoteKind::Tip => " class=\"markdown-alert-tip\"",
244                        BlockQuoteKind::Important => " class=\"markdown-alert-important\"",
245                        BlockQuoteKind::Warning => " class=\"markdown-alert-warning\"",
246                        BlockQuoteKind::Caution => " class=\"markdown-alert-caution\"",
247                    },
248                };
249                if self.end_newline {
250                    self.write(&format!("<blockquote{}>\n", class_str))
251                } else {
252                    self.write(&format!("\n<blockquote{}>\n", class_str))
253                }
254            }
255            Tag::CodeBlock(info) => {
256                if !self.end_newline {
257                    self.write_newline()?;
258                }
259                match info {
260                    CodeBlockKind::Fenced(info) => {
261                        let lang = info.split(' ').next().unwrap();
262                        if lang.is_empty() {
263                            self.write("<pre><code>")
264                        } else {
265                            self.write("<pre><code class=\"language-")?;
266                            escape_html(&mut self.writer, lang)?;
267                            self.write("\">")
268                        }
269                    }
270                    CodeBlockKind::Indented => self.write("<pre><code>"),
271                }
272            }
273            Tag::List(Some(1)) => {
274                if self.end_newline {
275                    self.write("<ol>\n")
276                } else {
277                    self.write("\n<ol>\n")
278                }
279            }
280            Tag::List(Some(start)) => {
281                if self.end_newline {
282                    self.write("<ol start=\"")?;
283                } else {
284                    self.write("\n<ol start=\"")?;
285                }
286                write!(&mut self.writer, "{}", start)?;
287                self.write("\">\n")
288            }
289            Tag::List(None) => {
290                if self.end_newline {
291                    self.write("<ul>\n")
292                } else {
293                    self.write("\n<ul>\n")
294                }
295            }
296            Tag::Item => {
297                if self.end_newline {
298                    self.write("<li>")
299                } else {
300                    self.write("\n<li>")
301                }
302            }
303            Tag::DefinitionList => {
304                if self.end_newline {
305                    self.write("<dl>\n")
306                } else {
307                    self.write("\n<dl>\n")
308                }
309            }
310            Tag::DefinitionListTitle => {
311                if self.end_newline {
312                    self.write("<dt>")
313                } else {
314                    self.write("\n<dt>")
315                }
316            }
317            Tag::DefinitionListDefinition => {
318                if self.end_newline {
319                    self.write("<dd>")
320                } else {
321                    self.write("\n<dd>")
322                }
323            }
324            Tag::Subscript => self.write("<sub>"),
325            Tag::Superscript => self.write("<sup>"),
326            Tag::Emphasis => self.write("<em>"),
327            Tag::Strong => self.write("<strong>"),
328            Tag::Strikethrough => self.write("<del>"),
329            Tag::Link {
330                link_type: LinkType::Email,
331                dest_url,
332                title,
333                id: _,
334            } => {
335                self.write("<a href=\"mailto:")?;
336                escape_href(&mut self.writer, &dest_url)?;
337                if !title.is_empty() {
338                    self.write("\" title=\"")?;
339                    escape_html(&mut self.writer, &title)?;
340                }
341                self.write("\">")
342            }
343            Tag::Link {
344                link_type: _,
345                dest_url,
346                title,
347                id: _,
348            } => {
349                self.write("<a href=\"")?;
350                escape_href(&mut self.writer, &dest_url)?;
351                if !title.is_empty() {
352                    self.write("\" title=\"")?;
353                    escape_html(&mut self.writer, &title)?;
354                }
355                self.write("\">")
356            }
357            Tag::Image {
358                link_type: _,
359                dest_url,
360                title,
361                id: _,
362            } => {
363                self.write("<img src=\"")?;
364                escape_href(&mut self.writer, &dest_url)?;
365                self.write("\" alt=\"")?;
366                self.raw_text()?;
367                if !title.is_empty() {
368                    self.write("\" title=\"")?;
369                    escape_html(&mut self.writer, &title)?;
370                }
371                self.write("\" />")
372            }
373            Tag::FootnoteDefinition(name) => {
374                if self.end_newline {
375                    self.write("<div class=\"footnote-definition\" id=\"")?;
376                } else {
377                    self.write("\n<div class=\"footnote-definition\" id=\"")?;
378                }
379                escape_html(&mut self.writer, &name)?;
380                self.write("\"><sup class=\"footnote-definition-label\">")?;
381                let len = self.numbers.len() + 1;
382                let number = *self.numbers.entry(name).or_insert(len);
383                write!(&mut self.writer, "{}", number)?;
384                self.write("</sup>")
385            }
386            Tag::MetadataBlock(_) => {
387                self.in_non_writing_block = true;
388                Ok(())
389            }
390        }
391    }
392
393    fn end_tag(&mut self, tag: TagEnd) -> Result<(), W::Error> {
394        match tag {
395            TagEnd::HtmlBlock => {}
396            TagEnd::Paragraph => {
397                self.write("</p>\n")?;
398            }
399            TagEnd::Heading(level) => {
400                self.write("</")?;
401                write!(&mut self.writer, "{}", level)?;
402                self.write(">\n")?;
403            }
404            TagEnd::Table => {
405                self.write("</tbody></table>\n")?;
406            }
407            TagEnd::TableHead => {
408                self.write("</tr></thead><tbody>\n")?;
409                self.table_state = TableState::Body;
410            }
411            TagEnd::TableRow => {
412                self.write("</tr>\n")?;
413            }
414            TagEnd::TableCell => {
415                match self.table_state {
416                    TableState::Head => {
417                        self.write("</th>")?;
418                    }
419                    TableState::Body => {
420                        self.write("</td>")?;
421                    }
422                }
423                self.table_cell_index += 1;
424            }
425            TagEnd::BlockQuote(_) => {
426                self.write("</blockquote>\n")?;
427            }
428            TagEnd::CodeBlock => {
429                self.write("</code></pre>\n")?;
430            }
431            TagEnd::List(true) => {
432                self.write("</ol>\n")?;
433            }
434            TagEnd::List(false) => {
435                self.write("</ul>\n")?;
436            }
437            TagEnd::Item => {
438                self.write("</li>\n")?;
439            }
440            TagEnd::DefinitionList => {
441                self.write("</dl>\n")?;
442            }
443            TagEnd::DefinitionListTitle => {
444                self.write("</dt>\n")?;
445            }
446            TagEnd::DefinitionListDefinition => {
447                self.write("</dd>\n")?;
448            }
449            TagEnd::Emphasis => {
450                self.write("</em>")?;
451            }
452            TagEnd::Superscript => {
453                self.write("</sup>")?;
454            }
455            TagEnd::Subscript => {
456                self.write("</sub>")?;
457            }
458            TagEnd::Strong => {
459                self.write("</strong>")?;
460            }
461            TagEnd::Strikethrough => {
462                self.write("</del>")?;
463            }
464            TagEnd::Link => {
465                self.write("</a>")?;
466            }
467            TagEnd::Image => (), // shouldn't happen, handled in start
468            TagEnd::FootnoteDefinition => {
469                self.write("</div>\n")?;
470            }
471            TagEnd::MetadataBlock(_) => {
472                self.in_non_writing_block = false;
473            }
474        }
475        Ok(())
476    }
477
478    // run raw text, consuming end tag
479    fn raw_text(&mut self) -> Result<(), W::Error> {
480        let mut nest = 0;
481        while let Some(event) = self.iter.next() {
482            match event {
483                Start(_) => nest += 1,
484                End(_) => {
485                    if nest == 0 {
486                        break;
487                    }
488                    nest -= 1;
489                }
490                Html(_) => {}
491                InlineHtml(text) | Code(text) | Text(text) => {
492                    // Don't use escape_html_body_text here.
493                    // The output of this function is used in the `alt` attribute.
494                    escape_html(&mut self.writer, &text)?;
495                    self.end_newline = text.ends_with('\n');
496                }
497                InlineMath(text) => {
498                    self.write("$")?;
499                    escape_html(&mut self.writer, &text)?;
500                    self.write("$")?;
501                }
502                DisplayMath(text) => {
503                    self.write("$$")?;
504                    escape_html(&mut self.writer, &text)?;
505                    self.write("$$")?;
506                }
507                SoftBreak | HardBreak | Rule => {
508                    self.write(" ")?;
509                }
510                FootnoteReference(name) => {
511                    let len = self.numbers.len() + 1;
512                    let number = *self.numbers.entry(name).or_insert(len);
513                    write!(&mut self.writer, "[{}]", number)?;
514                }
515                TaskListMarker(true) => self.write("[x]")?,
516                TaskListMarker(false) => self.write("[ ]")?,
517            }
518        }
519        Ok(())
520    }
521}
522
523/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
524/// push it to a `String`.
525///
526/// # Examples
527///
528/// ```
529/// use pulldown_cmark::{html, Parser};
530///
531/// let markdown_str = r#"
532/// hello
533/// =====
534///
535/// * alpha
536/// * beta
537/// "#;
538/// let parser = Parser::new(markdown_str);
539///
540/// let mut html_buf = String::new();
541/// html::push_html(&mut html_buf, parser);
542///
543/// assert_eq!(html_buf, r#"<h1>hello</h1>
544/// <ul>
545/// <li>alpha</li>
546/// <li>beta</li>
547/// </ul>
548/// "#);
549/// ```
550pub fn push_html<'a, I>(s: &mut String, iter: I)
551where
552    I: Iterator<Item = Event<'a>>,
553{
554    write_html_fmt(s, iter).unwrap()
555}
556
557/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
558/// write it out to an I/O stream.
559///
560/// **Note**: using this function with an unbuffered writer like a file or socket
561/// will result in poor performance. Wrap these in a
562/// [`BufWriter`](https://doc.rust-lang.org/std/io/struct.BufWriter.html) to
563/// prevent unnecessary slowdowns.
564///
565/// # Examples
566///
567/// ```
568/// use pulldown_cmark::{html, Parser};
569/// use std::io::Cursor;
570///
571/// let markdown_str = r#"
572/// hello
573/// =====
574///
575/// * alpha
576/// * beta
577/// "#;
578/// let mut bytes = Vec::new();
579/// let parser = Parser::new(markdown_str);
580///
581/// html::write_html_io(Cursor::new(&mut bytes), parser);
582///
583/// assert_eq!(&String::from_utf8_lossy(&bytes)[..], r#"<h1>hello</h1>
584/// <ul>
585/// <li>alpha</li>
586/// <li>beta</li>
587/// </ul>
588/// "#);
589/// ```
590pub fn write_html_io<'a, I, W>(writer: W, iter: I) -> std::io::Result<()>
591where
592    I: Iterator<Item = Event<'a>>,
593    W: std::io::Write,
594{
595    HtmlWriter::new(iter, IoWriter(writer)).run()
596}
597
598/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
599/// write it into Unicode-accepting buffer or stream.
600///
601/// # Examples
602///
603/// ```
604/// use pulldown_cmark::{html, Parser};
605///
606/// let markdown_str = r#"
607/// hello
608/// =====
609///
610/// * alpha
611/// * beta
612/// "#;
613/// let mut buf = String::new();
614/// let parser = Parser::new(markdown_str);
615///
616/// html::write_html_fmt(&mut buf, parser);
617///
618/// assert_eq!(buf, r#"<h1>hello</h1>
619/// <ul>
620/// <li>alpha</li>
621/// <li>beta</li>
622/// </ul>
623/// "#);
624/// ```
625pub fn write_html_fmt<'a, I, W>(writer: W, iter: I) -> std::fmt::Result
626where
627    I: Iterator<Item = Event<'a>>,
628    W: std::fmt::Write,
629{
630    HtmlWriter::new(iter, FmtWriter(writer)).run()
631}