pulldown_cmark/
html.rs

1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! HTML renderer that takes an iterator of events as input.
22
23use std::collections::HashMap;
24
25use crate::strings::CowStr;
26use crate::Event::*;
27use crate::{Alignment, BlockQuoteKind, CodeBlockKind, Event, LinkType, Tag, TagEnd};
28use pulldown_cmark_escape::{
29    escape_href, escape_html, escape_html_body_text, FmtWriter, IoWriter, StrWrite,
30};
31
/// Part of a table that is currently being rendered.
///
/// The renderer consults this to choose between `<th>` and `<td>` cells.
enum TableState {
    /// Rendering the header row.
    Head,
    /// Rendering the rows that follow the header.
    Body,
}
36
37struct HtmlWriter<'a, I, W> {
38    /// Iterator supplying events.
39    iter: I,
40
41    /// Writer to write to.
42    writer: W,
43
44    /// Whether or not the last write wrote a newline.
45    end_newline: bool,
46
47    /// Whether if inside a metadata block (text should not be written)
48    in_non_writing_block: bool,
49
50    table_state: TableState,
51    table_alignments: Vec<Alignment>,
52    table_cell_index: usize,
53    numbers: HashMap<CowStr<'a>, usize>,
54}
55
56impl<'a, I, W> HtmlWriter<'a, I, W>
57where
58    I: Iterator<Item = Event<'a>>,
59    W: StrWrite,
60{
61    fn new(iter: I, writer: W) -> Self {
62        Self {
63            iter,
64            writer,
65            end_newline: true,
66            in_non_writing_block: false,
67            table_state: TableState::Head,
68            table_alignments: vec![],
69            table_cell_index: 0,
70            numbers: HashMap::new(),
71        }
72    }
73
74    /// Writes a new line.
75    #[inline]
76    fn write_newline(&mut self) -> Result<(), W::Error> {
77        self.end_newline = true;
78        self.writer.write_str("\n")
79    }
80
81    /// Writes a buffer, and tracks whether or not a newline was written.
82    #[inline]
83    fn write(&mut self, s: &str) -> Result<(), W::Error> {
84        self.writer.write_str(s)?;
85
86        if !s.is_empty() {
87            self.end_newline = s.ends_with('\n');
88        }
89        Ok(())
90    }
91
92    fn run(mut self) -> Result<(), W::Error> {
93        while let Some(event) = self.iter.next() {
94            match event {
95                Start(tag) => {
96                    self.start_tag(tag)?;
97                }
98                End(tag) => {
99                    self.end_tag(tag)?;
100                }
101                Text(text) => {
102                    if !self.in_non_writing_block {
103                        escape_html_body_text(&mut self.writer, &text)?;
104                        self.end_newline = text.ends_with('\n');
105                    }
106                }
107                Code(text) => {
108                    self.write("<code>")?;
109                    escape_html_body_text(&mut self.writer, &text)?;
110                    self.write("</code>")?;
111                }
112                InlineMath(text) => {
113                    self.write(r#"<span class="math math-inline">"#)?;
114                    escape_html(&mut self.writer, &text)?;
115                    self.write("</span>")?;
116                }
117                DisplayMath(text) => {
118                    self.write(r#"<span class="math math-display">"#)?;
119                    escape_html(&mut self.writer, &text)?;
120                    self.write("</span>")?;
121                }
122                Html(html) | InlineHtml(html) => {
123                    self.write(&html)?;
124                }
125                SoftBreak => {
126                    self.write_newline()?;
127                }
128                HardBreak => {
129                    self.write("<br />\n")?;
130                }
131                Rule => {
132                    if self.end_newline {
133                        self.write("<hr />\n")?;
134                    } else {
135                        self.write("\n<hr />\n")?;
136                    }
137                }
138                FootnoteReference(name) => {
139                    let len = self.numbers.len() + 1;
140                    self.write("<sup class=\"footnote-reference\"><a href=\"#")?;
141                    escape_html(&mut self.writer, &name)?;
142                    self.write("\">")?;
143                    let number = *self.numbers.entry(name).or_insert(len);
144                    write!(&mut self.writer, "{}", number)?;
145                    self.write("</a></sup>")?;
146                }
147                TaskListMarker(true) => {
148                    self.write("<input disabled=\"\" type=\"checkbox\" checked=\"\"/>\n")?;
149                }
150                TaskListMarker(false) => {
151                    self.write("<input disabled=\"\" type=\"checkbox\"/>\n")?;
152                }
153            }
154        }
155        Ok(())
156    }
157
158    /// Writes the start of an HTML tag.
159    fn start_tag(&mut self, tag: Tag<'a>) -> Result<(), W::Error> {
160        match tag {
161            Tag::HtmlBlock => Ok(()),
162            Tag::Paragraph => {
163                if self.end_newline {
164                    self.write("<p>")
165                } else {
166                    self.write("\n<p>")
167                }
168            }
169            Tag::Heading {
170                level,
171                id,
172                classes,
173                attrs,
174            } => {
175                if self.end_newline {
176                    self.write("<")?;
177                } else {
178                    self.write("\n<")?;
179                }
180                write!(&mut self.writer, "{}", level)?;
181                if let Some(id) = id {
182                    self.write(" id=\"")?;
183                    escape_html(&mut self.writer, &id)?;
184                    self.write("\"")?;
185                }
186                let mut classes = classes.iter();
187                if let Some(class) = classes.next() {
188                    self.write(" class=\"")?;
189                    escape_html(&mut self.writer, class)?;
190                    for class in classes {
191                        self.write(" ")?;
192                        escape_html(&mut self.writer, class)?;
193                    }
194                    self.write("\"")?;
195                }
196                for (attr, value) in attrs {
197                    self.write(" ")?;
198                    escape_html(&mut self.writer, &attr)?;
199                    if let Some(val) = value {
200                        self.write("=\"")?;
201                        escape_html(&mut self.writer, &val)?;
202                        self.write("\"")?;
203                    } else {
204                        self.write("=\"\"")?;
205                    }
206                }
207                self.write(">")
208            }
209            Tag::Table(alignments) => {
210                self.table_alignments = alignments;
211                self.write("<table>")
212            }
213            Tag::TableHead => {
214                self.table_state = TableState::Head;
215                self.table_cell_index = 0;
216                self.write("<thead><tr>")
217            }
218            Tag::TableRow => {
219                self.table_cell_index = 0;
220                self.write("<tr>")
221            }
222            Tag::TableCell => {
223                match self.table_state {
224                    TableState::Head => {
225                        self.write("<th")?;
226                    }
227                    TableState::Body => {
228                        self.write("<td")?;
229                    }
230                }
231                match self.table_alignments.get(self.table_cell_index) {
232                    Some(&Alignment::Left) => self.write(" style=\"text-align: left\">"),
233                    Some(&Alignment::Center) => self.write(" style=\"text-align: center\">"),
234                    Some(&Alignment::Right) => self.write(" style=\"text-align: right\">"),
235                    _ => self.write(">"),
236                }
237            }
238            Tag::BlockQuote(kind) => {
239                let class_str = match kind {
240                    None => "",
241                    Some(kind) => match kind {
242                        BlockQuoteKind::Note => " class=\"markdown-alert-note\"",
243                        BlockQuoteKind::Tip => " class=\"markdown-alert-tip\"",
244                        BlockQuoteKind::Important => " class=\"markdown-alert-important\"",
245                        BlockQuoteKind::Warning => " class=\"markdown-alert-warning\"",
246                        BlockQuoteKind::Caution => " class=\"markdown-alert-caution\"",
247                    },
248                };
249                if self.end_newline {
250                    self.write(&format!("<blockquote{}>\n", class_str))
251                } else {
252                    self.write(&format!("\n<blockquote{}>\n", class_str))
253                }
254            }
255            Tag::CodeBlock(info) => {
256                if !self.end_newline {
257                    self.write_newline()?;
258                }
259                match info {
260                    CodeBlockKind::Fenced(info) => {
261                        let lang = info.split(' ').next().unwrap();
262                        if lang.is_empty() {
263                            self.write("<pre><code>")
264                        } else {
265                            self.write("<pre><code class=\"language-")?;
266                            escape_html(&mut self.writer, lang)?;
267                            self.write("\">")
268                        }
269                    }
270                    CodeBlockKind::Indented => self.write("<pre><code>"),
271                }
272            }
273            Tag::List(Some(1)) => {
274                if self.end_newline {
275                    self.write("<ol>\n")
276                } else {
277                    self.write("\n<ol>\n")
278                }
279            }
280            Tag::List(Some(start)) => {
281                if self.end_newline {
282                    self.write("<ol start=\"")?;
283                } else {
284                    self.write("\n<ol start=\"")?;
285                }
286                write!(&mut self.writer, "{}", start)?;
287                self.write("\">\n")
288            }
289            Tag::List(None) => {
290                if self.end_newline {
291                    self.write("<ul>\n")
292                } else {
293                    self.write("\n<ul>\n")
294                }
295            }
296            Tag::Item => {
297                if self.end_newline {
298                    self.write("<li>")
299                } else {
300                    self.write("\n<li>")
301                }
302            }
303            Tag::DefinitionList => {
304                if self.end_newline {
305                    self.write("<dl>\n")
306                } else {
307                    self.write("\n<dl>\n")
308                }
309            }
310            Tag::DefinitionListTitle => {
311                if self.end_newline {
312                    self.write("<dt>")
313                } else {
314                    self.write("\n<dt>")
315                }
316            }
317            Tag::DefinitionListDefinition => {
318                if self.end_newline {
319                    self.write("<dd>")
320                } else {
321                    self.write("\n<dd>")
322                }
323            }
324            Tag::Emphasis => self.write("<em>"),
325            Tag::Strong => self.write("<strong>"),
326            Tag::Strikethrough => self.write("<del>"),
327            Tag::Link {
328                link_type: LinkType::Email,
329                dest_url,
330                title,
331                id: _,
332            } => {
333                self.write("<a href=\"mailto:")?;
334                escape_href(&mut self.writer, &dest_url)?;
335                if !title.is_empty() {
336                    self.write("\" title=\"")?;
337                    escape_html(&mut self.writer, &title)?;
338                }
339                self.write("\">")
340            }
341            Tag::Link {
342                link_type: _,
343                dest_url,
344                title,
345                id: _,
346            } => {
347                self.write("<a href=\"")?;
348                escape_href(&mut self.writer, &dest_url)?;
349                if !title.is_empty() {
350                    self.write("\" title=\"")?;
351                    escape_html(&mut self.writer, &title)?;
352                }
353                self.write("\">")
354            }
355            Tag::Image {
356                link_type: _,
357                dest_url,
358                title,
359                id: _,
360            } => {
361                self.write("<img src=\"")?;
362                escape_href(&mut self.writer, &dest_url)?;
363                self.write("\" alt=\"")?;
364                self.raw_text()?;
365                if !title.is_empty() {
366                    self.write("\" title=\"")?;
367                    escape_html(&mut self.writer, &title)?;
368                }
369                self.write("\" />")
370            }
371            Tag::FootnoteDefinition(name) => {
372                if self.end_newline {
373                    self.write("<div class=\"footnote-definition\" id=\"")?;
374                } else {
375                    self.write("\n<div class=\"footnote-definition\" id=\"")?;
376                }
377                escape_html(&mut self.writer, &name)?;
378                self.write("\"><sup class=\"footnote-definition-label\">")?;
379                let len = self.numbers.len() + 1;
380                let number = *self.numbers.entry(name).or_insert(len);
381                write!(&mut self.writer, "{}", number)?;
382                self.write("</sup>")
383            }
384            Tag::MetadataBlock(_) => {
385                self.in_non_writing_block = true;
386                Ok(())
387            }
388        }
389    }
390
391    fn end_tag(&mut self, tag: TagEnd) -> Result<(), W::Error> {
392        match tag {
393            TagEnd::HtmlBlock => {}
394            TagEnd::Paragraph => {
395                self.write("</p>\n")?;
396            }
397            TagEnd::Heading(level) => {
398                self.write("</")?;
399                write!(&mut self.writer, "{}", level)?;
400                self.write(">\n")?;
401            }
402            TagEnd::Table => {
403                self.write("</tbody></table>\n")?;
404            }
405            TagEnd::TableHead => {
406                self.write("</tr></thead><tbody>\n")?;
407                self.table_state = TableState::Body;
408            }
409            TagEnd::TableRow => {
410                self.write("</tr>\n")?;
411            }
412            TagEnd::TableCell => {
413                match self.table_state {
414                    TableState::Head => {
415                        self.write("</th>")?;
416                    }
417                    TableState::Body => {
418                        self.write("</td>")?;
419                    }
420                }
421                self.table_cell_index += 1;
422            }
423            TagEnd::BlockQuote(_) => {
424                self.write("</blockquote>\n")?;
425            }
426            TagEnd::CodeBlock => {
427                self.write("</code></pre>\n")?;
428            }
429            TagEnd::List(true) => {
430                self.write("</ol>\n")?;
431            }
432            TagEnd::List(false) => {
433                self.write("</ul>\n")?;
434            }
435            TagEnd::Item => {
436                self.write("</li>\n")?;
437            }
438            TagEnd::DefinitionList => {
439                self.write("</dl>\n")?;
440            }
441            TagEnd::DefinitionListTitle => {
442                self.write("</dt>\n")?;
443            }
444            TagEnd::DefinitionListDefinition => {
445                self.write("</dd>\n")?;
446            }
447            TagEnd::Emphasis => {
448                self.write("</em>")?;
449            }
450            TagEnd::Strong => {
451                self.write("</strong>")?;
452            }
453            TagEnd::Strikethrough => {
454                self.write("</del>")?;
455            }
456            TagEnd::Link => {
457                self.write("</a>")?;
458            }
459            TagEnd::Image => (), // shouldn't happen, handled in start
460            TagEnd::FootnoteDefinition => {
461                self.write("</div>\n")?;
462            }
463            TagEnd::MetadataBlock(_) => {
464                self.in_non_writing_block = false;
465            }
466        }
467        Ok(())
468    }
469
470    // run raw text, consuming end tag
471    fn raw_text(&mut self) -> Result<(), W::Error> {
472        let mut nest = 0;
473        while let Some(event) = self.iter.next() {
474            match event {
475                Start(_) => nest += 1,
476                End(_) => {
477                    if nest == 0 {
478                        break;
479                    }
480                    nest -= 1;
481                }
482                Html(_) => {}
483                InlineHtml(text) | Code(text) | Text(text) => {
484                    // Don't use escape_html_body_text here.
485                    // The output of this function is used in the `alt` attribute.
486                    escape_html(&mut self.writer, &text)?;
487                    self.end_newline = text.ends_with('\n');
488                }
489                InlineMath(text) => {
490                    self.write("$")?;
491                    escape_html(&mut self.writer, &text)?;
492                    self.write("$")?;
493                }
494                DisplayMath(text) => {
495                    self.write("$$")?;
496                    escape_html(&mut self.writer, &text)?;
497                    self.write("$$")?;
498                }
499                SoftBreak | HardBreak | Rule => {
500                    self.write(" ")?;
501                }
502                FootnoteReference(name) => {
503                    let len = self.numbers.len() + 1;
504                    let number = *self.numbers.entry(name).or_insert(len);
505                    write!(&mut self.writer, "[{}]", number)?;
506                }
507                TaskListMarker(true) => self.write("[x]")?,
508                TaskListMarker(false) => self.write("[ ]")?,
509            }
510        }
511        Ok(())
512    }
513}
514
515/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
516/// push it to a `String`.
517///
518/// # Examples
519///
520/// ```
521/// use pulldown_cmark::{html, Parser};
522///
523/// let markdown_str = r#"
524/// hello
525/// =====
526///
527/// * alpha
528/// * beta
529/// "#;
530/// let parser = Parser::new(markdown_str);
531///
532/// let mut html_buf = String::new();
533/// html::push_html(&mut html_buf, parser);
534///
535/// assert_eq!(html_buf, r#"<h1>hello</h1>
536/// <ul>
537/// <li>alpha</li>
538/// <li>beta</li>
539/// </ul>
540/// "#);
541/// ```
542pub fn push_html<'a, I>(s: &mut String, iter: I)
543where
544    I: Iterator<Item = Event<'a>>,
545{
546    write_html_fmt(s, iter).unwrap()
547}
548
549/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
550/// write it out to an I/O stream.
551///
552/// **Note**: using this function with an unbuffered writer like a file or socket
553/// will result in poor performance. Wrap these in a
554/// [`BufWriter`](https://doc.rust-lang.org/std/io/struct.BufWriter.html) to
555/// prevent unnecessary slowdowns.
556///
557/// # Examples
558///
559/// ```
560/// use pulldown_cmark::{html, Parser};
561/// use std::io::Cursor;
562///
563/// let markdown_str = r#"
564/// hello
565/// =====
566///
567/// * alpha
568/// * beta
569/// "#;
570/// let mut bytes = Vec::new();
571/// let parser = Parser::new(markdown_str);
572///
573/// html::write_html_io(Cursor::new(&mut bytes), parser);
574///
575/// assert_eq!(&String::from_utf8_lossy(&bytes)[..], r#"<h1>hello</h1>
576/// <ul>
577/// <li>alpha</li>
578/// <li>beta</li>
579/// </ul>
580/// "#);
581/// ```
582pub fn write_html_io<'a, I, W>(writer: W, iter: I) -> std::io::Result<()>
583where
584    I: Iterator<Item = Event<'a>>,
585    W: std::io::Write,
586{
587    HtmlWriter::new(iter, IoWriter(writer)).run()
588}
589
590/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
591/// write it into Unicode-accepting buffer or stream.
592///
593/// # Examples
594///
595/// ```
596/// use pulldown_cmark::{html, Parser};
597///
598/// let markdown_str = r#"
599/// hello
600/// =====
601///
602/// * alpha
603/// * beta
604/// "#;
605/// let mut buf = String::new();
606/// let parser = Parser::new(markdown_str);
607///
608/// html::write_html_fmt(&mut buf, parser);
609///
610/// assert_eq!(buf, r#"<h1>hello</h1>
611/// <ul>
612/// <li>alpha</li>
613/// <li>beta</li>
614/// </ul>
615/// "#);
616/// ```
617pub fn write_html_fmt<'a, I, W>(writer: W, iter: I) -> std::fmt::Result
618where
619    I: Iterator<Item = Event<'a>>,
620    W: std::fmt::Write,
621{
622    HtmlWriter::new(iter, FmtWriter(writer)).run()
623}