pulldown_cmark/
parse.rs

1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Tree-based two pass parser.
22
23use std::cmp::{max, min};
24use std::collections::{HashMap, VecDeque};
25use std::iter::FusedIterator;
26use std::num::NonZeroUsize;
27use std::ops::{Index, Range};
28
29use unicase::UniCase;
30
31use crate::firstpass::run_first_pass;
32use crate::linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel};
33use crate::scanners::*;
34use crate::strings::CowStr;
35use crate::tree::{Tree, TreeIndex};
36use crate::{
37    Alignment, BlockQuoteKind, CodeBlockKind, Event, HeadingLevel, LinkType, MetadataBlockKind,
38    Options, Tag, TagEnd,
39};
40
41// Allowing arbitrary depth nested parentheses inside link destinations
42// can create denial of service vulnerabilities if we're not careful.
43// The simplest countermeasure is to limit their depth, which is
44// explicitly allowed by the spec as long as the limit is at least 3:
45// https://spec.commonmark.org/0.29/#link-destination
46pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
47
48#[derive(Debug, Default, Clone, Copy)]
49pub(crate) struct Item {
50    pub start: usize,
51    pub end: usize,
52    pub body: ItemBody,
53}
54
55#[derive(Debug, PartialEq, Clone, Copy, Default)]
56pub(crate) enum ItemBody {
57    // These are possible inline items, need to be resolved in second pass.
58
59    // repeats, can_open, can_close
60    MaybeEmphasis(usize, bool, bool),
61    // can_open, can_close, brace context
62    MaybeMath(bool, bool, u8),
63    // quote byte, can_open, can_close
64    MaybeSmartQuote(u8, bool, bool),
65    MaybeCode(usize, bool), // number of backticks, preceded by backslash
66    MaybeHtml,
67    MaybeLinkOpen,
68    // bool indicates whether or not the preceding section could be a reference
69    MaybeLinkClose(bool),
70    MaybeImage,
71
72    // These are inline items after resolution.
73    Emphasis,
74    Strong,
75    Strikethrough,
76    Math(CowIndex, bool), // true for display math
77    Code(CowIndex),
78    Link(LinkIndex),
79    Image(LinkIndex),
80    FootnoteReference(CowIndex),
81    TaskListMarker(bool), // true for checked
82
83    // These are also inline items.
84    InlineHtml,
85    OwnedInlineHtml(CowIndex),
86    SynthesizeText(CowIndex),
87    SynthesizeChar(char),
88    Html,
89    Text {
90        backslash_escaped: bool,
91    },
92    SoftBreak,
93    // true = is backlash
94    HardBreak(bool),
95
96    // Dummy node at the top of the tree - should not be used otherwise!
97    #[default]
98    Root,
99
100    // These are block items.
101    Paragraph,
102    Rule,
103    Heading(HeadingLevel, Option<HeadingIndex>), // heading level
104    FencedCodeBlock(CowIndex),
105    IndentCodeBlock,
106    HtmlBlock,
107    BlockQuote(Option<BlockQuoteKind>),
108    List(bool, u8, u64), // is_tight, list character, list start index
109    ListItem(usize),     // indent level
110    FootnoteDefinition(CowIndex),
111    MetadataBlock(MetadataBlockKind),
112
113    // Definition lists
114    DefinitionList(bool), // is_tight
115    // gets turned into either a paragraph or a definition list title,
116    // depending on whether there's a definition after it
117    MaybeDefinitionListTitle,
118    DefinitionListTitle,
119    DefinitionListDefinition(usize),
120
121    // Tables
122    Table(AlignmentIndex),
123    TableHead,
124    TableRow,
125    TableCell,
126}
127
128impl ItemBody {
129    fn is_maybe_inline(&self) -> bool {
130        use ItemBody::*;
131        matches!(
132            *self,
133            MaybeEmphasis(..)
134                | MaybeMath(..)
135                | MaybeSmartQuote(..)
136                | MaybeCode(..)
137                | MaybeHtml
138                | MaybeLinkOpen
139                | MaybeLinkClose(..)
140                | MaybeImage
141        )
142    }
143    fn is_inline(&self) -> bool {
144        use ItemBody::*;
145        matches!(
146            *self,
147            MaybeEmphasis(..)
148                | MaybeMath(..)
149                | MaybeSmartQuote(..)
150                | MaybeCode(..)
151                | MaybeHtml
152                | MaybeLinkOpen
153                | MaybeLinkClose(..)
154                | MaybeImage
155                | Emphasis
156                | Strong
157                | Strikethrough
158                | Math(..)
159                | Code(..)
160                | Link(..)
161                | Image(..)
162                | FootnoteReference(..)
163                | TaskListMarker(..)
164                | InlineHtml
165                | OwnedInlineHtml(..)
166                | SynthesizeText(..)
167                | SynthesizeChar(..)
168                | Html
169                | Text { .. }
170                | SoftBreak
171                | HardBreak(..)
172        )
173    }
174    fn is_block(&self) -> bool {
175        !self.is_inline()
176    }
177}
178
179#[derive(Debug)]
180pub struct BrokenLink<'a> {
181    pub span: std::ops::Range<usize>,
182    pub link_type: LinkType,
183    pub reference: CowStr<'a>,
184}
185
186/// Markdown event iterator.
187pub struct Parser<'input, F = DefaultBrokenLinkCallback> {
188    text: &'input str,
189    options: Options,
190    tree: Tree<Item>,
191    allocs: Allocations<'input>,
192    broken_link_callback: Option<F>,
193    html_scan_guard: HtmlScanGuard,
194
195    // https://github.com/pulldown-cmark/pulldown-cmark/issues/844
196    // Consider this example:
197    //
198    //     [x]: xxx...
199    //     [x]
200    //     [x]
201    //     [x]
202    //
203    // Which expands to this HTML:
204    //
205    //     <a href="xxx...">x</a>
206    //     <a href="xxx...">x</a>
207    //     <a href="xxx...">x</a>
208    //
209    // This is quadratic growth, because it's filling in the area of a square.
210    // To prevent this, track how much it's expanded and limit it.
211    link_ref_expansion_limit: usize,
212
213    // used by inline passes. store them here for reuse
214    inline_stack: InlineStack,
215    link_stack: LinkStack,
216    code_delims: CodeDelims,
217    math_delims: MathDelims,
218}
219
220impl<'input, F> std::fmt::Debug for Parser<'input, F> {
221    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222        // Only print the fields that have public types.
223        f.debug_struct("Parser")
224            .field("text", &self.text)
225            .field("options", &self.options)
226            .field(
227                "broken_link_callback",
228                &self.broken_link_callback.as_ref().map(|_| ..),
229            )
230            .finish()
231    }
232}
233
234impl<'a> BrokenLink<'a> {
235    /// Moves the link into version with a static lifetime.
236    ///
237    /// The `reference` member is cloned to a Boxed or Inline version.
238    pub fn into_static(self) -> BrokenLink<'static> {
239        BrokenLink {
240            span: self.span.clone(),
241            link_type: self.link_type,
242            reference: self.reference.into_string().into(),
243        }
244    }
245}
246
247impl<'input> Parser<'input, DefaultBrokenLinkCallback> {
248    /// Creates a new event iterator for a markdown string without any options enabled.
249    pub fn new(text: &'input str) -> Self {
250        Self::new_ext(text, Options::empty())
251    }
252
253    /// Creates a new event iterator for a markdown string with given options.
254    pub fn new_ext(text: &'input str, options: Options) -> Self {
255        Self::new_with_broken_link_callback(text, options, None)
256    }
257}
258
259impl<'input, F: BrokenLinkCallback<'input>> Parser<'input, F> {
260    /// In case the parser encounters any potential links that have a broken
261    /// reference (e.g `[foo]` when there is no `[foo]: ` entry at the bottom)
262    /// the provided callback will be called with the reference name,
263    /// and the returned pair will be used as the link URL and title if it is not
264    /// `None`.
265    pub fn new_with_broken_link_callback(
266        text: &'input str,
267        options: Options,
268        broken_link_callback: Option<F>,
269    ) -> Self {
270        let (mut tree, allocs) = run_first_pass(text, options);
271        tree.reset();
272        let inline_stack = Default::default();
273        let link_stack = Default::default();
274        let html_scan_guard = Default::default();
275        Parser {
276            text,
277            options,
278            tree,
279            allocs,
280            broken_link_callback,
281            inline_stack,
282            link_stack,
283            html_scan_guard,
284            // always allow 100KiB
285            link_ref_expansion_limit: text.len().max(100_000),
286            code_delims: CodeDelims::new(),
287            math_delims: MathDelims::new(),
288        }
289    }
290
291    /// Returns a reference to the internal `RefDefs` object, which provides access
292    /// to the internal map of reference definitions.
293    pub fn reference_definitions(&self) -> &RefDefs<'_> {
294        &self.allocs.refdefs
295    }
296
297    /// Use a link label to fetch a type, url, and title.
298    ///
299    /// This function enforces the [`link_ref_expansion_limit`].
300    /// If it returns Some, it also consumes some of the fuel.
301    /// If we're out of fuel, it immediately returns None.
302    ///
303    /// The URL and title are found in the [`RefDefs`] map.
304    /// If they're not there, and a callback was provided by the user,
305    /// the [`broken_link_callback`] will be invoked and given the opportunity
306    /// to provide a fallback.
307    ///
308    /// The link type (that's "link" or "image") depends on the usage site, and
309    /// is provided by the caller of this function.
310    /// This function returns a new one because, if it has to invoke a callback
311    /// to find the information, the link type is [mapped to an unknown type].
312    ///
313    /// [mapped to an unknown type]: crate::LinkType::to_unknown
314    /// [`link_ref_expansion_limit`]: Self::link_ref_expansion_limit
315    /// [`broken_link_callback`]: Self::broken_link_callback
316    fn fetch_link_type_url_title(
317        &mut self,
318        link_label: CowStr<'input>,
319        span: Range<usize>,
320        link_type: LinkType,
321    ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
322        if self.link_ref_expansion_limit == 0 {
323            return None;
324        }
325
326        let (link_type, url, title) = self
327            .allocs
328            .refdefs
329            .get(link_label.as_ref())
330            .map(|matching_def| {
331                // found a matching definition!
332                let title = matching_def
333                    .title
334                    .as_ref()
335                    .cloned()
336                    .unwrap_or_else(|| "".into());
337                let url = matching_def.dest.clone();
338                (link_type, url, title)
339            })
340            .or_else(|| {
341                match self.broken_link_callback.as_mut() {
342                    Some(callback) => {
343                        // Construct a BrokenLink struct, which will be passed to the callback
344                        let broken_link = BrokenLink {
345                            span,
346                            link_type,
347                            reference: link_label,
348                        };
349
350                        callback
351                            .handle_broken_link(broken_link)
352                            .map(|(url, title)| (link_type.to_unknown(), url, title))
353                    }
354                    None => None,
355                }
356            })?;
357
358        // Limit expansion from link references.
359        // This isn't a problem for footnotes, because multiple references to the same one
360        // reuse the same node, but links/images get their HREF/SRC copied.
361        self.link_ref_expansion_limit = self
362            .link_ref_expansion_limit
363            .saturating_sub(url.len() + title.len());
364
365        Some((link_type, url, title))
366    }
367
368    /// Handle inline markup.
369    ///
370    /// When the parser encounters any item indicating potential inline markup, all
371    /// inline markup passes are run on the remainder of the chain.
372    ///
373    /// Note: there's some potential for optimization here, but that's future work.
374    fn handle_inline(&mut self) {
375        self.handle_inline_pass1();
376        self.handle_emphasis_and_hard_break();
377    }
378
379    /// Handle inline HTML, code spans, and links.
380    ///
381    /// This function handles both inline HTML and code spans, because they have
382    /// the same precedence. It also handles links, even though they have lower
383    /// precedence, because the URL of links must not be processed.
384    fn handle_inline_pass1(&mut self) {
385        let mut cur = self.tree.cur();
386        let mut prev = None;
387
388        let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
389        let block_text = &self.text[..block_end];
390
391        while let Some(mut cur_ix) = cur {
392            match self.tree[cur_ix].item.body {
393                ItemBody::MaybeHtml => {
394                    let next = self.tree[cur_ix].next;
395                    let autolink = if let Some(next_ix) = next {
396                        scan_autolink(block_text, self.tree[next_ix].item.start)
397                    } else {
398                        None
399                    };
400
401                    if let Some((ix, uri, link_type)) = autolink {
402                        let node = scan_nodes_to_ix(&self.tree, next, ix);
403                        let text_node = self.tree.create_node(Item {
404                            start: self.tree[cur_ix].item.start + 1,
405                            end: ix - 1,
406                            body: ItemBody::Text {
407                                backslash_escaped: false,
408                            },
409                        });
410                        let link_ix =
411                            self.allocs
412                                .allocate_link(link_type, uri, "".into(), "".into());
413                        self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
414                        self.tree[cur_ix].item.end = ix;
415                        self.tree[cur_ix].next = node;
416                        self.tree[cur_ix].child = Some(text_node);
417                        prev = cur;
418                        cur = node;
419                        if let Some(node_ix) = cur {
420                            self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
421                        }
422                        continue;
423                    } else {
424                        let inline_html = next.and_then(|next_ix| {
425                            self.scan_inline_html(
426                                block_text.as_bytes(),
427                                self.tree[next_ix].item.start,
428                            )
429                        });
430                        if let Some((span, ix)) = inline_html {
431                            let node = scan_nodes_to_ix(&self.tree, next, ix);
432                            self.tree[cur_ix].item.body = if !span.is_empty() {
433                                let converted_string =
434                                    String::from_utf8(span).expect("invalid utf8");
435                                ItemBody::OwnedInlineHtml(
436                                    self.allocs.allocate_cow(converted_string.into()),
437                                )
438                            } else {
439                                ItemBody::InlineHtml
440                            };
441                            self.tree[cur_ix].item.end = ix;
442                            self.tree[cur_ix].next = node;
443                            prev = cur;
444                            cur = node;
445                            if let Some(node_ix) = cur {
446                                self.tree[node_ix].item.start =
447                                    max(self.tree[node_ix].item.start, ix);
448                            }
449                            continue;
450                        }
451                    }
452                    self.tree[cur_ix].item.body = ItemBody::Text {
453                        backslash_escaped: false,
454                    };
455                }
456                ItemBody::MaybeMath(can_open, _can_close, brace_context) => {
457                    if !can_open {
458                        self.tree[cur_ix].item.body = ItemBody::Text {
459                            backslash_escaped: false,
460                        };
461                        prev = cur;
462                        cur = self.tree[cur_ix].next;
463                        continue;
464                    }
465                    let is_display = self.tree[cur_ix].next.map_or(false, |next_ix| {
466                        matches!(
467                            self.tree[next_ix].item.body,
468                            ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
469                        )
470                    });
471                    let result = if self.math_delims.is_populated() {
472                        // we have previously scanned all math environment delimiters,
473                        // so we can reuse that work
474                        self.math_delims
475                            .find(&self.tree, cur_ix, is_display, brace_context)
476                    } else {
477                        // we haven't previously scanned all math delimiters,
478                        // so walk the AST
479                        let mut scan = self.tree[cur_ix].next;
480                        if is_display {
481                            // a display delimiter, `$$`, is actually two delimiters
482                            // skip the second one
483                            scan = self.tree[scan.unwrap()].next;
484                        }
485                        let mut invalid = false;
486                        while let Some(scan_ix) = scan {
487                            if let ItemBody::MaybeMath(_can_open, can_close, delim_brace_context) =
488                                self.tree[scan_ix].item.body
489                            {
490                                let delim_is_display =
491                                    self.tree[scan_ix].next.map_or(false, |next_ix| {
492                                        matches!(
493                                            self.tree[next_ix].item.body,
494                                            ItemBody::MaybeMath(
495                                                _can_open,
496                                                _can_close,
497                                                _brace_context
498                                            )
499                                        )
500                                    });
501                                if !invalid && delim_brace_context == brace_context {
502                                    if (!is_display && can_close)
503                                        || (is_display && delim_is_display)
504                                    {
505                                        // This will skip ahead past everything we
506                                        // just inserted. Needed for correctness to
507                                        // ensure that a new scan is done after this item.
508                                        self.math_delims.clear();
509                                        break;
510                                    } else {
511                                        // Math cannot contain $, so the current item
512                                        // is invalid. Keep scanning to fill math_delims.
513                                        invalid = true;
514                                    }
515                                }
516                                self.math_delims.insert(
517                                    delim_is_display,
518                                    delim_brace_context,
519                                    scan_ix,
520                                    can_close,
521                                );
522                            }
523                            if self.tree[scan_ix].item.body.is_block() {
524                                // If this is a tight list, blocks and inlines might be
525                                // siblings. Inlines can't cross the boundary like that.
526                                scan = None;
527                                break;
528                            }
529                            scan = self.tree[scan_ix].next;
530                        }
531                        scan
532                    };
533
534                    if let Some(scan_ix) = result {
535                        self.make_math_span(cur_ix, scan_ix);
536                    } else {
537                        self.tree[cur_ix].item.body = ItemBody::Text {
538                            backslash_escaped: false,
539                        };
540                    }
541                }
542                ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
543                    if preceded_by_backslash {
544                        search_count -= 1;
545                        if search_count == 0 {
546                            self.tree[cur_ix].item.body = ItemBody::Text {
547                                backslash_escaped: false,
548                            };
549                            prev = cur;
550                            cur = self.tree[cur_ix].next;
551                            continue;
552                        }
553                    }
554
555                    if self.code_delims.is_populated() {
556                        // we have previously scanned all codeblock delimiters,
557                        // so we can reuse that work
558                        if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
559                            self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
560                        } else {
561                            self.tree[cur_ix].item.body = ItemBody::Text {
562                                backslash_escaped: false,
563                            };
564                        }
565                    } else {
566                        // we haven't previously scanned all codeblock delimiters,
567                        // so walk the AST
568                        let mut scan = if search_count > 0 {
569                            self.tree[cur_ix].next
570                        } else {
571                            None
572                        };
573                        while let Some(scan_ix) = scan {
574                            if let ItemBody::MaybeCode(delim_count, _) =
575                                self.tree[scan_ix].item.body
576                            {
577                                if search_count == delim_count {
578                                    self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
579                                    self.code_delims.clear();
580                                    break;
581                                } else {
582                                    self.code_delims.insert(delim_count, scan_ix);
583                                }
584                            }
585                            if self.tree[scan_ix].item.body.is_block() {
586                                // If this is a tight list, blocks and inlines might be
587                                // siblings. Inlines can't cross the boundary like that.
588                                scan = None;
589                                break;
590                            }
591                            scan = self.tree[scan_ix].next;
592                        }
593                        if scan.is_none() {
594                            self.tree[cur_ix].item.body = ItemBody::Text {
595                                backslash_escaped: false,
596                            };
597                        }
598                    }
599                }
600                ItemBody::MaybeLinkOpen => {
601                    self.tree[cur_ix].item.body = ItemBody::Text {
602                        backslash_escaped: false,
603                    };
604                    self.link_stack.push(LinkStackEl {
605                        node: cur_ix,
606                        ty: LinkStackTy::Link,
607                    });
608                }
609                ItemBody::MaybeImage => {
610                    self.tree[cur_ix].item.body = ItemBody::Text {
611                        backslash_escaped: false,
612                    };
613                    self.link_stack.push(LinkStackEl {
614                        node: cur_ix,
615                        ty: LinkStackTy::Image,
616                    });
617                }
618                ItemBody::MaybeLinkClose(could_be_ref) => {
619                    self.tree[cur_ix].item.body = ItemBody::Text {
620                        backslash_escaped: false,
621                    };
622                    if let Some(tos) = self.link_stack.pop() {
623                        if tos.ty == LinkStackTy::Disabled {
624                            continue;
625                        }
626                        let next = self.tree[cur_ix].next;
627                        if let Some((next_ix, url, title)) =
628                            self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
629                        {
630                            let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
631                            if let Some(prev_ix) = prev {
632                                self.tree[prev_ix].next = None;
633                            }
634                            cur = Some(tos.node);
635                            cur_ix = tos.node;
636                            let link_ix =
637                                self.allocs
638                                    .allocate_link(LinkType::Inline, url, title, "".into());
639                            self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
640                                ItemBody::Image(link_ix)
641                            } else {
642                                ItemBody::Link(link_ix)
643                            };
644                            self.tree[cur_ix].child = self.tree[cur_ix].next;
645                            self.tree[cur_ix].next = next_node;
646                            self.tree[cur_ix].item.end = next_ix;
647                            if let Some(next_node_ix) = next_node {
648                                self.tree[next_node_ix].item.start =
649                                    max(self.tree[next_node_ix].item.start, next_ix);
650                            }
651
652                            if tos.ty == LinkStackTy::Link {
653                                self.link_stack.disable_all_links();
654                            }
655                        } else {
656                            // ok, so its not an inline link. maybe it is a reference
657                            // to a defined link?
658                            let scan_result = scan_reference(
659                                &self.tree,
660                                block_text,
661                                next,
662                                self.options.contains(Options::ENABLE_FOOTNOTES),
663                                self.options.has_gfm_footnotes(),
664                            );
665                            let (node_after_link, link_type) = match scan_result {
666                                // [label][reference]
667                                RefScan::LinkLabel(_, end_ix) => {
668                                    // Toggle reference viability of the last closing bracket,
669                                    // so that we can skip it on future iterations in case
670                                    // it fails in this one. In particular, we won't call
671                                    // the broken link callback twice on one reference.
672                                    let reference_close_node = if let Some(node) =
673                                        scan_nodes_to_ix(&self.tree, next, end_ix - 1)
674                                    {
675                                        node
676                                    } else {
677                                        continue;
678                                    };
679                                    self.tree[reference_close_node].item.body =
680                                        ItemBody::MaybeLinkClose(false);
681                                    let next_node = self.tree[reference_close_node].next;
682
683                                    (next_node, LinkType::Reference)
684                                }
685                                // [reference][]
686                                RefScan::Collapsed(next_node) => {
687                                    // This reference has already been tried, and it's not
688                                    // valid. Skip it.
689                                    if !could_be_ref {
690                                        continue;
691                                    }
692                                    (next_node, LinkType::Collapsed)
693                                }
694                                // [shortcut]
695                                //
696                                // [shortcut]: /blah
697                                RefScan::Failed | RefScan::UnexpectedFootnote => {
698                                    if !could_be_ref {
699                                        continue;
700                                    }
701                                    (next, LinkType::Shortcut)
702                                }
703                            };
704
705                            // FIXME: references and labels are mixed in the naming of variables
706                            // below. Disambiguate!
707
708                            // (label, source_ix end)
709                            let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
710                                RefScan::LinkLabel(l, end_ix) => {
711                                    Some((ReferenceLabel::Link(l), end_ix))
712                                }
713                                RefScan::Collapsed(..)
714                                | RefScan::Failed
715                                | RefScan::UnexpectedFootnote => {
716                                    // No label? maybe it is a shortcut reference
717                                    let label_start = self.tree[tos.node].item.end - 1;
718                                    let label_end = self.tree[cur_ix].item.end;
719                                    scan_link_label(
720                                        &self.tree,
721                                        &self.text[label_start..label_end],
722                                        self.options.contains(Options::ENABLE_FOOTNOTES),
723                                        self.options.has_gfm_footnotes(),
724                                    )
725                                    .map(|(ix, label)| (label, label_start + ix))
726                                    .filter(|(_, end)| *end == label_end)
727                                }
728                            };
729
730                            let id = match &label {
731                                Some(
732                                    (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
733                                ) => l.clone(),
734                                None => "".into(),
735                            };
736
737                            // see if it's a footnote reference
738                            if let Some((ReferenceLabel::Footnote(l), end)) = label {
739                                let footref = self.allocs.allocate_cow(l);
740                                if let Some(def) = self
741                                    .allocs
742                                    .footdefs
743                                    .get_mut(self.allocs.cows[footref.0].to_owned())
744                                {
745                                    def.use_count += 1;
746                                }
747                                if !self.options.has_gfm_footnotes()
748                                    || self.allocs.footdefs.contains(&self.allocs.cows[footref.0])
749                                {
750                                    // If this came from a MaybeImage, then the `!` prefix
751                                    // isn't part of the footnote reference.
752                                    let footnote_ix = if tos.ty == LinkStackTy::Image {
753                                        self.tree[tos.node].next = Some(cur_ix);
754                                        self.tree[tos.node].child = None;
755                                        self.tree[tos.node].item.body =
756                                            ItemBody::SynthesizeChar('!');
757                                        self.tree[cur_ix].item.start =
758                                            self.tree[tos.node].item.start + 1;
759                                        self.tree[tos.node].item.end =
760                                            self.tree[tos.node].item.start + 1;
761                                        cur_ix
762                                    } else {
763                                        tos.node
764                                    };
765                                    // use `next` instead of `node_after_link` because
766                                    // node_after_link is calculated for a [collapsed][] link,
767                                    // which footnotes don't support.
768                                    self.tree[footnote_ix].next = next;
769                                    self.tree[footnote_ix].child = None;
770                                    self.tree[footnote_ix].item.body =
771                                        ItemBody::FootnoteReference(footref);
772                                    self.tree[footnote_ix].item.end = end;
773                                    prev = Some(footnote_ix);
774                                    cur = next;
775                                    self.link_stack.clear();
776                                    continue;
777                                }
778                            } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
779                                if let Some((def_link_type, url, title)) = self
780                                    .fetch_link_type_url_title(
781                                        link_label,
782                                        (self.tree[tos.node].item.start)..end,
783                                        link_type,
784                                    )
785                                {
786                                    let link_ix =
787                                        self.allocs.allocate_link(def_link_type, url, title, id);
788                                    self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
789                                    {
790                                        ItemBody::Image(link_ix)
791                                    } else {
792                                        ItemBody::Link(link_ix)
793                                    };
794                                    let label_node = self.tree[tos.node].next;
795
796                                    // lets do some tree surgery to add the link to the tree
797                                    // 1st: skip the label node and close node
798                                    self.tree[tos.node].next = node_after_link;
799
800                                    // then, if it exists, add the label node as a child to the link node
801                                    if label_node != cur {
802                                        self.tree[tos.node].child = label_node;
803
804                                        // finally: disconnect list of children
805                                        if let Some(prev_ix) = prev {
806                                            self.tree[prev_ix].next = None;
807                                        }
808                                    }
809
810                                    self.tree[tos.node].item.end = end;
811
812                                    // set up cur so next node will be node_after_link
813                                    cur = Some(tos.node);
814                                    cur_ix = tos.node;
815
816                                    if tos.ty == LinkStackTy::Link {
817                                        self.link_stack.disable_all_links();
818                                    }
819                                }
820                            }
821                        }
822                    }
823                }
824                _ => {
825                    // If this is a tight list, blocks and inlines might be
826                    // siblings. Inlines can't cross the boundary like that.
827                    if let Some(cur_ix) = cur {
828                        if self.tree[cur_ix].item.body.is_block() {
829                            self.link_stack.clear();
830                        }
831                    }
832                }
833            }
834            prev = cur;
835            cur = self.tree[cur_ix].next;
836        }
837        self.link_stack.clear();
838        self.code_delims.clear();
839        self.math_delims.clear();
840    }
841
842    fn handle_emphasis_and_hard_break(&mut self) {
843        let mut prev = None;
844        let mut prev_ix: TreeIndex;
845        let mut cur = self.tree.cur();
846
847        let mut single_quote_open: Option<TreeIndex> = None;
848        let mut double_quote_open: bool = false;
849
850        while let Some(mut cur_ix) = cur {
851            match self.tree[cur_ix].item.body {
852                ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
853                    let run_length = count;
854                    let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
855                    let both = can_open && can_close;
856                    if can_close {
857                        while let Some(el) =
858                            self.inline_stack
859                                .find_match(&mut self.tree, c, run_length, both)
860                        {
861                            // have a match!
862                            if let Some(prev_ix) = prev {
863                                self.tree[prev_ix].next = None;
864                            }
865                            let match_count = min(count, el.count);
866                            // start, end are tree node indices
867                            let mut end = cur_ix - 1;
868                            let mut start = el.start + el.count;
869
870                            // work from the inside out
871                            while start > el.start + el.count - match_count {
872                                let inc = if start > el.start + el.count - match_count + 1 {
873                                    2
874                                } else {
875                                    1
876                                };
877                                let ty = if c == b'~' {
878                                    ItemBody::Strikethrough
879                                } else if inc == 2 {
880                                    ItemBody::Strong
881                                } else {
882                                    ItemBody::Emphasis
883                                };
884
885                                let root = start - inc;
886                                end = end + inc;
887                                self.tree[root].item.body = ty;
888                                self.tree[root].item.end = self.tree[end].item.end;
889                                self.tree[root].child = Some(start);
890                                self.tree[root].next = None;
891                                start = root;
892                            }
893
894                            // set next for top most emph level
895                            prev_ix = el.start + el.count - match_count;
896                            prev = Some(prev_ix);
897                            cur = self.tree[cur_ix + match_count - 1].next;
898                            self.tree[prev_ix].next = cur;
899
900                            if el.count > match_count {
901                                self.inline_stack.push(InlineEl {
902                                    start: el.start,
903                                    count: el.count - match_count,
904                                    run_length: el.run_length,
905                                    c: el.c,
906                                    both: el.both,
907                                })
908                            }
909                            count -= match_count;
910                            if count > 0 {
911                                cur_ix = cur.unwrap();
912                            } else {
913                                break;
914                            }
915                        }
916                    }
917                    if count > 0 {
918                        if can_open {
919                            self.inline_stack.push(InlineEl {
920                                start: cur_ix,
921                                run_length,
922                                count,
923                                c,
924                                both,
925                            });
926                        } else {
927                            for i in 0..count {
928                                self.tree[cur_ix + i].item.body = ItemBody::Text {
929                                    backslash_escaped: false,
930                                };
931                            }
932                        }
933                        prev_ix = cur_ix + count - 1;
934                        prev = Some(prev_ix);
935                        cur = self.tree[prev_ix].next;
936                    }
937                }
938                ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
939                    self.tree[cur_ix].item.body = match c {
940                        b'\'' => {
941                            if let (Some(open_ix), true) = (single_quote_open, can_close) {
942                                self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
943                                single_quote_open = None;
944                            } else if can_open {
945                                single_quote_open = Some(cur_ix);
946                            }
947                            ItemBody::SynthesizeChar('’')
948                        }
949                        _ /* double quote */ => {
950                            if can_close && double_quote_open {
951                                double_quote_open = false;
952                                ItemBody::SynthesizeChar('”')
953                            } else {
954                                if can_open && !double_quote_open {
955                                    double_quote_open = true;
956                                }
957                                ItemBody::SynthesizeChar('“')
958                            }
959                        }
960                    };
961                    prev = cur;
962                    cur = self.tree[cur_ix].next;
963                }
964                ItemBody::HardBreak(true) => {
965                    if self.tree[cur_ix].next.is_none() {
966                        self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
967                    }
968                    prev = cur;
969                    cur = self.tree[cur_ix].next;
970                }
971                _ => {
972                    prev = cur;
973                    // If this is a tight list, blocks and inlines might be
974                    // siblings. Inlines can't cross the boundary like that.
975                    if let Some(cur_ix) = cur {
976                        if self.tree[cur_ix].item.body.is_block() {
977                            self.inline_stack.pop_all(&mut self.tree);
978                        }
979                    }
980                    cur = self.tree[cur_ix].next;
981                }
982            }
983        }
984        self.inline_stack.pop_all(&mut self.tree);
985    }
986
987    /// Returns next byte index, url and title.
988    fn scan_inline_link(
989        &self,
990        underlying: &'input str,
991        mut ix: usize,
992        node: Option<TreeIndex>,
993    ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
994        if scan_ch(&underlying.as_bytes()[ix..], b'(') == 0 {
995            return None;
996        }
997        ix += 1;
998
999        let scan_separator = |ix: &mut usize| {
1000            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1001            if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1002                *ix += bl;
1003                let mut line_start = LineStart::new(&underlying.as_bytes()[*ix..]);
1004                let _ = scan_containers(
1005                    &self.tree,
1006                    &mut line_start,
1007                    self.options.has_gfm_footnotes(),
1008                );
1009                *ix += line_start.bytes_scanned();
1010            }
1011            *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1012        };
1013
1014        scan_separator(&mut ix);
1015
1016        let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1017        let dest = unescape(dest, self.tree.is_in_table());
1018        ix += dest_length;
1019
1020        scan_separator(&mut ix);
1021
1022        let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1023            ix += bytes_scanned;
1024            scan_separator(&mut ix);
1025            t
1026        } else {
1027            "".into()
1028        };
1029        if scan_ch(&underlying.as_bytes()[ix..], b')') == 0 {
1030            return None;
1031        }
1032        ix += 1;
1033
1034        Some((ix, dest, title))
1035    }
1036
1037    // returns (bytes scanned, title cow)
1038    fn scan_link_title(
1039        &self,
1040        text: &'input str,
1041        start_ix: usize,
1042        node: Option<TreeIndex>,
1043    ) -> Option<(usize, CowStr<'input>)> {
1044        let bytes = text.as_bytes();
1045        let open = match bytes.get(start_ix) {
1046            Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1047            _ => return None,
1048        };
1049        let close = if open == b'(' { b')' } else { open };
1050
1051        let mut title = String::new();
1052        let mut mark = start_ix + 1;
1053        let mut i = start_ix + 1;
1054
1055        while i < bytes.len() {
1056            let c = bytes[i];
1057
1058            if c == close {
1059                let cow = if mark == 1 {
1060                    (i - start_ix + 1, text[mark..i].into())
1061                } else {
1062                    title.push_str(&text[mark..i]);
1063                    (i - start_ix + 1, title.into())
1064                };
1065
1066                return Some(cow);
1067            }
1068            if c == open {
1069                return None;
1070            }
1071
1072            if c == b'\n' || c == b'\r' {
1073                if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1074                    if self.tree[node_ix].item.start > i {
1075                        title.push_str(&text[mark..i]);
1076                        title.push('\n');
1077                        i = self.tree[node_ix].item.start;
1078                        mark = i;
1079                        continue;
1080                    }
1081                }
1082            }
1083            if c == b'&' {
1084                if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1085                    title.push_str(&text[mark..i]);
1086                    title.push_str(&value);
1087                    i += n;
1088                    mark = i;
1089                    continue;
1090                }
1091            }
1092            if self.tree.is_in_table()
1093                && c == b'\\'
1094                && i + 2 < bytes.len()
1095                && bytes[i + 1] == b'\\'
1096                && bytes[i + 2] == b'|'
1097            {
1098                // this runs if there are an even number of pipes in a table
1099                // if it's odd, then it gets parsed as normal
1100                title.push_str(&text[mark..i]);
1101                i += 2;
1102                mark = i;
1103            }
1104            if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1105                title.push_str(&text[mark..i]);
1106                i += 1;
1107                mark = i;
1108            }
1109
1110            i += 1;
1111        }
1112
1113        None
1114    }
1115
1116    fn make_math_span(&mut self, open: TreeIndex, mut close: TreeIndex) {
1117        let start_is_display = self.tree[open].next.filter(|&next_ix| {
1118            next_ix != close
1119                && matches!(
1120                    self.tree[next_ix].item.body,
1121                    ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1122                )
1123        });
1124        let end_is_display = self.tree[close].next.filter(|&next_ix| {
1125            matches!(
1126                self.tree[next_ix].item.body,
1127                ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1128            )
1129        });
1130        let is_display = start_is_display.is_some() && end_is_display.is_some();
1131        if is_display {
1132            // This unwrap() can't panic, because if the next variable were None, end_is_display would be None
1133            close = self.tree[close].next.unwrap();
1134            self.tree[open].next = Some(close);
1135            self.tree[open].item.end += 1;
1136            self.tree[close].item.start -= 1;
1137        } else {
1138            if self.tree[open].item.end == self.tree[close].item.start {
1139                // inline math spans cannot be empty
1140                self.tree[open].item.body = ItemBody::Text {
1141                    backslash_escaped: false,
1142                };
1143                return;
1144            }
1145            self.tree[open].next = Some(close);
1146        }
1147        let span_start = self.tree[open].item.end;
1148        let span_end = self.tree[close].item.start;
1149
1150        let bytes = self.text.as_bytes();
1151        let mut buf: Option<String> = None;
1152
1153        let mut start_ix = span_start;
1154        let mut ix = span_start;
1155        while ix < span_end {
1156            let c = bytes[ix];
1157            if c == b'\r' || c == b'\n' {
1158                ix += 1;
1159                let buf = buf.get_or_insert_with(|| String::with_capacity(ix - span_start));
1160                buf.push_str(&self.text[start_ix..ix]);
1161                let mut line_start = LineStart::new(&bytes[ix..]);
1162                let _ = scan_containers(
1163                    &self.tree,
1164                    &mut line_start,
1165                    self.options.has_gfm_footnotes(),
1166                );
1167                ix += line_start.bytes_scanned();
1168                start_ix = ix;
1169            } else if c == b'\\' && bytes.get(ix + 1) == Some(&b'|') && self.tree.is_in_table() {
1170                let buf = buf.get_or_insert_with(|| String::with_capacity(ix + 1 - span_start));
1171                buf.push_str(&self.text[start_ix..ix]);
1172                buf.push('|');
1173                ix += 2;
1174                start_ix = ix;
1175            } else {
1176                ix += 1;
1177            }
1178        }
1179
1180        let cow = if let Some(mut buf) = buf {
1181            buf.push_str(&self.text[start_ix..span_end]);
1182            buf.into()
1183        } else {
1184            self.text[span_start..span_end].into()
1185        };
1186
1187        self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), is_display);
1188        self.tree[open].item.end = self.tree[close].item.end;
1189        self.tree[open].next = self.tree[close].next;
1190    }
1191
1192    /// Make a code span.
1193    ///
1194    /// Both `open` and `close` are matching MaybeCode items.
1195    fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
1196        let bytes = self.text.as_bytes();
1197        let span_start = self.tree[open].item.end;
1198        let span_end = self.tree[close].item.start;
1199        let mut buf: Option<String> = None;
1200
1201        let mut start_ix = span_start;
1202        let mut ix = span_start;
1203        while ix < span_end {
1204            let c = bytes[ix];
1205            if c == b'\r' || c == b'\n' {
1206                let buf = buf.get_or_insert_with(|| String::with_capacity(ix + 1 - span_start));
1207                buf.push_str(&self.text[start_ix..ix]);
1208                buf.push(' ');
1209                ix += 1;
1210                let mut line_start = LineStart::new(&bytes[ix..]);
1211                let _ = scan_containers(
1212                    &self.tree,
1213                    &mut line_start,
1214                    self.options.has_gfm_footnotes(),
1215                );
1216                ix += line_start.bytes_scanned();
1217                start_ix = ix;
1218            } else if c == b'\\' && bytes.get(ix + 1) == Some(&b'|') && self.tree.is_in_table() {
1219                let buf = buf.get_or_insert_with(|| String::with_capacity(ix + 1 - span_start));
1220                buf.push_str(&self.text[start_ix..ix]);
1221                buf.push('|');
1222                ix += 2;
1223                start_ix = ix;
1224            } else {
1225                ix += 1;
1226            }
1227        }
1228
1229        let (opening, closing, all_spaces) = {
1230            let s = if let Some(buf) = &mut buf {
1231                buf.push_str(&self.text[start_ix..span_end]);
1232                &buf[..]
1233            } else {
1234                &self.text[span_start..span_end]
1235            };
1236            (
1237                s.as_bytes().first() == Some(&b' '),
1238                s.as_bytes().last() == Some(&b' '),
1239                s.bytes().all(|b| b == b' '),
1240            )
1241        };
1242
1243        let cow: CowStr<'input> = if !all_spaces && opening && closing {
1244            if let Some(mut buf) = buf {
1245                buf.remove(0);
1246                buf.pop();
1247                buf.into()
1248            } else {
1249                let lo = span_start + 1;
1250                let hi = (span_end - 1).max(lo);
1251                self.text[lo..hi].into()
1252            }
1253        } else if let Some(buf) = buf {
1254            buf.into()
1255        } else {
1256            self.text[span_start..span_end].into()
1257        };
1258
1259        if preceding_backslash {
1260            self.tree[open].item.body = ItemBody::Text {
1261                backslash_escaped: true,
1262            };
1263            self.tree[open].item.end = self.tree[open].item.start + 1;
1264            self.tree[open].next = Some(close);
1265            self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1266            self.tree[close].item.start = self.tree[open].item.start + 1;
1267        } else {
1268            self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1269            self.tree[open].item.end = self.tree[close].item.end;
1270            self.tree[open].next = self.tree[close].next;
1271        }
1272    }
1273
1274    /// On success, returns a buffer containing the inline html and byte offset.
1275    /// When no bytes were skipped, the buffer will be empty and the html can be
1276    /// represented as a subslice of the input string.
1277    fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1278        let c = *bytes.get(ix)?;
1279        if c == b'!' {
1280            Some((
1281                vec![],
1282                scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
1283            ))
1284        } else if c == b'?' {
1285            Some((
1286                vec![],
1287                scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
1288            ))
1289        } else {
1290            let (span, i) = scan_html_block_inner(
1291                // Subtract 1 to include the < character
1292                &bytes[(ix - 1)..],
1293                Some(&|bytes| {
1294                    let mut line_start = LineStart::new(bytes);
1295                    let _ = scan_containers(
1296                        &self.tree,
1297                        &mut line_start,
1298                        self.options.has_gfm_footnotes(),
1299                    );
1300                    line_start.bytes_scanned()
1301                }),
1302            )?;
1303            Some((span, i + ix - 1))
1304        }
1305    }
1306
1307    /// Consumes the event iterator and produces an iterator that produces
1308    /// `(Event, Range)` pairs, where the `Range` value maps to the corresponding
1309    /// range in the markdown source.
1310    pub fn into_offset_iter(self) -> OffsetIter<'input, F> {
1311        OffsetIter { inner: self }
1312    }
1313}
1314
1315/// Returns number of containers scanned.
1316pub(crate) fn scan_containers(
1317    tree: &Tree<Item>,
1318    line_start: &mut LineStart<'_>,
1319    gfm_footnotes: bool,
1320) -> usize {
1321    let mut i = 0;
1322    for &node_ix in tree.walk_spine() {
1323        match tree[node_ix].item.body {
1324            ItemBody::BlockQuote(..) => {
1325                let save = line_start.clone();
1326                let _ = line_start.scan_space(3);
1327                if !line_start.scan_blockquote_marker() {
1328                    *line_start = save;
1329                    break;
1330                }
1331            }
1332            ItemBody::ListItem(indent) => {
1333                let save = line_start.clone();
1334                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1335                    *line_start = save;
1336                    break;
1337                }
1338            }
1339            ItemBody::DefinitionListDefinition(indent) => {
1340                let save = line_start.clone();
1341                if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1342                    *line_start = save;
1343                    break;
1344                }
1345            }
1346            ItemBody::FootnoteDefinition(..) if gfm_footnotes => {
1347                let save = line_start.clone();
1348                if !line_start.scan_space(4) && !line_start.is_at_eol() {
1349                    *line_start = save;
1350                    break;
1351                }
1352            }
1353            _ => (),
1354        }
1355        i += 1;
1356    }
1357    i
1358}
1359
1360impl Tree<Item> {
1361    pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1362        if end > start {
1363            if let Some(ix) = self.cur() {
1364                if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1365                {
1366                    self[ix].item.end = end;
1367                    return;
1368                }
1369            }
1370            self.append(Item {
1371                start,
1372                end,
1373                body: ItemBody::Text { backslash_escaped },
1374            });
1375        }
1376    }
1377    /// Returns true if the current node is inside a table.
1378    ///
1379    /// If `cur` is an ItemBody::Table, it would return false,
1380    /// but since the `TableRow` and `TableHead` and `TableCell`
1381    /// are children of the table, anything doing inline parsing
1382    /// doesn't need to care about that.
1383    pub(crate) fn is_in_table(&self) -> bool {
1384        fn might_be_in_table(item: &Item) -> bool {
1385            item.body.is_inline()
1386                || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
1387                    | ItemBody::TableCell)
1388        }
1389        for &ix in self.walk_spine().rev() {
1390            if matches!(self[ix].item.body, ItemBody::Table(_)) {
1391                return true;
1392            }
1393            if !might_be_in_table(&self[ix].item) {
1394                return false;
1395            }
1396        }
1397        false
1398    }
1399}
1400
1401#[derive(Copy, Clone, Debug)]
1402struct InlineEl {
1403    /// offset of tree node
1404    start: TreeIndex,
1405    /// number of delimiters available for matching
1406    count: usize,
1407    /// length of the run that these delimiters came from
1408    run_length: usize,
1409    /// b'*', b'_', or b'~'
1410    c: u8,
1411    /// can both open and close
1412    both: bool,
1413}
1414
1415#[derive(Debug, Clone, Default)]
1416struct InlineStack {
1417    stack: Vec<InlineEl>,
1418    // Lower bounds for matching indices in the stack. For example
1419    // a strikethrough delimiter will never match with any element
1420    // in the stack with index smaller than
1421    // `lower_bounds[InlineStack::TILDES]`.
1422    lower_bounds: [usize; 9],
1423}
1424
1425impl InlineStack {
1426    /// These are indices into the lower bounds array.
1427    /// Not both refers to the property that the delimiter can not both
1428    /// be opener as a closer.
1429    const UNDERSCORE_NOT_BOTH: usize = 0;
1430    const ASTERISK_NOT_BOTH: usize = 1;
1431    const ASTERISK_BASE: usize = 2;
1432    const TILDES: usize = 5;
1433    const UNDERSCORE_BASE: usize = 6;
1434
1435    fn pop_all(&mut self, tree: &mut Tree<Item>) {
1436        for el in self.stack.drain(..) {
1437            for i in 0..el.count {
1438                tree[el.start + i].item.body = ItemBody::Text {
1439                    backslash_escaped: false,
1440                };
1441            }
1442        }
1443        self.lower_bounds = [0; 9];
1444    }
1445
1446    fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1447        if c == b'_' {
1448            let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
1449            if both {
1450                mod3_lower
1451            } else {
1452                min(
1453                    mod3_lower,
1454                    self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1455                )
1456            }
1457        } else if c == b'*' {
1458            let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1459            if both {
1460                mod3_lower
1461            } else {
1462                min(
1463                    mod3_lower,
1464                    self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1465                )
1466            }
1467        } else {
1468            self.lower_bounds[InlineStack::TILDES]
1469        }
1470    }
1471
1472    fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1473        if c == b'_' {
1474            if both {
1475                self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
1476            } else {
1477                self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1478            }
1479        } else if c == b'*' {
1480            self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1481            if !both {
1482                self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1483            }
1484        } else {
1485            self.lower_bounds[InlineStack::TILDES] = new_bound;
1486        }
1487    }
1488
1489    fn truncate(&mut self, new_bound: usize) {
1490        self.stack.truncate(new_bound);
1491        for lower_bound in &mut self.lower_bounds {
1492            if *lower_bound > new_bound {
1493                *lower_bound = new_bound;
1494            }
1495        }
1496    }
1497
1498    fn find_match(
1499        &mut self,
1500        tree: &mut Tree<Item>,
1501        c: u8,
1502        run_length: usize,
1503        both: bool,
1504    ) -> Option<InlineEl> {
1505        let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1506        let res = self.stack[lowerbound..]
1507            .iter()
1508            .cloned()
1509            .enumerate()
1510            .rfind(|(_, el)| {
1511                if c == b'~' && run_length != el.run_length {
1512                    return false;
1513                }
1514                el.c == c
1515                    && (!both && !el.both
1516                        || (run_length + el.run_length) % 3 != 0
1517                        || run_length % 3 == 0)
1518            });
1519
1520        if let Some((matching_ix, matching_el)) = res {
1521            let matching_ix = matching_ix + lowerbound;
1522            for el in &self.stack[(matching_ix + 1)..] {
1523                for i in 0..el.count {
1524                    tree[el.start + i].item.body = ItemBody::Text {
1525                        backslash_escaped: false,
1526                    };
1527                }
1528            }
1529            self.truncate(matching_ix);
1530            Some(matching_el)
1531        } else {
1532            self.set_lowerbound(c, run_length, both, self.stack.len());
1533            None
1534        }
1535    }
1536
1537    fn trim_lower_bound(&mut self, ix: usize) {
1538        self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1539    }
1540
1541    fn push(&mut self, el: InlineEl) {
1542        if el.c == b'~' {
1543            self.trim_lower_bound(InlineStack::TILDES);
1544        }
1545        self.stack.push(el)
1546    }
1547}
1548
1549#[derive(Debug, Clone)]
1550enum RefScan<'a> {
1551    // label, source ix of label end
1552    LinkLabel(CowStr<'a>, usize),
1553    // contains next node index
1554    Collapsed(Option<TreeIndex>),
1555    UnexpectedFootnote,
1556    Failed,
1557}
1558
1559/// Skips forward within a block to a node which spans (ends inclusive) the given
1560/// index into the source.
1561fn scan_nodes_to_ix(
1562    tree: &Tree<Item>,
1563    mut node: Option<TreeIndex>,
1564    ix: usize,
1565) -> Option<TreeIndex> {
1566    while let Some(node_ix) = node {
1567        if tree[node_ix].item.end <= ix {
1568            node = tree[node_ix].next;
1569        } else {
1570            break;
1571        }
1572    }
1573    node
1574}
1575
1576/// Scans an inline link label, which cannot be interrupted.
1577/// Returns number of bytes (including brackets) and label on success.
1578fn scan_link_label<'text>(
1579    tree: &Tree<Item>,
1580    text: &'text str,
1581    allow_footnote_refs: bool,
1582    gfm_footnotes: bool,
1583) -> Option<(usize, ReferenceLabel<'text>)> {
1584    let bytes = text.as_bytes();
1585    if bytes.len() < 2 || bytes[0] != b'[' {
1586        return None;
1587    }
1588    let linebreak_handler = |bytes: &[u8]| {
1589        let mut line_start = LineStart::new(bytes);
1590        let _ = scan_containers(tree, &mut line_start, gfm_footnotes);
1591        Some(line_start.bytes_scanned())
1592    };
1593    if allow_footnote_refs && b'^' == bytes[1] && bytes.get(2) != Some(&b']') {
1594        let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = if gfm_footnotes {
1595            &|_| None
1596        } else {
1597            &linebreak_handler
1598        };
1599        if let Some((byte_index, cow)) =
1600            scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
1601        {
1602            return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
1603        }
1604    }
1605    let (byte_index, cow) =
1606        scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
1607    Some((byte_index + 1, ReferenceLabel::Link(cow)))
1608}
1609
1610fn scan_reference<'b>(
1611    tree: &Tree<Item>,
1612    text: &'b str,
1613    cur: Option<TreeIndex>,
1614    allow_footnote_refs: bool,
1615    gfm_footnotes: bool,
1616) -> RefScan<'b> {
1617    let cur_ix = match cur {
1618        None => return RefScan::Failed,
1619        Some(cur_ix) => cur_ix,
1620    };
1621    let start = tree[cur_ix].item.start;
1622    let tail = &text.as_bytes()[start..];
1623
1624    if tail.starts_with(b"[]") {
1625        // TODO: this unwrap is sus and should be looked at closer
1626        let closing_node = tree[cur_ix].next.unwrap();
1627        RefScan::Collapsed(tree[closing_node].next)
1628    } else {
1629        let label = scan_link_label(tree, &text[start..], allow_footnote_refs, gfm_footnotes);
1630        match label {
1631            Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
1632            Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
1633            None => RefScan::Failed,
1634        }
1635    }
1636}
1637
1638#[derive(Clone, Default)]
1639struct LinkStack {
1640    inner: Vec<LinkStackEl>,
1641    disabled_ix: usize,
1642}
1643
1644impl LinkStack {
1645    fn push(&mut self, el: LinkStackEl) {
1646        self.inner.push(el);
1647    }
1648
1649    fn pop(&mut self) -> Option<LinkStackEl> {
1650        let el = self.inner.pop();
1651        self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1652        el
1653    }
1654
1655    fn clear(&mut self) {
1656        self.inner.clear();
1657        self.disabled_ix = 0;
1658    }
1659
1660    fn disable_all_links(&mut self) {
1661        for el in &mut self.inner[self.disabled_ix..] {
1662            if el.ty == LinkStackTy::Link {
1663                el.ty = LinkStackTy::Disabled;
1664            }
1665        }
1666        self.disabled_ix = self.inner.len();
1667    }
1668}
1669
1670#[derive(Clone, Debug)]
1671struct LinkStackEl {
1672    node: TreeIndex,
1673    ty: LinkStackTy,
1674}
1675
1676#[derive(PartialEq, Clone, Debug)]
1677enum LinkStackTy {
1678    Link,
1679    Image,
1680    Disabled,
1681}
1682
1683/// Contains the destination URL, title and source span of a reference definition.
1684#[derive(Clone, Debug)]
1685pub struct LinkDef<'a> {
1686    pub dest: CowStr<'a>,
1687    pub title: Option<CowStr<'a>>,
1688    pub span: Range<usize>,
1689}
1690
1691impl<'a> LinkDef<'a> {
1692    pub fn into_static(self) -> LinkDef<'static> {
1693        LinkDef {
1694            dest: self.dest.into_static(),
1695            title: self.title.map(|s| s.into_static()),
1696            span: self.span,
1697        }
1698    }
1699}
1700
1701/// Contains the destination URL, title and source span of a reference definition.
1702#[derive(Clone, Debug)]
1703pub struct FootnoteDef {
1704    pub use_count: usize,
1705}
1706
1707/// Tracks tree indices of code span delimiters of each length. It should prevent
1708/// quadratic scanning behaviours by providing (amortized) constant time lookups.
1709struct CodeDelims {
1710    inner: HashMap<usize, VecDeque<TreeIndex>>,
1711    seen_first: bool,
1712}
1713
1714impl CodeDelims {
1715    fn new() -> Self {
1716        Self {
1717            inner: Default::default(),
1718            seen_first: false,
1719        }
1720    }
1721
1722    fn insert(&mut self, count: usize, ix: TreeIndex) {
1723        if self.seen_first {
1724            self.inner.entry(count).or_default().push_back(ix);
1725        } else {
1726            // Skip the first insert, since that delimiter will always
1727            // be an opener and not a closer.
1728            self.seen_first = true;
1729        }
1730    }
1731
1732    fn is_populated(&self) -> bool {
1733        !self.inner.is_empty()
1734    }
1735
1736    fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1737        while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1738            if ix > open_ix {
1739                return Some(ix);
1740            }
1741        }
1742        None
1743    }
1744
1745    fn clear(&mut self) {
1746        self.inner.clear();
1747        self.seen_first = false;
1748    }
1749}
1750
1751/// Tracks brace contexts and delimiter length for math delimiters.
1752/// Provides amortized constant-time lookups.
1753struct MathDelims {
1754    inner: HashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
1755}
1756
1757impl MathDelims {
1758    fn new() -> Self {
1759        Self {
1760            inner: Default::default(),
1761        }
1762    }
1763
1764    fn insert(
1765        &mut self,
1766        delim_is_display: bool,
1767        brace_context: u8,
1768        ix: TreeIndex,
1769        can_close: bool,
1770    ) {
1771        self.inner
1772            .entry(brace_context)
1773            .or_default()
1774            .push_back((ix, can_close, delim_is_display));
1775    }
1776
1777    fn is_populated(&self) -> bool {
1778        !self.inner.is_empty()
1779    }
1780
1781    fn find(
1782        &mut self,
1783        tree: &Tree<Item>,
1784        open_ix: TreeIndex,
1785        is_display: bool,
1786        brace_context: u8,
1787    ) -> Option<TreeIndex> {
1788        while let Some((ix, can_close, delim_is_display)) =
1789            self.inner.get_mut(&brace_context)?.pop_front()
1790        {
1791            if ix <= open_ix || (is_display && tree[open_ix].next == Some(ix)) {
1792                continue;
1793            }
1794            let can_close = can_close && tree[open_ix].item.end != tree[ix].item.start;
1795            if (!is_display && can_close) || (is_display && delim_is_display) {
1796                return Some(ix);
1797            }
1798            // if we can't use it, leave it in the queue as a tombstone for the next
1799            // thing that tries to match it
1800            self.inner
1801                .get_mut(&brace_context)?
1802                .push_front((ix, can_close, delim_is_display));
1803            break;
1804        }
1805        None
1806    }
1807
1808    fn clear(&mut self) {
1809        self.inner.clear();
1810    }
1811}
1812
1813#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1814pub(crate) struct LinkIndex(usize);
1815
1816#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1817pub(crate) struct CowIndex(usize);
1818
1819#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1820pub(crate) struct AlignmentIndex(usize);
1821
1822#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1823pub(crate) struct HeadingIndex(NonZeroUsize);
1824
1825#[derive(Clone)]
1826pub(crate) struct Allocations<'a> {
1827    pub refdefs: RefDefs<'a>,
1828    pub footdefs: FootnoteDefs<'a>,
1829    links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
1830    cows: Vec<CowStr<'a>>,
1831    alignments: Vec<Vec<Alignment>>,
1832    headings: Vec<HeadingAttributes<'a>>,
1833}
1834
1835/// Used by the heading attributes extension.
1836#[derive(Clone)]
1837pub(crate) struct HeadingAttributes<'a> {
1838    pub id: Option<CowStr<'a>>,
1839    pub classes: Vec<CowStr<'a>>,
1840    pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
1841}
1842
1843/// Keeps track of the reference definitions defined in the document.
1844#[derive(Clone, Default, Debug)]
1845pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);
1846
1847/// Keeps track of the footnote definitions defined in the document.
1848#[derive(Clone, Default, Debug)]
1849pub struct FootnoteDefs<'input>(pub(crate) HashMap<FootnoteLabel<'input>, FootnoteDef>);
1850
1851impl<'input, 'b, 's> RefDefs<'input>
1852where
1853    's: 'b,
1854{
1855    /// Performs a lookup on reference label using unicode case folding.
1856    pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
1857        self.0.get(&UniCase::new(key.into()))
1858    }
1859
1860    /// Provides an iterator over all the document's reference definitions.
1861    pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
1862        self.0.iter().map(|(k, v)| (k.as_ref(), v))
1863    }
1864}
1865
1866impl<'input, 'b, 's> FootnoteDefs<'input>
1867where
1868    's: 'b,
1869{
1870    /// Performs a lookup on reference label using unicode case folding.
1871    pub fn contains(&'s self, key: &'b str) -> bool {
1872        self.0.contains_key(&UniCase::new(key.into()))
1873    }
1874    /// Performs a lookup on reference label using unicode case folding.
1875    pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
1876        self.0.get_mut(&UniCase::new(key))
1877    }
1878}
1879
1880impl<'a> Allocations<'a> {
1881    pub fn new() -> Self {
1882        Self {
1883            refdefs: RefDefs::default(),
1884            footdefs: FootnoteDefs::default(),
1885            links: Vec::with_capacity(128),
1886            cows: Vec::new(),
1887            alignments: Vec::new(),
1888            headings: Vec::new(),
1889        }
1890    }
1891
1892    pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1893        let ix = self.cows.len();
1894        self.cows.push(cow);
1895        CowIndex(ix)
1896    }
1897
1898    pub fn allocate_link(
1899        &mut self,
1900        ty: LinkType,
1901        url: CowStr<'a>,
1902        title: CowStr<'a>,
1903        id: CowStr<'a>,
1904    ) -> LinkIndex {
1905        let ix = self.links.len();
1906        self.links.push((ty, url, title, id));
1907        LinkIndex(ix)
1908    }
1909
1910    pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1911        let ix = self.alignments.len();
1912        self.alignments.push(alignment);
1913        AlignmentIndex(ix)
1914    }
1915
1916    pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
1917        let ix = self.headings.len();
1918        self.headings.push(attrs);
1919        // This won't panic. `self.headings.len()` can't be `usize::MAX` since
1920        // such a long Vec cannot fit in memory.
1921        let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
1922        HeadingIndex(ix_nonzero)
1923    }
1924
1925    pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
1926        std::mem::replace(&mut self.cows[ix.0], "".into())
1927    }
1928
1929    pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
1930        let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
1931        std::mem::replace(&mut self.links[ix.0], default_link)
1932    }
1933
1934    pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
1935        std::mem::take(&mut self.alignments[ix.0])
1936    }
1937}
1938
1939impl<'a> Index<CowIndex> for Allocations<'a> {
1940    type Output = CowStr<'a>;
1941
1942    fn index(&self, ix: CowIndex) -> &Self::Output {
1943        self.cows.index(ix.0)
1944    }
1945}
1946
1947impl<'a> Index<LinkIndex> for Allocations<'a> {
1948    type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
1949
1950    fn index(&self, ix: LinkIndex) -> &Self::Output {
1951        self.links.index(ix.0)
1952    }
1953}
1954
1955impl<'a> Index<AlignmentIndex> for Allocations<'a> {
1956    type Output = Vec<Alignment>;
1957
1958    fn index(&self, ix: AlignmentIndex) -> &Self::Output {
1959        self.alignments.index(ix.0)
1960    }
1961}
1962
1963impl<'a> Index<HeadingIndex> for Allocations<'a> {
1964    type Output = HeadingAttributes<'a>;
1965
1966    fn index(&self, ix: HeadingIndex) -> &Self::Output {
1967        self.headings.index(ix.0.get() - 1)
1968    }
1969}
1970
1971/// A struct containing information on the reachability of certain inline HTML
1972/// elements. In particular, for cdata elements (`<![CDATA[`), processing
1973/// elements (`<?`) and declarations (`<!DECLARATION`). The respectives usizes
1974/// represent the indices before which a scan will always fail and can hence
1975/// be skipped.
1976#[derive(Clone, Default)]
1977pub(crate) struct HtmlScanGuard {
1978    pub cdata: usize,
1979    pub processing: usize,
1980    pub declaration: usize,
1981    pub comment: usize,
1982}
1983
1984/// Trait for broken link callbacks.
1985///
1986/// See [Parser::new_with_broken_link_callback].
1987/// Automatically implemented for closures with the appropriate signature.
1988pub trait BrokenLinkCallback<'input> {
1989    fn handle_broken_link(
1990        &mut self,
1991        link: BrokenLink<'input>,
1992    ) -> Option<(CowStr<'input>, CowStr<'input>)>;
1993}
1994
1995impl<'input, T> BrokenLinkCallback<'input> for T
1996where
1997    T: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
1998{
1999    fn handle_broken_link(
2000        &mut self,
2001        link: BrokenLink<'input>,
2002    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2003        self(link)
2004    }
2005}
2006
2007impl<'input> BrokenLinkCallback<'input> for Box<dyn BrokenLinkCallback<'input>> {
2008    fn handle_broken_link(
2009        &mut self,
2010        link: BrokenLink<'input>,
2011    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2012        (**self).handle_broken_link(link)
2013    }
2014}
2015
2016/// Broken link callback that does nothing.
2017#[derive(Debug)]
2018pub struct DefaultBrokenLinkCallback;
2019
2020impl<'input> BrokenLinkCallback<'input> for DefaultBrokenLinkCallback {
2021    fn handle_broken_link(
2022        &mut self,
2023        _link: BrokenLink<'input>,
2024    ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2025        None
2026    }
2027}
2028
2029/// Markdown event and source range iterator.
2030///
2031/// Generates tuples where the first element is the markdown event and the second
2032/// is a the corresponding range in the source string.
2033///
2034/// Constructed from a `Parser` using its
2035/// [`into_offset_iter`](struct.Parser.html#method.into_offset_iter) method.
2036#[derive(Debug)]
2037pub struct OffsetIter<'a, F = DefaultBrokenLinkCallback> {
2038    inner: Parser<'a, F>,
2039}
2040
2041impl<'a, F: BrokenLinkCallback<'a>> OffsetIter<'a, F> {
2042    /// Returns a reference to the internal reference definition tracker.
2043    pub fn reference_definitions(&self) -> &RefDefs<'_> {
2044        self.inner.reference_definitions()
2045    }
2046}
2047
2048impl<'a, F: BrokenLinkCallback<'a>> Iterator for OffsetIter<'a, F> {
2049    type Item = (Event<'a>, Range<usize>);
2050
2051    fn next(&mut self) -> Option<Self::Item> {
2052        match self.inner.tree.cur() {
2053            None => {
2054                let ix = self.inner.tree.pop()?;
2055                let tag_end = body_to_tag_end(&self.inner.tree[ix].item.body);
2056                self.inner.tree.next_sibling(ix);
2057                let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end;
2058                debug_assert!(span.start <= span.end);
2059                Some((Event::End(tag_end), span))
2060            }
2061            Some(cur_ix) => {
2062                if self.inner.tree[cur_ix].item.body.is_maybe_inline() {
2063                    self.inner.handle_inline();
2064                }
2065
2066                let node = self.inner.tree[cur_ix];
2067                let item = node.item;
2068                let event = item_to_event(item, self.inner.text, &mut self.inner.allocs);
2069                if let Event::Start(..) = event {
2070                    self.inner.tree.push();
2071                } else {
2072                    self.inner.tree.next_sibling(cur_ix);
2073                }
2074                debug_assert!(item.start <= item.end);
2075                Some((event, item.start..item.end))
2076            }
2077        }
2078    }
2079}
2080
2081fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2082    match *body {
2083        ItemBody::Paragraph => TagEnd::Paragraph,
2084        ItemBody::Emphasis => TagEnd::Emphasis,
2085        ItemBody::Strong => TagEnd::Strong,
2086        ItemBody::Strikethrough => TagEnd::Strikethrough,
2087        ItemBody::Link(..) => TagEnd::Link,
2088        ItemBody::Image(..) => TagEnd::Image,
2089        ItemBody::Heading(level, _) => TagEnd::Heading(level),
2090        ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) => TagEnd::CodeBlock,
2091        ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
2092        ItemBody::HtmlBlock => TagEnd::HtmlBlock,
2093        ItemBody::List(_, c, _) => {
2094            let is_ordered = c == b'.' || c == b')';
2095            TagEnd::List(is_ordered)
2096        }
2097        ItemBody::ListItem(_) => TagEnd::Item,
2098        ItemBody::TableHead => TagEnd::TableHead,
2099        ItemBody::TableCell => TagEnd::TableCell,
2100        ItemBody::TableRow => TagEnd::TableRow,
2101        ItemBody::Table(..) => TagEnd::Table,
2102        ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2103        ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2104        ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
2105        ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
2106        ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
2107        _ => panic!("unexpected item body {:?}", body),
2108    }
2109}
2110
2111fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2112    let tag = match item.body {
2113        ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2114        ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2115        ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2116        ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2117        ItemBody::HtmlBlock => Tag::HtmlBlock,
2118        ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2119        ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2120        ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
2121        ItemBody::SoftBreak => return Event::SoftBreak,
2122        ItemBody::HardBreak(_) => return Event::HardBreak,
2123        ItemBody::FootnoteReference(cow_ix) => {
2124            return Event::FootnoteReference(allocs.take_cow(cow_ix))
2125        }
2126        ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2127        ItemBody::Rule => return Event::Rule,
2128        ItemBody::Paragraph => Tag::Paragraph,
2129        ItemBody::Emphasis => Tag::Emphasis,
2130        ItemBody::Strong => Tag::Strong,
2131        ItemBody::Strikethrough => Tag::Strikethrough,
2132        ItemBody::Link(link_ix) => {
2133            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2134            Tag::Link {
2135                link_type,
2136                dest_url,
2137                title,
2138                id,
2139            }
2140        }
2141        ItemBody::Image(link_ix) => {
2142            let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2143            Tag::Image {
2144                link_type,
2145                dest_url,
2146                title,
2147                id,
2148            }
2149        }
2150        ItemBody::Heading(level, Some(heading_ix)) => {
2151            let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2152            Tag::Heading {
2153                level,
2154                id: id.clone(),
2155                classes: classes.clone(),
2156                attrs: attrs.clone(),
2157            }
2158        }
2159        ItemBody::Heading(level, None) => Tag::Heading {
2160            level,
2161            id: None,
2162            classes: Vec::new(),
2163            attrs: Vec::new(),
2164        },
2165        ItemBody::FencedCodeBlock(cow_ix) => {
2166            Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2167        }
2168        ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2169        ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2170        ItemBody::List(_, c, listitem_start) => {
2171            if c == b'.' || c == b')' {
2172                Tag::List(Some(listitem_start))
2173            } else {
2174                Tag::List(None)
2175            }
2176        }
2177        ItemBody::ListItem(_) => Tag::Item,
2178        ItemBody::TableHead => Tag::TableHead,
2179        ItemBody::TableCell => Tag::TableCell,
2180        ItemBody::TableRow => Tag::TableRow,
2181        ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2182        ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2183        ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2184        ItemBody::Math(cow_ix, is_display) => {
2185            return if is_display {
2186                Event::DisplayMath(allocs.take_cow(cow_ix))
2187            } else {
2188                Event::InlineMath(allocs.take_cow(cow_ix))
2189            }
2190        }
2191        ItemBody::DefinitionList(_) => Tag::DefinitionList,
2192        ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
2193        ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
2194        _ => panic!("unexpected item body {:?}", item.body),
2195    };
2196
2197    Event::Start(tag)
2198}
2199
2200impl<'a, F: BrokenLinkCallback<'a>> Iterator for Parser<'a, F> {
2201    type Item = Event<'a>;
2202
2203    fn next(&mut self) -> Option<Event<'a>> {
2204        match self.tree.cur() {
2205            None => {
2206                let ix = self.tree.pop()?;
2207                let tag_end = body_to_tag_end(&self.tree[ix].item.body);
2208                self.tree.next_sibling(ix);
2209                Some(Event::End(tag_end))
2210            }
2211            Some(cur_ix) => {
2212                if self.tree[cur_ix].item.body.is_maybe_inline() {
2213                    self.handle_inline();
2214                }
2215
2216                let node = self.tree[cur_ix];
2217                let item = node.item;
2218                let event = item_to_event(item, self.text, &mut self.allocs);
2219                if let Event::Start(ref _tag) = event {
2220                    self.tree.push();
2221                } else {
2222                    self.tree.next_sibling(cur_ix);
2223                }
2224                Some(event)
2225            }
2226        }
2227    }
2228}
2229
2230impl<'a, F: BrokenLinkCallback<'a>> FusedIterator for Parser<'a, F> {}
2231
2232#[cfg(test)]
2233mod test {
2234    use super::*;
2235    use crate::tree::Node;
2236
2237    // TODO: move these tests to tests/html.rs?
2238
2239    fn parser_with_extensions(text: &str) -> Parser<'_> {
2240        let mut opts = Options::empty();
2241        opts.insert(Options::ENABLE_TABLES);
2242        opts.insert(Options::ENABLE_FOOTNOTES);
2243        opts.insert(Options::ENABLE_STRIKETHROUGH);
2244        opts.insert(Options::ENABLE_TASKLISTS);
2245
2246        Parser::new_ext(text, opts)
2247    }
2248
2249    #[test]
2250    #[cfg(target_pointer_width = "64")]
2251    fn node_size() {
2252        let node_size = std::mem::size_of::<Node<Item>>();
2253        assert_eq!(48, node_size);
2254    }
2255
2256    #[test]
2257    #[cfg(target_pointer_width = "64")]
2258    fn body_size() {
2259        let body_size = std::mem::size_of::<ItemBody>();
2260        assert_eq!(16, body_size);
2261    }
2262
2263    #[test]
2264    fn single_open_fish_bracket() {
2265        // dont crash
2266        assert_eq!(3, Parser::new("<").count());
2267    }
2268
2269    #[test]
2270    fn lone_hashtag() {
2271        // dont crash
2272        assert_eq!(2, Parser::new("#").count());
2273    }
2274
2275    #[test]
2276    fn lots_of_backslashes() {
2277        // dont crash
2278        Parser::new("\\\\\r\r").count();
2279        Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2280    }
2281
2282    #[test]
2283    fn issue_320() {
2284        // dont crash
2285        parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2286    }
2287
2288    #[test]
2289    fn issue_319() {
2290        // dont crash
2291        parser_with_extensions("|\r-]([^|\r-]([^").count();
2292        parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2293    }
2294
2295    #[test]
2296    fn issue_303() {
2297        // dont crash
2298        parser_with_extensions("[^\r\ra]").count();
2299        parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2300    }
2301
2302    #[test]
2303    fn issue_313() {
2304        // dont crash
2305        parser_with_extensions("*]0[^\r\r*]0[^").count();
2306        parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2307    }
2308
2309    #[test]
2310    fn issue_311() {
2311        // dont crash
2312        parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
2313    }
2314
2315    #[test]
2316    fn issue_283() {
2317        let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2318        // dont crash
2319        parser_with_extensions(input).count();
2320    }
2321
2322    #[test]
2323    fn issue_289() {
2324        // dont crash
2325        parser_with_extensions("> - \\\n> - ").count();
2326        parser_with_extensions("- \n\n").count();
2327    }
2328
2329    #[test]
2330    fn issue_306() {
2331        // dont crash
2332        parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2333    }
2334
2335    #[test]
2336    fn issue_305() {
2337        // dont crash
2338        parser_with_extensions("_6**6*_*").count();
2339    }
2340
2341    #[test]
2342    fn another_emphasis_panic() {
2343        parser_with_extensions("*__#_#__*").count();
2344    }
2345
2346    #[test]
2347    fn offset_iter() {
2348        let event_offsets: Vec<_> = Parser::new("*hello* world")
2349            .into_offset_iter()
2350            .map(|(_ev, range)| range)
2351            .collect();
2352        let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
2353        assert_eq!(expected_offsets, event_offsets);
2354    }
2355
2356    #[test]
2357    fn reference_link_offsets() {
2358        let range =
2359            Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
2360                .into_offset_iter()
2361                .filter_map(|(ev, range)| match ev {
2362                    Event::Start(
2363                        Tag::Link {
2364                            link_type: LinkType::Reference,
2365                            ..
2366                        },
2367                        ..,
2368                    ) => Some(range),
2369                    _ => None,
2370                })
2371                .next()
2372                .unwrap();
2373        assert_eq!(5..30, range);
2374    }
2375
2376    #[test]
2377    fn footnote_offsets() {
2378        let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
2379            .into_offset_iter()
2380            .filter_map(|(ev, range)| match ev {
2381                Event::FootnoteReference(..) => Some(range),
2382                _ => None,
2383            })
2384            .next()
2385            .unwrap();
2386        assert_eq!(12..16, range);
2387    }
2388
2389    #[test]
2390    fn footnote_offsets_exclamation() {
2391        let mut immediately_before_footnote = None;
2392        let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
2393            .into_offset_iter()
2394            .filter_map(|(ev, range)| match ev {
2395                Event::FootnoteReference(..) => Some(range),
2396                _ => {
2397                    immediately_before_footnote = Some((ev, range));
2398                    None
2399                }
2400            })
2401            .next()
2402            .unwrap();
2403        assert_eq!(13..17, range);
2404        if let (Event::Text(exclamation), range_exclamation) =
2405            immediately_before_footnote.as_ref().unwrap()
2406        {
2407            assert_eq!("!", &exclamation[..]);
2408            assert_eq!(&(12..13), range_exclamation);
2409        } else {
2410            panic!("what came first, then? {immediately_before_footnote:?}");
2411        }
2412    }
2413
2414    #[test]
2415    fn table_offset() {
2416        let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
2417        let event_offset = parser_with_extensions(markdown)
2418            .into_offset_iter()
2419            .map(|(_ev, range)| range)
2420            .nth(3)
2421            .unwrap();
2422        let expected_offset = 3..59;
2423        assert_eq!(expected_offset, event_offset);
2424    }
2425
2426    #[test]
2427    fn table_cell_span() {
2428        let markdown = "a|b|c\n--|--|--\na|  |c";
2429        let event_offset = parser_with_extensions(markdown)
2430            .into_offset_iter()
2431            .filter_map(|(ev, span)| match ev {
2432                Event::Start(Tag::TableCell) => Some(span),
2433                _ => None,
2434            })
2435            .nth(4)
2436            .unwrap();
2437        let expected_offset_start = "a|b|c\n--|--|--\na|".len();
2438        assert_eq!(
2439            expected_offset_start..(expected_offset_start + 2),
2440            event_offset
2441        );
2442    }
2443
2444    #[test]
2445    fn offset_iter_issue_378() {
2446        let event_offsets: Vec<_> = Parser::new("a [b](c) d")
2447            .into_offset_iter()
2448            .map(|(_ev, range)| range)
2449            .collect();
2450        let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
2451        assert_eq!(expected_offsets, event_offsets);
2452    }
2453
2454    #[test]
2455    fn offset_iter_issue_404() {
2456        let event_offsets: Vec<_> = Parser::new("###\n")
2457            .into_offset_iter()
2458            .map(|(_ev, range)| range)
2459            .collect();
2460        let expected_offsets = vec![(0..4), (0..4)];
2461        assert_eq!(expected_offsets, event_offsets);
2462    }
2463
2464    // FIXME: add this one regression suite
2465    #[cfg(feature = "html")]
2466    #[test]
2467    fn link_def_at_eof() {
2468        let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
2469        let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
2470
2471        let mut buf = String::new();
2472        crate::html::push_html(&mut buf, Parser::new(test_str));
2473        assert_eq!(expected, buf);
2474    }
2475
2476    #[cfg(feature = "html")]
2477    #[test]
2478    fn no_footnote_refs_without_option() {
2479        let test_str = "a [^a]\n\n[^a]: yolo";
2480        let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";
2481
2482        let mut buf = String::new();
2483        crate::html::push_html(&mut buf, Parser::new(test_str));
2484        assert_eq!(expected, buf);
2485    }
2486
2487    #[cfg(feature = "html")]
2488    #[test]
2489    fn ref_def_at_eof() {
2490        let test_str = "[test]:\\";
2491        let expected = "";
2492
2493        let mut buf = String::new();
2494        crate::html::push_html(&mut buf, Parser::new(test_str));
2495        assert_eq!(expected, buf);
2496    }
2497
2498    #[cfg(feature = "html")]
2499    #[test]
2500    fn ref_def_cr_lf() {
2501        let test_str = "[a]: /u\r\n\n[a]";
2502        let expected = "<p><a href=\"/u\">a</a></p>\n";
2503
2504        let mut buf = String::new();
2505        crate::html::push_html(&mut buf, Parser::new(test_str));
2506        assert_eq!(expected, buf);
2507    }
2508
2509    #[cfg(feature = "html")]
2510    #[test]
2511    fn no_dest_refdef() {
2512        let test_str = "[a]:";
2513        let expected = "<p>[a]:</p>\n";
2514
2515        let mut buf = String::new();
2516        crate::html::push_html(&mut buf, Parser::new(test_str));
2517        assert_eq!(expected, buf);
2518    }
2519
2520    #[test]
2521    fn broken_links_called_only_once() {
2522        for &(markdown, expected) in &[
2523            ("See also [`g()`][crate::g].", 1),
2524            ("See also [`g()`][crate::g][].", 1),
2525            ("[brokenlink1] some other node [brokenlink2]", 2),
2526        ] {
2527            let mut times_called = 0;
2528            let callback = &mut |_broken_link: BrokenLink| {
2529                times_called += 1;
2530                None
2531            };
2532            let parser =
2533                Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
2534            for _ in parser {}
2535            assert_eq!(times_called, expected);
2536        }
2537    }
2538
2539    #[test]
2540    fn simple_broken_link_callback() {
2541        let test_str = "This is a link w/o def: [hello][world]";
2542        let mut callback = |broken_link: BrokenLink| {
2543            assert_eq!("world", broken_link.reference.as_ref());
2544            assert_eq!(&test_str[broken_link.span], "[hello][world]");
2545            let url = "YOLO".into();
2546            let title = "SWAG".to_owned().into();
2547            Some((url, title))
2548        };
2549        let parser =
2550            Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
2551        let mut link_tag_count = 0;
2552        for (typ, url, title, id) in parser.filter_map(|event| match event {
2553            Event::Start(tag) => match tag {
2554                Tag::Link {
2555                    link_type,
2556                    dest_url,
2557                    title,
2558                    id,
2559                } => Some((link_type, dest_url, title, id)),
2560                _ => None,
2561            },
2562            _ => None,
2563        }) {
2564            link_tag_count += 1;
2565            assert_eq!(typ, LinkType::ReferenceUnknown);
2566            assert_eq!(url.as_ref(), "YOLO");
2567            assert_eq!(title.as_ref(), "SWAG");
2568            assert_eq!(id.as_ref(), "world");
2569        }
2570        assert!(link_tag_count > 0);
2571    }
2572
2573    #[test]
2574    fn code_block_kind_check_fenced() {
2575        let parser = Parser::new("hello\n```test\ntadam\n```");
2576        let mut found = 0;
2577        for (ev, _range) in parser.into_offset_iter() {
2578            match ev {
2579                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
2580                    assert_eq!(syntax.as_ref(), "test");
2581                    found += 1;
2582                }
2583                _ => {}
2584            }
2585        }
2586        assert_eq!(found, 1);
2587    }
2588
2589    #[test]
2590    fn code_block_kind_check_indented() {
2591        let parser = Parser::new("hello\n\n    ```test\n    tadam\nhello");
2592        let mut found = 0;
2593        for (ev, _range) in parser.into_offset_iter() {
2594            match ev {
2595                Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
2596                    found += 1;
2597                }
2598                _ => {}
2599            }
2600        }
2601        assert_eq!(found, 1);
2602    }
2603
2604    #[test]
2605    fn ref_defs() {
2606        let input = r###"[a B c]: http://example.com
2607[another]: https://google.com
2608
2609text
2610
2611[final ONE]: http://wikipedia.org
2612"###;
2613        let mut parser = Parser::new(input);
2614
2615        assert!(parser.reference_definitions().get("a b c").is_some());
2616        assert!(parser.reference_definitions().get("nope").is_none());
2617
2618        if let Some(_event) = parser.next() {
2619            // testing keys with shorter lifetimes than parser and its input
2620            let s = "final one".to_owned();
2621            let link_def = parser.reference_definitions().get(&s).unwrap();
2622            let span = &input[link_def.span.clone()];
2623            assert_eq!(span, "[final ONE]: http://wikipedia.org");
2624        }
2625    }
2626
2627    #[test]
2628    fn common_lifetime_patterns_allowed<'b>() {
2629        let temporary_str = String::from("xyz");
2630
2631        // NOTE: this is a limitation of Rust, it doesn't allow putting lifetime parameters on the closure itself.
2632        // Hack it by attaching the lifetime to the test function instead.
2633        // TODO: why is the `'b` lifetime required at all? Changing it to `'_` breaks things :(
2634        let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));
2635
2636        fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
2637            Some(("#".into(), link.reference))
2638        }
2639
2640        for _ in Parser::new_with_broken_link_callback(
2641            "static lifetime",
2642            Options::empty(),
2643            Some(&mut closure),
2644        ) {}
2645        /* This fails to compile. Because the closure can't say `for <'a> fn(BrokenLink<'a>) ->
2646         * CowStr<'a>` and has to use the enclosing `'b` lifetime parameter, `temporary_str` lives
2647         * shorter than `'b`. I think this is unlikely to occur in real life, and if it does, the
2648         * fix is simple: move it out to a function that allows annotating the lifetimes.
2649         */
2650        //for _ in Parser::new_with_broken_link_callback(&temporary_str, Options::empty(), Some(&mut callback)) {
2651        //}
2652
2653        for _ in Parser::new_with_broken_link_callback(
2654            "static lifetime",
2655            Options::empty(),
2656            Some(&mut function),
2657        ) {}
2658        for _ in Parser::new_with_broken_link_callback(
2659            &temporary_str,
2660            Options::empty(),
2661            Some(&mut function),
2662        ) {}
2663    }
2664
2665    #[test]
2666    fn inline_html_inside_blockquote() {
2667        // Regression for #960
2668        let input = "> <foo\n> bar>";
2669        let events: Vec<_> = Parser::new(input).collect();
2670        let expected = [
2671            Event::Start(Tag::BlockQuote(None)),
2672            Event::Start(Tag::Paragraph),
2673            Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
2674            Event::End(TagEnd::Paragraph),
2675            Event::End(TagEnd::BlockQuote(None)),
2676        ];
2677        assert_eq!(&events, &expected);
2678    }
2679}