1use std::cmp::{max, min};
24use std::collections::{HashMap, VecDeque};
25use std::iter::FusedIterator;
26use std::num::NonZeroUsize;
27use std::ops::{Index, Range};
28
29use unicase::UniCase;
30
31use crate::firstpass::run_first_pass;
32use crate::linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel};
33use crate::scanners::*;
34use crate::strings::CowStr;
35use crate::tree::{Tree, TreeIndex};
36use crate::{
37 Alignment, BlockQuoteKind, CodeBlockKind, Event, HeadingLevel, LinkType, MetadataBlockKind,
38 Options, Tag, TagEnd,
39};
40
41pub(crate) const LINK_MAX_NESTED_PARENS: usize = 32;
47
48#[derive(Debug, Default, Clone, Copy)]
49pub(crate) struct Item {
50 pub start: usize,
51 pub end: usize,
52 pub body: ItemBody,
53}
54
55#[derive(Debug, PartialEq, Clone, Copy, Default)]
56pub(crate) enum ItemBody {
57 MaybeEmphasis(usize, bool, bool),
61 MaybeMath(bool, bool, u8),
63 MaybeSmartQuote(u8, bool, bool),
65 MaybeCode(usize, bool), MaybeHtml,
67 MaybeLinkOpen,
68 MaybeLinkClose(bool),
70 MaybeImage,
71
72 Emphasis,
74 Strong,
75 Strikethrough,
76 Math(CowIndex, bool), Code(CowIndex),
78 Link(LinkIndex),
79 Image(LinkIndex),
80 FootnoteReference(CowIndex),
81 TaskListMarker(bool), InlineHtml,
85 OwnedInlineHtml(CowIndex),
86 SynthesizeText(CowIndex),
87 SynthesizeChar(char),
88 Html,
89 Text {
90 backslash_escaped: bool,
91 },
92 SoftBreak,
93 HardBreak(bool),
95
96 #[default]
98 Root,
99
100 Paragraph,
102 Rule,
103 Heading(HeadingLevel, Option<HeadingIndex>), FencedCodeBlock(CowIndex),
105 IndentCodeBlock,
106 HtmlBlock,
107 BlockQuote(Option<BlockQuoteKind>),
108 List(bool, u8, u64), ListItem(usize), FootnoteDefinition(CowIndex),
111 MetadataBlock(MetadataBlockKind),
112
113 DefinitionList(bool), MaybeDefinitionListTitle,
118 DefinitionListTitle,
119 DefinitionListDefinition(usize),
120
121 Table(AlignmentIndex),
123 TableHead,
124 TableRow,
125 TableCell,
126}
127
128impl ItemBody {
129 fn is_maybe_inline(&self) -> bool {
130 use ItemBody::*;
131 matches!(
132 *self,
133 MaybeEmphasis(..)
134 | MaybeMath(..)
135 | MaybeSmartQuote(..)
136 | MaybeCode(..)
137 | MaybeHtml
138 | MaybeLinkOpen
139 | MaybeLinkClose(..)
140 | MaybeImage
141 )
142 }
143 fn is_inline(&self) -> bool {
144 use ItemBody::*;
145 matches!(
146 *self,
147 MaybeEmphasis(..)
148 | MaybeMath(..)
149 | MaybeSmartQuote(..)
150 | MaybeCode(..)
151 | MaybeHtml
152 | MaybeLinkOpen
153 | MaybeLinkClose(..)
154 | MaybeImage
155 | Emphasis
156 | Strong
157 | Strikethrough
158 | Math(..)
159 | Code(..)
160 | Link(..)
161 | Image(..)
162 | FootnoteReference(..)
163 | TaskListMarker(..)
164 | InlineHtml
165 | OwnedInlineHtml(..)
166 | SynthesizeText(..)
167 | SynthesizeChar(..)
168 | Html
169 | Text { .. }
170 | SoftBreak
171 | HardBreak(..)
172 )
173 }
174 fn is_block(&self) -> bool {
175 !self.is_inline()
176 }
177}
178
179#[derive(Debug)]
180pub struct BrokenLink<'a> {
181 pub span: std::ops::Range<usize>,
182 pub link_type: LinkType,
183 pub reference: CowStr<'a>,
184}
185
186pub struct Parser<'input, F = DefaultBrokenLinkCallback> {
188 text: &'input str,
189 options: Options,
190 tree: Tree<Item>,
191 allocs: Allocations<'input>,
192 broken_link_callback: Option<F>,
193 html_scan_guard: HtmlScanGuard,
194
195 link_ref_expansion_limit: usize,
212
213 inline_stack: InlineStack,
215 link_stack: LinkStack,
216 code_delims: CodeDelims,
217 math_delims: MathDelims,
218}
219
220impl<'input, F> std::fmt::Debug for Parser<'input, F> {
221 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222 f.debug_struct("Parser")
224 .field("text", &self.text)
225 .field("options", &self.options)
226 .field(
227 "broken_link_callback",
228 &self.broken_link_callback.as_ref().map(|_| ..),
229 )
230 .finish()
231 }
232}
233
234impl<'a> BrokenLink<'a> {
235 pub fn into_static(self) -> BrokenLink<'static> {
239 BrokenLink {
240 span: self.span.clone(),
241 link_type: self.link_type,
242 reference: self.reference.into_string().into(),
243 }
244 }
245}
246
247impl<'input> Parser<'input, DefaultBrokenLinkCallback> {
248 pub fn new(text: &'input str) -> Self {
250 Self::new_ext(text, Options::empty())
251 }
252
253 pub fn new_ext(text: &'input str, options: Options) -> Self {
255 Self::new_with_broken_link_callback(text, options, None)
256 }
257}
258
259impl<'input, F: BrokenLinkCallback<'input>> Parser<'input, F> {
260 pub fn new_with_broken_link_callback(
266 text: &'input str,
267 options: Options,
268 broken_link_callback: Option<F>,
269 ) -> Self {
270 let (mut tree, allocs) = run_first_pass(text, options);
271 tree.reset();
272 let inline_stack = Default::default();
273 let link_stack = Default::default();
274 let html_scan_guard = Default::default();
275 Parser {
276 text,
277 options,
278 tree,
279 allocs,
280 broken_link_callback,
281 inline_stack,
282 link_stack,
283 html_scan_guard,
284 link_ref_expansion_limit: text.len().max(100_000),
286 code_delims: CodeDelims::new(),
287 math_delims: MathDelims::new(),
288 }
289 }
290
291 pub fn reference_definitions(&self) -> &RefDefs<'_> {
294 &self.allocs.refdefs
295 }
296
297 fn fetch_link_type_url_title(
317 &mut self,
318 link_label: CowStr<'input>,
319 span: Range<usize>,
320 link_type: LinkType,
321 ) -> Option<(LinkType, CowStr<'input>, CowStr<'input>)> {
322 if self.link_ref_expansion_limit == 0 {
323 return None;
324 }
325
326 let (link_type, url, title) = self
327 .allocs
328 .refdefs
329 .get(link_label.as_ref())
330 .map(|matching_def| {
331 let title = matching_def
333 .title
334 .as_ref()
335 .cloned()
336 .unwrap_or_else(|| "".into());
337 let url = matching_def.dest.clone();
338 (link_type, url, title)
339 })
340 .or_else(|| {
341 match self.broken_link_callback.as_mut() {
342 Some(callback) => {
343 let broken_link = BrokenLink {
345 span,
346 link_type,
347 reference: link_label,
348 };
349
350 callback
351 .handle_broken_link(broken_link)
352 .map(|(url, title)| (link_type.to_unknown(), url, title))
353 }
354 None => None,
355 }
356 })?;
357
358 self.link_ref_expansion_limit = self
362 .link_ref_expansion_limit
363 .saturating_sub(url.len() + title.len());
364
365 Some((link_type, url, title))
366 }
367
368 fn handle_inline(&mut self) {
375 self.handle_inline_pass1();
376 self.handle_emphasis_and_hard_break();
377 }
378
379 fn handle_inline_pass1(&mut self) {
385 let mut cur = self.tree.cur();
386 let mut prev = None;
387
388 let block_end = self.tree[self.tree.peek_up().unwrap()].item.end;
389 let block_text = &self.text[..block_end];
390
391 while let Some(mut cur_ix) = cur {
392 match self.tree[cur_ix].item.body {
393 ItemBody::MaybeHtml => {
394 let next = self.tree[cur_ix].next;
395 let autolink = if let Some(next_ix) = next {
396 scan_autolink(block_text, self.tree[next_ix].item.start)
397 } else {
398 None
399 };
400
401 if let Some((ix, uri, link_type)) = autolink {
402 let node = scan_nodes_to_ix(&self.tree, next, ix);
403 let text_node = self.tree.create_node(Item {
404 start: self.tree[cur_ix].item.start + 1,
405 end: ix - 1,
406 body: ItemBody::Text {
407 backslash_escaped: false,
408 },
409 });
410 let link_ix =
411 self.allocs
412 .allocate_link(link_type, uri, "".into(), "".into());
413 self.tree[cur_ix].item.body = ItemBody::Link(link_ix);
414 self.tree[cur_ix].item.end = ix;
415 self.tree[cur_ix].next = node;
416 self.tree[cur_ix].child = Some(text_node);
417 prev = cur;
418 cur = node;
419 if let Some(node_ix) = cur {
420 self.tree[node_ix].item.start = max(self.tree[node_ix].item.start, ix);
421 }
422 continue;
423 } else {
424 let inline_html = next.and_then(|next_ix| {
425 self.scan_inline_html(
426 block_text.as_bytes(),
427 self.tree[next_ix].item.start,
428 )
429 });
430 if let Some((span, ix)) = inline_html {
431 let node = scan_nodes_to_ix(&self.tree, next, ix);
432 self.tree[cur_ix].item.body = if !span.is_empty() {
433 let converted_string =
434 String::from_utf8(span).expect("invalid utf8");
435 ItemBody::OwnedInlineHtml(
436 self.allocs.allocate_cow(converted_string.into()),
437 )
438 } else {
439 ItemBody::InlineHtml
440 };
441 self.tree[cur_ix].item.end = ix;
442 self.tree[cur_ix].next = node;
443 prev = cur;
444 cur = node;
445 if let Some(node_ix) = cur {
446 self.tree[node_ix].item.start =
447 max(self.tree[node_ix].item.start, ix);
448 }
449 continue;
450 }
451 }
452 self.tree[cur_ix].item.body = ItemBody::Text {
453 backslash_escaped: false,
454 };
455 }
456 ItemBody::MaybeMath(can_open, _can_close, brace_context) => {
457 if !can_open {
458 self.tree[cur_ix].item.body = ItemBody::Text {
459 backslash_escaped: false,
460 };
461 prev = cur;
462 cur = self.tree[cur_ix].next;
463 continue;
464 }
465 let is_display = self.tree[cur_ix].next.map_or(false, |next_ix| {
466 matches!(
467 self.tree[next_ix].item.body,
468 ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
469 )
470 });
471 let result = if self.math_delims.is_populated() {
472 self.math_delims
475 .find(&self.tree, cur_ix, is_display, brace_context)
476 } else {
477 let mut scan = self.tree[cur_ix].next;
480 if is_display {
481 scan = self.tree[scan.unwrap()].next;
484 }
485 let mut invalid = false;
486 while let Some(scan_ix) = scan {
487 if let ItemBody::MaybeMath(_can_open, can_close, delim_brace_context) =
488 self.tree[scan_ix].item.body
489 {
490 let delim_is_display =
491 self.tree[scan_ix].next.map_or(false, |next_ix| {
492 matches!(
493 self.tree[next_ix].item.body,
494 ItemBody::MaybeMath(
495 _can_open,
496 _can_close,
497 _brace_context
498 )
499 )
500 });
501 if !invalid && delim_brace_context == brace_context {
502 if (!is_display && can_close)
503 || (is_display && delim_is_display)
504 {
505 self.math_delims.clear();
509 break;
510 } else {
511 invalid = true;
514 }
515 }
516 self.math_delims.insert(
517 delim_is_display,
518 delim_brace_context,
519 scan_ix,
520 can_close,
521 );
522 }
523 if self.tree[scan_ix].item.body.is_block() {
524 scan = None;
527 break;
528 }
529 scan = self.tree[scan_ix].next;
530 }
531 scan
532 };
533
534 if let Some(scan_ix) = result {
535 self.make_math_span(cur_ix, scan_ix);
536 } else {
537 self.tree[cur_ix].item.body = ItemBody::Text {
538 backslash_escaped: false,
539 };
540 }
541 }
542 ItemBody::MaybeCode(mut search_count, preceded_by_backslash) => {
543 if preceded_by_backslash {
544 search_count -= 1;
545 if search_count == 0 {
546 self.tree[cur_ix].item.body = ItemBody::Text {
547 backslash_escaped: false,
548 };
549 prev = cur;
550 cur = self.tree[cur_ix].next;
551 continue;
552 }
553 }
554
555 if self.code_delims.is_populated() {
556 if let Some(scan_ix) = self.code_delims.find(cur_ix, search_count) {
559 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
560 } else {
561 self.tree[cur_ix].item.body = ItemBody::Text {
562 backslash_escaped: false,
563 };
564 }
565 } else {
566 let mut scan = if search_count > 0 {
569 self.tree[cur_ix].next
570 } else {
571 None
572 };
573 while let Some(scan_ix) = scan {
574 if let ItemBody::MaybeCode(delim_count, _) =
575 self.tree[scan_ix].item.body
576 {
577 if search_count == delim_count {
578 self.make_code_span(cur_ix, scan_ix, preceded_by_backslash);
579 self.code_delims.clear();
580 break;
581 } else {
582 self.code_delims.insert(delim_count, scan_ix);
583 }
584 }
585 if self.tree[scan_ix].item.body.is_block() {
586 scan = None;
589 break;
590 }
591 scan = self.tree[scan_ix].next;
592 }
593 if scan.is_none() {
594 self.tree[cur_ix].item.body = ItemBody::Text {
595 backslash_escaped: false,
596 };
597 }
598 }
599 }
600 ItemBody::MaybeLinkOpen => {
601 self.tree[cur_ix].item.body = ItemBody::Text {
602 backslash_escaped: false,
603 };
604 self.link_stack.push(LinkStackEl {
605 node: cur_ix,
606 ty: LinkStackTy::Link,
607 });
608 }
609 ItemBody::MaybeImage => {
610 self.tree[cur_ix].item.body = ItemBody::Text {
611 backslash_escaped: false,
612 };
613 self.link_stack.push(LinkStackEl {
614 node: cur_ix,
615 ty: LinkStackTy::Image,
616 });
617 }
618 ItemBody::MaybeLinkClose(could_be_ref) => {
619 self.tree[cur_ix].item.body = ItemBody::Text {
620 backslash_escaped: false,
621 };
622 if let Some(tos) = self.link_stack.pop() {
623 if tos.ty == LinkStackTy::Disabled {
624 continue;
625 }
626 let next = self.tree[cur_ix].next;
627 if let Some((next_ix, url, title)) =
628 self.scan_inline_link(block_text, self.tree[cur_ix].item.end, next)
629 {
630 let next_node = scan_nodes_to_ix(&self.tree, next, next_ix);
631 if let Some(prev_ix) = prev {
632 self.tree[prev_ix].next = None;
633 }
634 cur = Some(tos.node);
635 cur_ix = tos.node;
636 let link_ix =
637 self.allocs
638 .allocate_link(LinkType::Inline, url, title, "".into());
639 self.tree[cur_ix].item.body = if tos.ty == LinkStackTy::Image {
640 ItemBody::Image(link_ix)
641 } else {
642 ItemBody::Link(link_ix)
643 };
644 self.tree[cur_ix].child = self.tree[cur_ix].next;
645 self.tree[cur_ix].next = next_node;
646 self.tree[cur_ix].item.end = next_ix;
647 if let Some(next_node_ix) = next_node {
648 self.tree[next_node_ix].item.start =
649 max(self.tree[next_node_ix].item.start, next_ix);
650 }
651
652 if tos.ty == LinkStackTy::Link {
653 self.link_stack.disable_all_links();
654 }
655 } else {
656 let scan_result = scan_reference(
659 &self.tree,
660 block_text,
661 next,
662 self.options.contains(Options::ENABLE_FOOTNOTES),
663 self.options.has_gfm_footnotes(),
664 );
665 let (node_after_link, link_type) = match scan_result {
666 RefScan::LinkLabel(_, end_ix) => {
668 let reference_close_node = if let Some(node) =
673 scan_nodes_to_ix(&self.tree, next, end_ix - 1)
674 {
675 node
676 } else {
677 continue;
678 };
679 self.tree[reference_close_node].item.body =
680 ItemBody::MaybeLinkClose(false);
681 let next_node = self.tree[reference_close_node].next;
682
683 (next_node, LinkType::Reference)
684 }
685 RefScan::Collapsed(next_node) => {
687 if !could_be_ref {
690 continue;
691 }
692 (next_node, LinkType::Collapsed)
693 }
694 RefScan::Failed | RefScan::UnexpectedFootnote => {
698 if !could_be_ref {
699 continue;
700 }
701 (next, LinkType::Shortcut)
702 }
703 };
704
705 let label: Option<(ReferenceLabel<'input>, usize)> = match scan_result {
710 RefScan::LinkLabel(l, end_ix) => {
711 Some((ReferenceLabel::Link(l), end_ix))
712 }
713 RefScan::Collapsed(..)
714 | RefScan::Failed
715 | RefScan::UnexpectedFootnote => {
716 let label_start = self.tree[tos.node].item.end - 1;
718 let label_end = self.tree[cur_ix].item.end;
719 scan_link_label(
720 &self.tree,
721 &self.text[label_start..label_end],
722 self.options.contains(Options::ENABLE_FOOTNOTES),
723 self.options.has_gfm_footnotes(),
724 )
725 .map(|(ix, label)| (label, label_start + ix))
726 .filter(|(_, end)| *end == label_end)
727 }
728 };
729
730 let id = match &label {
731 Some(
732 (ReferenceLabel::Link(l), _) | (ReferenceLabel::Footnote(l), _),
733 ) => l.clone(),
734 None => "".into(),
735 };
736
737 if let Some((ReferenceLabel::Footnote(l), end)) = label {
739 let footref = self.allocs.allocate_cow(l);
740 if let Some(def) = self
741 .allocs
742 .footdefs
743 .get_mut(self.allocs.cows[footref.0].to_owned())
744 {
745 def.use_count += 1;
746 }
747 if !self.options.has_gfm_footnotes()
748 || self.allocs.footdefs.contains(&self.allocs.cows[footref.0])
749 {
750 let footnote_ix = if tos.ty == LinkStackTy::Image {
753 self.tree[tos.node].next = Some(cur_ix);
754 self.tree[tos.node].child = None;
755 self.tree[tos.node].item.body =
756 ItemBody::SynthesizeChar('!');
757 self.tree[cur_ix].item.start =
758 self.tree[tos.node].item.start + 1;
759 self.tree[tos.node].item.end =
760 self.tree[tos.node].item.start + 1;
761 cur_ix
762 } else {
763 tos.node
764 };
765 self.tree[footnote_ix].next = next;
769 self.tree[footnote_ix].child = None;
770 self.tree[footnote_ix].item.body =
771 ItemBody::FootnoteReference(footref);
772 self.tree[footnote_ix].item.end = end;
773 prev = Some(footnote_ix);
774 cur = next;
775 self.link_stack.clear();
776 continue;
777 }
778 } else if let Some((ReferenceLabel::Link(link_label), end)) = label {
779 if let Some((def_link_type, url, title)) = self
780 .fetch_link_type_url_title(
781 link_label,
782 (self.tree[tos.node].item.start)..end,
783 link_type,
784 )
785 {
786 let link_ix =
787 self.allocs.allocate_link(def_link_type, url, title, id);
788 self.tree[tos.node].item.body = if tos.ty == LinkStackTy::Image
789 {
790 ItemBody::Image(link_ix)
791 } else {
792 ItemBody::Link(link_ix)
793 };
794 let label_node = self.tree[tos.node].next;
795
796 self.tree[tos.node].next = node_after_link;
799
800 if label_node != cur {
802 self.tree[tos.node].child = label_node;
803
804 if let Some(prev_ix) = prev {
806 self.tree[prev_ix].next = None;
807 }
808 }
809
810 self.tree[tos.node].item.end = end;
811
812 cur = Some(tos.node);
814 cur_ix = tos.node;
815
816 if tos.ty == LinkStackTy::Link {
817 self.link_stack.disable_all_links();
818 }
819 }
820 }
821 }
822 }
823 }
824 _ => {
825 if let Some(cur_ix) = cur {
828 if self.tree[cur_ix].item.body.is_block() {
829 self.link_stack.clear();
830 }
831 }
832 }
833 }
834 prev = cur;
835 cur = self.tree[cur_ix].next;
836 }
837 self.link_stack.clear();
838 self.code_delims.clear();
839 self.math_delims.clear();
840 }
841
842 fn handle_emphasis_and_hard_break(&mut self) {
843 let mut prev = None;
844 let mut prev_ix: TreeIndex;
845 let mut cur = self.tree.cur();
846
847 let mut single_quote_open: Option<TreeIndex> = None;
848 let mut double_quote_open: bool = false;
849
850 while let Some(mut cur_ix) = cur {
851 match self.tree[cur_ix].item.body {
852 ItemBody::MaybeEmphasis(mut count, can_open, can_close) => {
853 let run_length = count;
854 let c = self.text.as_bytes()[self.tree[cur_ix].item.start];
855 let both = can_open && can_close;
856 if can_close {
857 while let Some(el) =
858 self.inline_stack
859 .find_match(&mut self.tree, c, run_length, both)
860 {
861 if let Some(prev_ix) = prev {
863 self.tree[prev_ix].next = None;
864 }
865 let match_count = min(count, el.count);
866 let mut end = cur_ix - 1;
868 let mut start = el.start + el.count;
869
870 while start > el.start + el.count - match_count {
872 let inc = if start > el.start + el.count - match_count + 1 {
873 2
874 } else {
875 1
876 };
877 let ty = if c == b'~' {
878 ItemBody::Strikethrough
879 } else if inc == 2 {
880 ItemBody::Strong
881 } else {
882 ItemBody::Emphasis
883 };
884
885 let root = start - inc;
886 end = end + inc;
887 self.tree[root].item.body = ty;
888 self.tree[root].item.end = self.tree[end].item.end;
889 self.tree[root].child = Some(start);
890 self.tree[root].next = None;
891 start = root;
892 }
893
894 prev_ix = el.start + el.count - match_count;
896 prev = Some(prev_ix);
897 cur = self.tree[cur_ix + match_count - 1].next;
898 self.tree[prev_ix].next = cur;
899
900 if el.count > match_count {
901 self.inline_stack.push(InlineEl {
902 start: el.start,
903 count: el.count - match_count,
904 run_length: el.run_length,
905 c: el.c,
906 both: el.both,
907 })
908 }
909 count -= match_count;
910 if count > 0 {
911 cur_ix = cur.unwrap();
912 } else {
913 break;
914 }
915 }
916 }
917 if count > 0 {
918 if can_open {
919 self.inline_stack.push(InlineEl {
920 start: cur_ix,
921 run_length,
922 count,
923 c,
924 both,
925 });
926 } else {
927 for i in 0..count {
928 self.tree[cur_ix + i].item.body = ItemBody::Text {
929 backslash_escaped: false,
930 };
931 }
932 }
933 prev_ix = cur_ix + count - 1;
934 prev = Some(prev_ix);
935 cur = self.tree[prev_ix].next;
936 }
937 }
938 ItemBody::MaybeSmartQuote(c, can_open, can_close) => {
939 self.tree[cur_ix].item.body = match c {
940 b'\'' => {
941 if let (Some(open_ix), true) = (single_quote_open, can_close) {
942 self.tree[open_ix].item.body = ItemBody::SynthesizeChar('‘');
943 single_quote_open = None;
944 } else if can_open {
945 single_quote_open = Some(cur_ix);
946 }
947 ItemBody::SynthesizeChar('’')
948 }
949 _ => {
950 if can_close && double_quote_open {
951 double_quote_open = false;
952 ItemBody::SynthesizeChar('”')
953 } else {
954 if can_open && !double_quote_open {
955 double_quote_open = true;
956 }
957 ItemBody::SynthesizeChar('“')
958 }
959 }
960 };
961 prev = cur;
962 cur = self.tree[cur_ix].next;
963 }
964 ItemBody::HardBreak(true) => {
965 if self.tree[cur_ix].next.is_none() {
966 self.tree[cur_ix].item.body = ItemBody::SynthesizeChar('\\');
967 }
968 prev = cur;
969 cur = self.tree[cur_ix].next;
970 }
971 _ => {
972 prev = cur;
973 if let Some(cur_ix) = cur {
976 if self.tree[cur_ix].item.body.is_block() {
977 self.inline_stack.pop_all(&mut self.tree);
978 }
979 }
980 cur = self.tree[cur_ix].next;
981 }
982 }
983 }
984 self.inline_stack.pop_all(&mut self.tree);
985 }
986
987 fn scan_inline_link(
989 &self,
990 underlying: &'input str,
991 mut ix: usize,
992 node: Option<TreeIndex>,
993 ) -> Option<(usize, CowStr<'input>, CowStr<'input>)> {
994 if scan_ch(&underlying.as_bytes()[ix..], b'(') == 0 {
995 return None;
996 }
997 ix += 1;
998
999 let scan_separator = |ix: &mut usize| {
1000 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1001 if let Some(bl) = scan_eol(&underlying.as_bytes()[*ix..]) {
1002 *ix += bl;
1003 let mut line_start = LineStart::new(&underlying.as_bytes()[*ix..]);
1004 let _ = scan_containers(
1005 &self.tree,
1006 &mut line_start,
1007 self.options.has_gfm_footnotes(),
1008 );
1009 *ix += line_start.bytes_scanned();
1010 }
1011 *ix += scan_while(&underlying.as_bytes()[*ix..], is_ascii_whitespace_no_nl);
1012 };
1013
1014 scan_separator(&mut ix);
1015
1016 let (dest_length, dest) = scan_link_dest(underlying, ix, LINK_MAX_NESTED_PARENS)?;
1017 let dest = unescape(dest, self.tree.is_in_table());
1018 ix += dest_length;
1019
1020 scan_separator(&mut ix);
1021
1022 let title = if let Some((bytes_scanned, t)) = self.scan_link_title(underlying, ix, node) {
1023 ix += bytes_scanned;
1024 scan_separator(&mut ix);
1025 t
1026 } else {
1027 "".into()
1028 };
1029 if scan_ch(&underlying.as_bytes()[ix..], b')') == 0 {
1030 return None;
1031 }
1032 ix += 1;
1033
1034 Some((ix, dest, title))
1035 }
1036
1037 fn scan_link_title(
1039 &self,
1040 text: &'input str,
1041 start_ix: usize,
1042 node: Option<TreeIndex>,
1043 ) -> Option<(usize, CowStr<'input>)> {
1044 let bytes = text.as_bytes();
1045 let open = match bytes.get(start_ix) {
1046 Some(b @ b'\'') | Some(b @ b'\"') | Some(b @ b'(') => *b,
1047 _ => return None,
1048 };
1049 let close = if open == b'(' { b')' } else { open };
1050
1051 let mut title = String::new();
1052 let mut mark = start_ix + 1;
1053 let mut i = start_ix + 1;
1054
1055 while i < bytes.len() {
1056 let c = bytes[i];
1057
1058 if c == close {
1059 let cow = if mark == 1 {
1060 (i - start_ix + 1, text[mark..i].into())
1061 } else {
1062 title.push_str(&text[mark..i]);
1063 (i - start_ix + 1, title.into())
1064 };
1065
1066 return Some(cow);
1067 }
1068 if c == open {
1069 return None;
1070 }
1071
1072 if c == b'\n' || c == b'\r' {
1073 if let Some(node_ix) = scan_nodes_to_ix(&self.tree, node, i + 1) {
1074 if self.tree[node_ix].item.start > i {
1075 title.push_str(&text[mark..i]);
1076 title.push('\n');
1077 i = self.tree[node_ix].item.start;
1078 mark = i;
1079 continue;
1080 }
1081 }
1082 }
1083 if c == b'&' {
1084 if let (n, Some(value)) = scan_entity(&bytes[i..]) {
1085 title.push_str(&text[mark..i]);
1086 title.push_str(&value);
1087 i += n;
1088 mark = i;
1089 continue;
1090 }
1091 }
1092 if self.tree.is_in_table()
1093 && c == b'\\'
1094 && i + 2 < bytes.len()
1095 && bytes[i + 1] == b'\\'
1096 && bytes[i + 2] == b'|'
1097 {
1098 title.push_str(&text[mark..i]);
1101 i += 2;
1102 mark = i;
1103 }
1104 if c == b'\\' && i + 1 < bytes.len() && is_ascii_punctuation(bytes[i + 1]) {
1105 title.push_str(&text[mark..i]);
1106 i += 1;
1107 mark = i;
1108 }
1109
1110 i += 1;
1111 }
1112
1113 None
1114 }
1115
1116 fn make_math_span(&mut self, open: TreeIndex, mut close: TreeIndex) {
1117 let start_is_display = self.tree[open].next.filter(|&next_ix| {
1118 next_ix != close
1119 && matches!(
1120 self.tree[next_ix].item.body,
1121 ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1122 )
1123 });
1124 let end_is_display = self.tree[close].next.filter(|&next_ix| {
1125 matches!(
1126 self.tree[next_ix].item.body,
1127 ItemBody::MaybeMath(_can_open, _can_close, _brace_context)
1128 )
1129 });
1130 let is_display = start_is_display.is_some() && end_is_display.is_some();
1131 if is_display {
1132 close = self.tree[close].next.unwrap();
1134 self.tree[open].next = Some(close);
1135 self.tree[open].item.end += 1;
1136 self.tree[close].item.start -= 1;
1137 } else {
1138 if self.tree[open].item.end == self.tree[close].item.start {
1139 self.tree[open].item.body = ItemBody::Text {
1141 backslash_escaped: false,
1142 };
1143 return;
1144 }
1145 self.tree[open].next = Some(close);
1146 }
1147 let span_start = self.tree[open].item.end;
1148 let span_end = self.tree[close].item.start;
1149
1150 let bytes = self.text.as_bytes();
1151 let mut buf: Option<String> = None;
1152
1153 let mut start_ix = span_start;
1154 let mut ix = span_start;
1155 while ix < span_end {
1156 let c = bytes[ix];
1157 if c == b'\r' || c == b'\n' {
1158 ix += 1;
1159 let buf = buf.get_or_insert_with(|| String::with_capacity(ix - span_start));
1160 buf.push_str(&self.text[start_ix..ix]);
1161 let mut line_start = LineStart::new(&bytes[ix..]);
1162 let _ = scan_containers(
1163 &self.tree,
1164 &mut line_start,
1165 self.options.has_gfm_footnotes(),
1166 );
1167 ix += line_start.bytes_scanned();
1168 start_ix = ix;
1169 } else if c == b'\\' && bytes.get(ix + 1) == Some(&b'|') && self.tree.is_in_table() {
1170 let buf = buf.get_or_insert_with(|| String::with_capacity(ix + 1 - span_start));
1171 buf.push_str(&self.text[start_ix..ix]);
1172 buf.push('|');
1173 ix += 2;
1174 start_ix = ix;
1175 } else {
1176 ix += 1;
1177 }
1178 }
1179
1180 let cow = if let Some(mut buf) = buf {
1181 buf.push_str(&self.text[start_ix..span_end]);
1182 buf.into()
1183 } else {
1184 self.text[span_start..span_end].into()
1185 };
1186
1187 self.tree[open].item.body = ItemBody::Math(self.allocs.allocate_cow(cow), is_display);
1188 self.tree[open].item.end = self.tree[close].item.end;
1189 self.tree[open].next = self.tree[close].next;
1190 }
1191
1192 fn make_code_span(&mut self, open: TreeIndex, close: TreeIndex, preceding_backslash: bool) {
1196 let bytes = self.text.as_bytes();
1197 let span_start = self.tree[open].item.end;
1198 let span_end = self.tree[close].item.start;
1199 let mut buf: Option<String> = None;
1200
1201 let mut start_ix = span_start;
1202 let mut ix = span_start;
1203 while ix < span_end {
1204 let c = bytes[ix];
1205 if c == b'\r' || c == b'\n' {
1206 let buf = buf.get_or_insert_with(|| String::with_capacity(ix + 1 - span_start));
1207 buf.push_str(&self.text[start_ix..ix]);
1208 buf.push(' ');
1209 ix += 1;
1210 let mut line_start = LineStart::new(&bytes[ix..]);
1211 let _ = scan_containers(
1212 &self.tree,
1213 &mut line_start,
1214 self.options.has_gfm_footnotes(),
1215 );
1216 ix += line_start.bytes_scanned();
1217 start_ix = ix;
1218 } else if c == b'\\' && bytes.get(ix + 1) == Some(&b'|') && self.tree.is_in_table() {
1219 let buf = buf.get_or_insert_with(|| String::with_capacity(ix + 1 - span_start));
1220 buf.push_str(&self.text[start_ix..ix]);
1221 buf.push('|');
1222 ix += 2;
1223 start_ix = ix;
1224 } else {
1225 ix += 1;
1226 }
1227 }
1228
1229 let (opening, closing, all_spaces) = {
1230 let s = if let Some(buf) = &mut buf {
1231 buf.push_str(&self.text[start_ix..span_end]);
1232 &buf[..]
1233 } else {
1234 &self.text[span_start..span_end]
1235 };
1236 (
1237 s.as_bytes().first() == Some(&b' '),
1238 s.as_bytes().last() == Some(&b' '),
1239 s.bytes().all(|b| b == b' '),
1240 )
1241 };
1242
1243 let cow: CowStr<'input> = if !all_spaces && opening && closing {
1244 if let Some(mut buf) = buf {
1245 buf.remove(0);
1246 buf.pop();
1247 buf.into()
1248 } else {
1249 let lo = span_start + 1;
1250 let hi = (span_end - 1).max(lo);
1251 self.text[lo..hi].into()
1252 }
1253 } else if let Some(buf) = buf {
1254 buf.into()
1255 } else {
1256 self.text[span_start..span_end].into()
1257 };
1258
1259 if preceding_backslash {
1260 self.tree[open].item.body = ItemBody::Text {
1261 backslash_escaped: true,
1262 };
1263 self.tree[open].item.end = self.tree[open].item.start + 1;
1264 self.tree[open].next = Some(close);
1265 self.tree[close].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1266 self.tree[close].item.start = self.tree[open].item.start + 1;
1267 } else {
1268 self.tree[open].item.body = ItemBody::Code(self.allocs.allocate_cow(cow));
1269 self.tree[open].item.end = self.tree[close].item.end;
1270 self.tree[open].next = self.tree[close].next;
1271 }
1272 }
1273
1274 fn scan_inline_html(&mut self, bytes: &[u8], ix: usize) -> Option<(Vec<u8>, usize)> {
1278 let c = *bytes.get(ix)?;
1279 if c == b'!' {
1280 Some((
1281 vec![],
1282 scan_inline_html_comment(bytes, ix + 1, &mut self.html_scan_guard)?,
1283 ))
1284 } else if c == b'?' {
1285 Some((
1286 vec![],
1287 scan_inline_html_processing(bytes, ix + 1, &mut self.html_scan_guard)?,
1288 ))
1289 } else {
1290 let (span, i) = scan_html_block_inner(
1291 &bytes[(ix - 1)..],
1293 Some(&|bytes| {
1294 let mut line_start = LineStart::new(bytes);
1295 let _ = scan_containers(
1296 &self.tree,
1297 &mut line_start,
1298 self.options.has_gfm_footnotes(),
1299 );
1300 line_start.bytes_scanned()
1301 }),
1302 )?;
1303 Some((span, i + ix - 1))
1304 }
1305 }
1306
1307 pub fn into_offset_iter(self) -> OffsetIter<'input, F> {
1311 OffsetIter { inner: self }
1312 }
1313}
1314
1315pub(crate) fn scan_containers(
1317 tree: &Tree<Item>,
1318 line_start: &mut LineStart<'_>,
1319 gfm_footnotes: bool,
1320) -> usize {
1321 let mut i = 0;
1322 for &node_ix in tree.walk_spine() {
1323 match tree[node_ix].item.body {
1324 ItemBody::BlockQuote(..) => {
1325 let save = line_start.clone();
1326 let _ = line_start.scan_space(3);
1327 if !line_start.scan_blockquote_marker() {
1328 *line_start = save;
1329 break;
1330 }
1331 }
1332 ItemBody::ListItem(indent) => {
1333 let save = line_start.clone();
1334 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1335 *line_start = save;
1336 break;
1337 }
1338 }
1339 ItemBody::DefinitionListDefinition(indent) => {
1340 let save = line_start.clone();
1341 if !line_start.scan_space(indent) && !line_start.is_at_eol() {
1342 *line_start = save;
1343 break;
1344 }
1345 }
1346 ItemBody::FootnoteDefinition(..) if gfm_footnotes => {
1347 let save = line_start.clone();
1348 if !line_start.scan_space(4) && !line_start.is_at_eol() {
1349 *line_start = save;
1350 break;
1351 }
1352 }
1353 _ => (),
1354 }
1355 i += 1;
1356 }
1357 i
1358}
1359
1360impl Tree<Item> {
1361 pub(crate) fn append_text(&mut self, start: usize, end: usize, backslash_escaped: bool) {
1362 if end > start {
1363 if let Some(ix) = self.cur() {
1364 if matches!(self[ix].item.body, ItemBody::Text { .. }) && self[ix].item.end == start
1365 {
1366 self[ix].item.end = end;
1367 return;
1368 }
1369 }
1370 self.append(Item {
1371 start,
1372 end,
1373 body: ItemBody::Text { backslash_escaped },
1374 });
1375 }
1376 }
1377 pub(crate) fn is_in_table(&self) -> bool {
1384 fn might_be_in_table(item: &Item) -> bool {
1385 item.body.is_inline()
1386 || matches!(item.body, |ItemBody::TableHead| ItemBody::TableRow
1387 | ItemBody::TableCell)
1388 }
1389 for &ix in self.walk_spine().rev() {
1390 if matches!(self[ix].item.body, ItemBody::Table(_)) {
1391 return true;
1392 }
1393 if !might_be_in_table(&self[ix].item) {
1394 return false;
1395 }
1396 }
1397 false
1398 }
1399}
1400
1401#[derive(Copy, Clone, Debug)]
1402struct InlineEl {
1403 start: TreeIndex,
1405 count: usize,
1407 run_length: usize,
1409 c: u8,
1411 both: bool,
1413}
1414
1415#[derive(Debug, Clone, Default)]
1416struct InlineStack {
1417 stack: Vec<InlineEl>,
1418 lower_bounds: [usize; 9],
1423}
1424
1425impl InlineStack {
1426 const UNDERSCORE_NOT_BOTH: usize = 0;
1430 const ASTERISK_NOT_BOTH: usize = 1;
1431 const ASTERISK_BASE: usize = 2;
1432 const TILDES: usize = 5;
1433 const UNDERSCORE_BASE: usize = 6;
1434
1435 fn pop_all(&mut self, tree: &mut Tree<Item>) {
1436 for el in self.stack.drain(..) {
1437 for i in 0..el.count {
1438 tree[el.start + i].item.body = ItemBody::Text {
1439 backslash_escaped: false,
1440 };
1441 }
1442 }
1443 self.lower_bounds = [0; 9];
1444 }
1445
1446 fn get_lowerbound(&self, c: u8, count: usize, both: bool) -> usize {
1447 if c == b'_' {
1448 let mod3_lower = self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3];
1449 if both {
1450 mod3_lower
1451 } else {
1452 min(
1453 mod3_lower,
1454 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH],
1455 )
1456 }
1457 } else if c == b'*' {
1458 let mod3_lower = self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3];
1459 if both {
1460 mod3_lower
1461 } else {
1462 min(
1463 mod3_lower,
1464 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH],
1465 )
1466 }
1467 } else {
1468 self.lower_bounds[InlineStack::TILDES]
1469 }
1470 }
1471
1472 fn set_lowerbound(&mut self, c: u8, count: usize, both: bool, new_bound: usize) {
1473 if c == b'_' {
1474 if both {
1475 self.lower_bounds[InlineStack::UNDERSCORE_BASE + count % 3] = new_bound;
1476 } else {
1477 self.lower_bounds[InlineStack::UNDERSCORE_NOT_BOTH] = new_bound;
1478 }
1479 } else if c == b'*' {
1480 self.lower_bounds[InlineStack::ASTERISK_BASE + count % 3] = new_bound;
1481 if !both {
1482 self.lower_bounds[InlineStack::ASTERISK_NOT_BOTH] = new_bound;
1483 }
1484 } else {
1485 self.lower_bounds[InlineStack::TILDES] = new_bound;
1486 }
1487 }
1488
1489 fn truncate(&mut self, new_bound: usize) {
1490 self.stack.truncate(new_bound);
1491 for lower_bound in &mut self.lower_bounds {
1492 if *lower_bound > new_bound {
1493 *lower_bound = new_bound;
1494 }
1495 }
1496 }
1497
1498 fn find_match(
1499 &mut self,
1500 tree: &mut Tree<Item>,
1501 c: u8,
1502 run_length: usize,
1503 both: bool,
1504 ) -> Option<InlineEl> {
1505 let lowerbound = min(self.stack.len(), self.get_lowerbound(c, run_length, both));
1506 let res = self.stack[lowerbound..]
1507 .iter()
1508 .cloned()
1509 .enumerate()
1510 .rfind(|(_, el)| {
1511 if c == b'~' && run_length != el.run_length {
1512 return false;
1513 }
1514 el.c == c
1515 && (!both && !el.both
1516 || (run_length + el.run_length) % 3 != 0
1517 || run_length % 3 == 0)
1518 });
1519
1520 if let Some((matching_ix, matching_el)) = res {
1521 let matching_ix = matching_ix + lowerbound;
1522 for el in &self.stack[(matching_ix + 1)..] {
1523 for i in 0..el.count {
1524 tree[el.start + i].item.body = ItemBody::Text {
1525 backslash_escaped: false,
1526 };
1527 }
1528 }
1529 self.truncate(matching_ix);
1530 Some(matching_el)
1531 } else {
1532 self.set_lowerbound(c, run_length, both, self.stack.len());
1533 None
1534 }
1535 }
1536
1537 fn trim_lower_bound(&mut self, ix: usize) {
1538 self.lower_bounds[ix] = self.lower_bounds[ix].min(self.stack.len());
1539 }
1540
1541 fn push(&mut self, el: InlineEl) {
1542 if el.c == b'~' {
1543 self.trim_lower_bound(InlineStack::TILDES);
1544 }
1545 self.stack.push(el)
1546 }
1547}
1548
1549#[derive(Debug, Clone)]
1550enum RefScan<'a> {
1551 LinkLabel(CowStr<'a>, usize),
1553 Collapsed(Option<TreeIndex>),
1555 UnexpectedFootnote,
1556 Failed,
1557}
1558
1559fn scan_nodes_to_ix(
1562 tree: &Tree<Item>,
1563 mut node: Option<TreeIndex>,
1564 ix: usize,
1565) -> Option<TreeIndex> {
1566 while let Some(node_ix) = node {
1567 if tree[node_ix].item.end <= ix {
1568 node = tree[node_ix].next;
1569 } else {
1570 break;
1571 }
1572 }
1573 node
1574}
1575
1576fn scan_link_label<'text>(
1579 tree: &Tree<Item>,
1580 text: &'text str,
1581 allow_footnote_refs: bool,
1582 gfm_footnotes: bool,
1583) -> Option<(usize, ReferenceLabel<'text>)> {
1584 let bytes = text.as_bytes();
1585 if bytes.len() < 2 || bytes[0] != b'[' {
1586 return None;
1587 }
1588 let linebreak_handler = |bytes: &[u8]| {
1589 let mut line_start = LineStart::new(bytes);
1590 let _ = scan_containers(tree, &mut line_start, gfm_footnotes);
1591 Some(line_start.bytes_scanned())
1592 };
1593 if allow_footnote_refs && b'^' == bytes[1] && bytes.get(2) != Some(&b']') {
1594 let linebreak_handler: &dyn Fn(&[u8]) -> Option<usize> = if gfm_footnotes {
1595 &|_| None
1596 } else {
1597 &linebreak_handler
1598 };
1599 if let Some((byte_index, cow)) =
1600 scan_link_label_rest(&text[2..], linebreak_handler, tree.is_in_table())
1601 {
1602 return Some((byte_index + 2, ReferenceLabel::Footnote(cow)));
1603 }
1604 }
1605 let (byte_index, cow) =
1606 scan_link_label_rest(&text[1..], &linebreak_handler, tree.is_in_table())?;
1607 Some((byte_index + 1, ReferenceLabel::Link(cow)))
1608}
1609
1610fn scan_reference<'b>(
1611 tree: &Tree<Item>,
1612 text: &'b str,
1613 cur: Option<TreeIndex>,
1614 allow_footnote_refs: bool,
1615 gfm_footnotes: bool,
1616) -> RefScan<'b> {
1617 let cur_ix = match cur {
1618 None => return RefScan::Failed,
1619 Some(cur_ix) => cur_ix,
1620 };
1621 let start = tree[cur_ix].item.start;
1622 let tail = &text.as_bytes()[start..];
1623
1624 if tail.starts_with(b"[]") {
1625 let closing_node = tree[cur_ix].next.unwrap();
1627 RefScan::Collapsed(tree[closing_node].next)
1628 } else {
1629 let label = scan_link_label(tree, &text[start..], allow_footnote_refs, gfm_footnotes);
1630 match label {
1631 Some((ix, ReferenceLabel::Link(label))) => RefScan::LinkLabel(label, start + ix),
1632 Some((_ix, ReferenceLabel::Footnote(_label))) => RefScan::UnexpectedFootnote,
1633 None => RefScan::Failed,
1634 }
1635 }
1636}
1637
1638#[derive(Clone, Default)]
1639struct LinkStack {
1640 inner: Vec<LinkStackEl>,
1641 disabled_ix: usize,
1642}
1643
1644impl LinkStack {
1645 fn push(&mut self, el: LinkStackEl) {
1646 self.inner.push(el);
1647 }
1648
1649 fn pop(&mut self) -> Option<LinkStackEl> {
1650 let el = self.inner.pop();
1651 self.disabled_ix = std::cmp::min(self.disabled_ix, self.inner.len());
1652 el
1653 }
1654
1655 fn clear(&mut self) {
1656 self.inner.clear();
1657 self.disabled_ix = 0;
1658 }
1659
1660 fn disable_all_links(&mut self) {
1661 for el in &mut self.inner[self.disabled_ix..] {
1662 if el.ty == LinkStackTy::Link {
1663 el.ty = LinkStackTy::Disabled;
1664 }
1665 }
1666 self.disabled_ix = self.inner.len();
1667 }
1668}
1669
1670#[derive(Clone, Debug)]
1671struct LinkStackEl {
1672 node: TreeIndex,
1673 ty: LinkStackTy,
1674}
1675
1676#[derive(PartialEq, Clone, Debug)]
1677enum LinkStackTy {
1678 Link,
1679 Image,
1680 Disabled,
1681}
1682
1683#[derive(Clone, Debug)]
1685pub struct LinkDef<'a> {
1686 pub dest: CowStr<'a>,
1687 pub title: Option<CowStr<'a>>,
1688 pub span: Range<usize>,
1689}
1690
1691impl<'a> LinkDef<'a> {
1692 pub fn into_static(self) -> LinkDef<'static> {
1693 LinkDef {
1694 dest: self.dest.into_static(),
1695 title: self.title.map(|s| s.into_static()),
1696 span: self.span,
1697 }
1698 }
1699}
1700
1701#[derive(Clone, Debug)]
1703pub struct FootnoteDef {
1704 pub use_count: usize,
1705}
1706
1707struct CodeDelims {
1710 inner: HashMap<usize, VecDeque<TreeIndex>>,
1711 seen_first: bool,
1712}
1713
1714impl CodeDelims {
1715 fn new() -> Self {
1716 Self {
1717 inner: Default::default(),
1718 seen_first: false,
1719 }
1720 }
1721
1722 fn insert(&mut self, count: usize, ix: TreeIndex) {
1723 if self.seen_first {
1724 self.inner.entry(count).or_default().push_back(ix);
1725 } else {
1726 self.seen_first = true;
1729 }
1730 }
1731
1732 fn is_populated(&self) -> bool {
1733 !self.inner.is_empty()
1734 }
1735
1736 fn find(&mut self, open_ix: TreeIndex, count: usize) -> Option<TreeIndex> {
1737 while let Some(ix) = self.inner.get_mut(&count)?.pop_front() {
1738 if ix > open_ix {
1739 return Some(ix);
1740 }
1741 }
1742 None
1743 }
1744
1745 fn clear(&mut self) {
1746 self.inner.clear();
1747 self.seen_first = false;
1748 }
1749}
1750
1751struct MathDelims {
1754 inner: HashMap<u8, VecDeque<(TreeIndex, bool, bool)>>,
1755}
1756
1757impl MathDelims {
1758 fn new() -> Self {
1759 Self {
1760 inner: Default::default(),
1761 }
1762 }
1763
1764 fn insert(
1765 &mut self,
1766 delim_is_display: bool,
1767 brace_context: u8,
1768 ix: TreeIndex,
1769 can_close: bool,
1770 ) {
1771 self.inner
1772 .entry(brace_context)
1773 .or_default()
1774 .push_back((ix, can_close, delim_is_display));
1775 }
1776
1777 fn is_populated(&self) -> bool {
1778 !self.inner.is_empty()
1779 }
1780
1781 fn find(
1782 &mut self,
1783 tree: &Tree<Item>,
1784 open_ix: TreeIndex,
1785 is_display: bool,
1786 brace_context: u8,
1787 ) -> Option<TreeIndex> {
1788 while let Some((ix, can_close, delim_is_display)) =
1789 self.inner.get_mut(&brace_context)?.pop_front()
1790 {
1791 if ix <= open_ix || (is_display && tree[open_ix].next == Some(ix)) {
1792 continue;
1793 }
1794 let can_close = can_close && tree[open_ix].item.end != tree[ix].item.start;
1795 if (!is_display && can_close) || (is_display && delim_is_display) {
1796 return Some(ix);
1797 }
1798 self.inner
1801 .get_mut(&brace_context)?
1802 .push_front((ix, can_close, delim_is_display));
1803 break;
1804 }
1805 None
1806 }
1807
1808 fn clear(&mut self) {
1809 self.inner.clear();
1810 }
1811}
1812
1813#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1814pub(crate) struct LinkIndex(usize);
1815
1816#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1817pub(crate) struct CowIndex(usize);
1818
1819#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1820pub(crate) struct AlignmentIndex(usize);
1821
1822#[derive(Copy, Clone, PartialEq, Eq, Debug)]
1823pub(crate) struct HeadingIndex(NonZeroUsize);
1824
1825#[derive(Clone)]
1826pub(crate) struct Allocations<'a> {
1827 pub refdefs: RefDefs<'a>,
1828 pub footdefs: FootnoteDefs<'a>,
1829 links: Vec<(LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>)>,
1830 cows: Vec<CowStr<'a>>,
1831 alignments: Vec<Vec<Alignment>>,
1832 headings: Vec<HeadingAttributes<'a>>,
1833}
1834
1835#[derive(Clone)]
1837pub(crate) struct HeadingAttributes<'a> {
1838 pub id: Option<CowStr<'a>>,
1839 pub classes: Vec<CowStr<'a>>,
1840 pub attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
1841}
1842
1843#[derive(Clone, Default, Debug)]
1845pub struct RefDefs<'input>(pub(crate) HashMap<LinkLabel<'input>, LinkDef<'input>>);
1846
1847#[derive(Clone, Default, Debug)]
1849pub struct FootnoteDefs<'input>(pub(crate) HashMap<FootnoteLabel<'input>, FootnoteDef>);
1850
1851impl<'input, 'b, 's> RefDefs<'input>
1852where
1853 's: 'b,
1854{
1855 pub fn get(&'s self, key: &'b str) -> Option<&'b LinkDef<'input>> {
1857 self.0.get(&UniCase::new(key.into()))
1858 }
1859
1860 pub fn iter(&'s self) -> impl Iterator<Item = (&'s str, &'s LinkDef<'input>)> {
1862 self.0.iter().map(|(k, v)| (k.as_ref(), v))
1863 }
1864}
1865
1866impl<'input, 'b, 's> FootnoteDefs<'input>
1867where
1868 's: 'b,
1869{
1870 pub fn contains(&'s self, key: &'b str) -> bool {
1872 self.0.contains_key(&UniCase::new(key.into()))
1873 }
1874 pub fn get_mut(&'s mut self, key: CowStr<'input>) -> Option<&'s mut FootnoteDef> {
1876 self.0.get_mut(&UniCase::new(key))
1877 }
1878}
1879
1880impl<'a> Allocations<'a> {
1881 pub fn new() -> Self {
1882 Self {
1883 refdefs: RefDefs::default(),
1884 footdefs: FootnoteDefs::default(),
1885 links: Vec::with_capacity(128),
1886 cows: Vec::new(),
1887 alignments: Vec::new(),
1888 headings: Vec::new(),
1889 }
1890 }
1891
1892 pub fn allocate_cow(&mut self, cow: CowStr<'a>) -> CowIndex {
1893 let ix = self.cows.len();
1894 self.cows.push(cow);
1895 CowIndex(ix)
1896 }
1897
1898 pub fn allocate_link(
1899 &mut self,
1900 ty: LinkType,
1901 url: CowStr<'a>,
1902 title: CowStr<'a>,
1903 id: CowStr<'a>,
1904 ) -> LinkIndex {
1905 let ix = self.links.len();
1906 self.links.push((ty, url, title, id));
1907 LinkIndex(ix)
1908 }
1909
1910 pub fn allocate_alignment(&mut self, alignment: Vec<Alignment>) -> AlignmentIndex {
1911 let ix = self.alignments.len();
1912 self.alignments.push(alignment);
1913 AlignmentIndex(ix)
1914 }
1915
1916 pub fn allocate_heading(&mut self, attrs: HeadingAttributes<'a>) -> HeadingIndex {
1917 let ix = self.headings.len();
1918 self.headings.push(attrs);
1919 let ix_nonzero = NonZeroUsize::new(ix.wrapping_add(1)).expect("too many headings");
1922 HeadingIndex(ix_nonzero)
1923 }
1924
1925 pub fn take_cow(&mut self, ix: CowIndex) -> CowStr<'a> {
1926 std::mem::replace(&mut self.cows[ix.0], "".into())
1927 }
1928
1929 pub fn take_link(&mut self, ix: LinkIndex) -> (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>) {
1930 let default_link = (LinkType::ShortcutUnknown, "".into(), "".into(), "".into());
1931 std::mem::replace(&mut self.links[ix.0], default_link)
1932 }
1933
1934 pub fn take_alignment(&mut self, ix: AlignmentIndex) -> Vec<Alignment> {
1935 std::mem::take(&mut self.alignments[ix.0])
1936 }
1937}
1938
1939impl<'a> Index<CowIndex> for Allocations<'a> {
1940 type Output = CowStr<'a>;
1941
1942 fn index(&self, ix: CowIndex) -> &Self::Output {
1943 self.cows.index(ix.0)
1944 }
1945}
1946
1947impl<'a> Index<LinkIndex> for Allocations<'a> {
1948 type Output = (LinkType, CowStr<'a>, CowStr<'a>, CowStr<'a>);
1949
1950 fn index(&self, ix: LinkIndex) -> &Self::Output {
1951 self.links.index(ix.0)
1952 }
1953}
1954
1955impl<'a> Index<AlignmentIndex> for Allocations<'a> {
1956 type Output = Vec<Alignment>;
1957
1958 fn index(&self, ix: AlignmentIndex) -> &Self::Output {
1959 self.alignments.index(ix.0)
1960 }
1961}
1962
1963impl<'a> Index<HeadingIndex> for Allocations<'a> {
1964 type Output = HeadingAttributes<'a>;
1965
1966 fn index(&self, ix: HeadingIndex) -> &Self::Output {
1967 self.headings.index(ix.0.get() - 1)
1968 }
1969}
1970
1971#[derive(Clone, Default)]
1977pub(crate) struct HtmlScanGuard {
1978 pub cdata: usize,
1979 pub processing: usize,
1980 pub declaration: usize,
1981 pub comment: usize,
1982}
1983
1984pub trait BrokenLinkCallback<'input> {
1989 fn handle_broken_link(
1990 &mut self,
1991 link: BrokenLink<'input>,
1992 ) -> Option<(CowStr<'input>, CowStr<'input>)>;
1993}
1994
1995impl<'input, T> BrokenLinkCallback<'input> for T
1996where
1997 T: FnMut(BrokenLink<'input>) -> Option<(CowStr<'input>, CowStr<'input>)>,
1998{
1999 fn handle_broken_link(
2000 &mut self,
2001 link: BrokenLink<'input>,
2002 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2003 self(link)
2004 }
2005}
2006
2007impl<'input> BrokenLinkCallback<'input> for Box<dyn BrokenLinkCallback<'input>> {
2008 fn handle_broken_link(
2009 &mut self,
2010 link: BrokenLink<'input>,
2011 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2012 (**self).handle_broken_link(link)
2013 }
2014}
2015
2016#[derive(Debug)]
2018pub struct DefaultBrokenLinkCallback;
2019
2020impl<'input> BrokenLinkCallback<'input> for DefaultBrokenLinkCallback {
2021 fn handle_broken_link(
2022 &mut self,
2023 _link: BrokenLink<'input>,
2024 ) -> Option<(CowStr<'input>, CowStr<'input>)> {
2025 None
2026 }
2027}
2028
2029#[derive(Debug)]
2037pub struct OffsetIter<'a, F = DefaultBrokenLinkCallback> {
2038 inner: Parser<'a, F>,
2039}
2040
2041impl<'a, F: BrokenLinkCallback<'a>> OffsetIter<'a, F> {
2042 pub fn reference_definitions(&self) -> &RefDefs<'_> {
2044 self.inner.reference_definitions()
2045 }
2046}
2047
2048impl<'a, F: BrokenLinkCallback<'a>> Iterator for OffsetIter<'a, F> {
2049 type Item = (Event<'a>, Range<usize>);
2050
2051 fn next(&mut self) -> Option<Self::Item> {
2052 match self.inner.tree.cur() {
2053 None => {
2054 let ix = self.inner.tree.pop()?;
2055 let tag_end = body_to_tag_end(&self.inner.tree[ix].item.body);
2056 self.inner.tree.next_sibling(ix);
2057 let span = self.inner.tree[ix].item.start..self.inner.tree[ix].item.end;
2058 debug_assert!(span.start <= span.end);
2059 Some((Event::End(tag_end), span))
2060 }
2061 Some(cur_ix) => {
2062 if self.inner.tree[cur_ix].item.body.is_maybe_inline() {
2063 self.inner.handle_inline();
2064 }
2065
2066 let node = self.inner.tree[cur_ix];
2067 let item = node.item;
2068 let event = item_to_event(item, self.inner.text, &mut self.inner.allocs);
2069 if let Event::Start(..) = event {
2070 self.inner.tree.push();
2071 } else {
2072 self.inner.tree.next_sibling(cur_ix);
2073 }
2074 debug_assert!(item.start <= item.end);
2075 Some((event, item.start..item.end))
2076 }
2077 }
2078 }
2079}
2080
2081fn body_to_tag_end(body: &ItemBody) -> TagEnd {
2082 match *body {
2083 ItemBody::Paragraph => TagEnd::Paragraph,
2084 ItemBody::Emphasis => TagEnd::Emphasis,
2085 ItemBody::Strong => TagEnd::Strong,
2086 ItemBody::Strikethrough => TagEnd::Strikethrough,
2087 ItemBody::Link(..) => TagEnd::Link,
2088 ItemBody::Image(..) => TagEnd::Image,
2089 ItemBody::Heading(level, _) => TagEnd::Heading(level),
2090 ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) => TagEnd::CodeBlock,
2091 ItemBody::BlockQuote(kind) => TagEnd::BlockQuote(kind),
2092 ItemBody::HtmlBlock => TagEnd::HtmlBlock,
2093 ItemBody::List(_, c, _) => {
2094 let is_ordered = c == b'.' || c == b')';
2095 TagEnd::List(is_ordered)
2096 }
2097 ItemBody::ListItem(_) => TagEnd::Item,
2098 ItemBody::TableHead => TagEnd::TableHead,
2099 ItemBody::TableCell => TagEnd::TableCell,
2100 ItemBody::TableRow => TagEnd::TableRow,
2101 ItemBody::Table(..) => TagEnd::Table,
2102 ItemBody::FootnoteDefinition(..) => TagEnd::FootnoteDefinition,
2103 ItemBody::MetadataBlock(kind) => TagEnd::MetadataBlock(kind),
2104 ItemBody::DefinitionList(_) => TagEnd::DefinitionList,
2105 ItemBody::DefinitionListTitle => TagEnd::DefinitionListTitle,
2106 ItemBody::DefinitionListDefinition(_) => TagEnd::DefinitionListDefinition,
2107 _ => panic!("unexpected item body {:?}", body),
2108 }
2109}
2110
2111fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) -> Event<'a> {
2112 let tag = match item.body {
2113 ItemBody::Text { .. } => return Event::Text(text[item.start..item.end].into()),
2114 ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
2115 ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
2116 ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
2117 ItemBody::HtmlBlock => Tag::HtmlBlock,
2118 ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
2119 ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
2120 ItemBody::OwnedInlineHtml(cow_ix) => return Event::InlineHtml(allocs.take_cow(cow_ix)),
2121 ItemBody::SoftBreak => return Event::SoftBreak,
2122 ItemBody::HardBreak(_) => return Event::HardBreak,
2123 ItemBody::FootnoteReference(cow_ix) => {
2124 return Event::FootnoteReference(allocs.take_cow(cow_ix))
2125 }
2126 ItemBody::TaskListMarker(checked) => return Event::TaskListMarker(checked),
2127 ItemBody::Rule => return Event::Rule,
2128 ItemBody::Paragraph => Tag::Paragraph,
2129 ItemBody::Emphasis => Tag::Emphasis,
2130 ItemBody::Strong => Tag::Strong,
2131 ItemBody::Strikethrough => Tag::Strikethrough,
2132 ItemBody::Link(link_ix) => {
2133 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2134 Tag::Link {
2135 link_type,
2136 dest_url,
2137 title,
2138 id,
2139 }
2140 }
2141 ItemBody::Image(link_ix) => {
2142 let (link_type, dest_url, title, id) = allocs.take_link(link_ix);
2143 Tag::Image {
2144 link_type,
2145 dest_url,
2146 title,
2147 id,
2148 }
2149 }
2150 ItemBody::Heading(level, Some(heading_ix)) => {
2151 let HeadingAttributes { id, classes, attrs } = allocs.index(heading_ix);
2152 Tag::Heading {
2153 level,
2154 id: id.clone(),
2155 classes: classes.clone(),
2156 attrs: attrs.clone(),
2157 }
2158 }
2159 ItemBody::Heading(level, None) => Tag::Heading {
2160 level,
2161 id: None,
2162 classes: Vec::new(),
2163 attrs: Vec::new(),
2164 },
2165 ItemBody::FencedCodeBlock(cow_ix) => {
2166 Tag::CodeBlock(CodeBlockKind::Fenced(allocs.take_cow(cow_ix)))
2167 }
2168 ItemBody::IndentCodeBlock => Tag::CodeBlock(CodeBlockKind::Indented),
2169 ItemBody::BlockQuote(kind) => Tag::BlockQuote(kind),
2170 ItemBody::List(_, c, listitem_start) => {
2171 if c == b'.' || c == b')' {
2172 Tag::List(Some(listitem_start))
2173 } else {
2174 Tag::List(None)
2175 }
2176 }
2177 ItemBody::ListItem(_) => Tag::Item,
2178 ItemBody::TableHead => Tag::TableHead,
2179 ItemBody::TableCell => Tag::TableCell,
2180 ItemBody::TableRow => Tag::TableRow,
2181 ItemBody::Table(alignment_ix) => Tag::Table(allocs.take_alignment(alignment_ix)),
2182 ItemBody::FootnoteDefinition(cow_ix) => Tag::FootnoteDefinition(allocs.take_cow(cow_ix)),
2183 ItemBody::MetadataBlock(kind) => Tag::MetadataBlock(kind),
2184 ItemBody::Math(cow_ix, is_display) => {
2185 return if is_display {
2186 Event::DisplayMath(allocs.take_cow(cow_ix))
2187 } else {
2188 Event::InlineMath(allocs.take_cow(cow_ix))
2189 }
2190 }
2191 ItemBody::DefinitionList(_) => Tag::DefinitionList,
2192 ItemBody::DefinitionListTitle => Tag::DefinitionListTitle,
2193 ItemBody::DefinitionListDefinition(_) => Tag::DefinitionListDefinition,
2194 _ => panic!("unexpected item body {:?}", item.body),
2195 };
2196
2197 Event::Start(tag)
2198}
2199
2200impl<'a, F: BrokenLinkCallback<'a>> Iterator for Parser<'a, F> {
2201 type Item = Event<'a>;
2202
2203 fn next(&mut self) -> Option<Event<'a>> {
2204 match self.tree.cur() {
2205 None => {
2206 let ix = self.tree.pop()?;
2207 let tag_end = body_to_tag_end(&self.tree[ix].item.body);
2208 self.tree.next_sibling(ix);
2209 Some(Event::End(tag_end))
2210 }
2211 Some(cur_ix) => {
2212 if self.tree[cur_ix].item.body.is_maybe_inline() {
2213 self.handle_inline();
2214 }
2215
2216 let node = self.tree[cur_ix];
2217 let item = node.item;
2218 let event = item_to_event(item, self.text, &mut self.allocs);
2219 if let Event::Start(ref _tag) = event {
2220 self.tree.push();
2221 } else {
2222 self.tree.next_sibling(cur_ix);
2223 }
2224 Some(event)
2225 }
2226 }
2227 }
2228}
2229
2230impl<'a, F: BrokenLinkCallback<'a>> FusedIterator for Parser<'a, F> {}
2231
2232#[cfg(test)]
2233mod test {
2234 use super::*;
2235 use crate::tree::Node;
2236
2237 fn parser_with_extensions(text: &str) -> Parser<'_> {
2240 let mut opts = Options::empty();
2241 opts.insert(Options::ENABLE_TABLES);
2242 opts.insert(Options::ENABLE_FOOTNOTES);
2243 opts.insert(Options::ENABLE_STRIKETHROUGH);
2244 opts.insert(Options::ENABLE_TASKLISTS);
2245
2246 Parser::new_ext(text, opts)
2247 }
2248
2249 #[test]
2250 #[cfg(target_pointer_width = "64")]
2251 fn node_size() {
2252 let node_size = std::mem::size_of::<Node<Item>>();
2253 assert_eq!(48, node_size);
2254 }
2255
2256 #[test]
2257 #[cfg(target_pointer_width = "64")]
2258 fn body_size() {
2259 let body_size = std::mem::size_of::<ItemBody>();
2260 assert_eq!(16, body_size);
2261 }
2262
2263 #[test]
2264 fn single_open_fish_bracket() {
2265 assert_eq!(3, Parser::new("<").count());
2267 }
2268
2269 #[test]
2270 fn lone_hashtag() {
2271 assert_eq!(2, Parser::new("#").count());
2273 }
2274
2275 #[test]
2276 fn lots_of_backslashes() {
2277 Parser::new("\\\\\r\r").count();
2279 Parser::new("\\\r\r\\.\\\\\r\r\\.\\").count();
2280 }
2281
2282 #[test]
2283 fn issue_320() {
2284 parser_with_extensions(":\r\t> |\r:\r\t> |\r").count();
2286 }
2287
2288 #[test]
2289 fn issue_319() {
2290 parser_with_extensions("|\r-]([^|\r-]([^").count();
2292 parser_with_extensions("|\r\r=][^|\r\r=][^car").count();
2293 }
2294
2295 #[test]
2296 fn issue_303() {
2297 parser_with_extensions("[^\r\ra]").count();
2299 parser_with_extensions("\r\r]Z[^\x00\r\r]Z[^\x00").count();
2300 }
2301
2302 #[test]
2303 fn issue_313() {
2304 parser_with_extensions("*]0[^\r\r*]0[^").count();
2306 parser_with_extensions("[^\r> `][^\r> `][^\r> `][").count();
2307 }
2308
2309 #[test]
2310 fn issue_311() {
2311 parser_with_extensions("\\\u{0d}-\u{09}\\\u{0d}-\u{09}").count();
2313 }
2314
2315 #[test]
2316 fn issue_283() {
2317 let input = std::str::from_utf8(b"\xf0\x9b\xb2\x9f<td:^\xf0\x9b\xb2\x9f").unwrap();
2318 parser_with_extensions(input).count();
2320 }
2321
2322 #[test]
2323 fn issue_289() {
2324 parser_with_extensions("> - \\\n> - ").count();
2326 parser_with_extensions("- \n\n").count();
2327 }
2328
2329 #[test]
2330 fn issue_306() {
2331 parser_with_extensions("*\r_<__*\r_<__*\r_<__*\r_<__").count();
2333 }
2334
2335 #[test]
2336 fn issue_305() {
2337 parser_with_extensions("_6**6*_*").count();
2339 }
2340
2341 #[test]
2342 fn another_emphasis_panic() {
2343 parser_with_extensions("*__#_#__*").count();
2344 }
2345
2346 #[test]
2347 fn offset_iter() {
2348 let event_offsets: Vec<_> = Parser::new("*hello* world")
2349 .into_offset_iter()
2350 .map(|(_ev, range)| range)
2351 .collect();
2352 let expected_offsets = vec![(0..13), (0..7), (1..6), (0..7), (7..13), (0..13)];
2353 assert_eq!(expected_offsets, event_offsets);
2354 }
2355
2356 #[test]
2357 fn reference_link_offsets() {
2358 let range =
2359 Parser::new("# H1\n[testing][Some reference]\n\n[Some reference]: https://github.com")
2360 .into_offset_iter()
2361 .filter_map(|(ev, range)| match ev {
2362 Event::Start(
2363 Tag::Link {
2364 link_type: LinkType::Reference,
2365 ..
2366 },
2367 ..,
2368 ) => Some(range),
2369 _ => None,
2370 })
2371 .next()
2372 .unwrap();
2373 assert_eq!(5..30, range);
2374 }
2375
2376 #[test]
2377 fn footnote_offsets() {
2378 let range = parser_with_extensions("Testing this[^1] out.\n\n[^1]: Footnote.")
2379 .into_offset_iter()
2380 .filter_map(|(ev, range)| match ev {
2381 Event::FootnoteReference(..) => Some(range),
2382 _ => None,
2383 })
2384 .next()
2385 .unwrap();
2386 assert_eq!(12..16, range);
2387 }
2388
2389 #[test]
2390 fn footnote_offsets_exclamation() {
2391 let mut immediately_before_footnote = None;
2392 let range = parser_with_extensions("Testing this![^1] out.\n\n[^1]: Footnote.")
2393 .into_offset_iter()
2394 .filter_map(|(ev, range)| match ev {
2395 Event::FootnoteReference(..) => Some(range),
2396 _ => {
2397 immediately_before_footnote = Some((ev, range));
2398 None
2399 }
2400 })
2401 .next()
2402 .unwrap();
2403 assert_eq!(13..17, range);
2404 if let (Event::Text(exclamation), range_exclamation) =
2405 immediately_before_footnote.as_ref().unwrap()
2406 {
2407 assert_eq!("!", &exclamation[..]);
2408 assert_eq!(&(12..13), range_exclamation);
2409 } else {
2410 panic!("what came first, then? {immediately_before_footnote:?}");
2411 }
2412 }
2413
2414 #[test]
2415 fn table_offset() {
2416 let markdown = "a\n\nTesting|This|Outtt\n--|:--:|--:\nSome Data|Other data|asdf";
2417 let event_offset = parser_with_extensions(markdown)
2418 .into_offset_iter()
2419 .map(|(_ev, range)| range)
2420 .nth(3)
2421 .unwrap();
2422 let expected_offset = 3..59;
2423 assert_eq!(expected_offset, event_offset);
2424 }
2425
2426 #[test]
2427 fn table_cell_span() {
2428 let markdown = "a|b|c\n--|--|--\na| |c";
2429 let event_offset = parser_with_extensions(markdown)
2430 .into_offset_iter()
2431 .filter_map(|(ev, span)| match ev {
2432 Event::Start(Tag::TableCell) => Some(span),
2433 _ => None,
2434 })
2435 .nth(4)
2436 .unwrap();
2437 let expected_offset_start = "a|b|c\n--|--|--\na|".len();
2438 assert_eq!(
2439 expected_offset_start..(expected_offset_start + 2),
2440 event_offset
2441 );
2442 }
2443
2444 #[test]
2445 fn offset_iter_issue_378() {
2446 let event_offsets: Vec<_> = Parser::new("a [b](c) d")
2447 .into_offset_iter()
2448 .map(|(_ev, range)| range)
2449 .collect();
2450 let expected_offsets = vec![(0..10), (0..2), (2..8), (3..4), (2..8), (8..10), (0..10)];
2451 assert_eq!(expected_offsets, event_offsets);
2452 }
2453
2454 #[test]
2455 fn offset_iter_issue_404() {
2456 let event_offsets: Vec<_> = Parser::new("###\n")
2457 .into_offset_iter()
2458 .map(|(_ev, range)| range)
2459 .collect();
2460 let expected_offsets = vec![(0..4), (0..4)];
2461 assert_eq!(expected_offsets, event_offsets);
2462 }
2463
2464 #[cfg(feature = "html")]
2466 #[test]
2467 fn link_def_at_eof() {
2468 let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
2469 let expected = "<p><a href=\"https://vincentprouillet.com\">My site</a></p>\n";
2470
2471 let mut buf = String::new();
2472 crate::html::push_html(&mut buf, Parser::new(test_str));
2473 assert_eq!(expected, buf);
2474 }
2475
2476 #[cfg(feature = "html")]
2477 #[test]
2478 fn no_footnote_refs_without_option() {
2479 let test_str = "a [^a]\n\n[^a]: yolo";
2480 let expected = "<p>a <a href=\"yolo\">^a</a></p>\n";
2481
2482 let mut buf = String::new();
2483 crate::html::push_html(&mut buf, Parser::new(test_str));
2484 assert_eq!(expected, buf);
2485 }
2486
2487 #[cfg(feature = "html")]
2488 #[test]
2489 fn ref_def_at_eof() {
2490 let test_str = "[test]:\\";
2491 let expected = "";
2492
2493 let mut buf = String::new();
2494 crate::html::push_html(&mut buf, Parser::new(test_str));
2495 assert_eq!(expected, buf);
2496 }
2497
2498 #[cfg(feature = "html")]
2499 #[test]
2500 fn ref_def_cr_lf() {
2501 let test_str = "[a]: /u\r\n\n[a]";
2502 let expected = "<p><a href=\"/u\">a</a></p>\n";
2503
2504 let mut buf = String::new();
2505 crate::html::push_html(&mut buf, Parser::new(test_str));
2506 assert_eq!(expected, buf);
2507 }
2508
2509 #[cfg(feature = "html")]
2510 #[test]
2511 fn no_dest_refdef() {
2512 let test_str = "[a]:";
2513 let expected = "<p>[a]:</p>\n";
2514
2515 let mut buf = String::new();
2516 crate::html::push_html(&mut buf, Parser::new(test_str));
2517 assert_eq!(expected, buf);
2518 }
2519
2520 #[test]
2521 fn broken_links_called_only_once() {
2522 for &(markdown, expected) in &[
2523 ("See also [`g()`][crate::g].", 1),
2524 ("See also [`g()`][crate::g][].", 1),
2525 ("[brokenlink1] some other node [brokenlink2]", 2),
2526 ] {
2527 let mut times_called = 0;
2528 let callback = &mut |_broken_link: BrokenLink| {
2529 times_called += 1;
2530 None
2531 };
2532 let parser =
2533 Parser::new_with_broken_link_callback(markdown, Options::empty(), Some(callback));
2534 for _ in parser {}
2535 assert_eq!(times_called, expected);
2536 }
2537 }
2538
2539 #[test]
2540 fn simple_broken_link_callback() {
2541 let test_str = "This is a link w/o def: [hello][world]";
2542 let mut callback = |broken_link: BrokenLink| {
2543 assert_eq!("world", broken_link.reference.as_ref());
2544 assert_eq!(&test_str[broken_link.span], "[hello][world]");
2545 let url = "YOLO".into();
2546 let title = "SWAG".to_owned().into();
2547 Some((url, title))
2548 };
2549 let parser =
2550 Parser::new_with_broken_link_callback(test_str, Options::empty(), Some(&mut callback));
2551 let mut link_tag_count = 0;
2552 for (typ, url, title, id) in parser.filter_map(|event| match event {
2553 Event::Start(tag) => match tag {
2554 Tag::Link {
2555 link_type,
2556 dest_url,
2557 title,
2558 id,
2559 } => Some((link_type, dest_url, title, id)),
2560 _ => None,
2561 },
2562 _ => None,
2563 }) {
2564 link_tag_count += 1;
2565 assert_eq!(typ, LinkType::ReferenceUnknown);
2566 assert_eq!(url.as_ref(), "YOLO");
2567 assert_eq!(title.as_ref(), "SWAG");
2568 assert_eq!(id.as_ref(), "world");
2569 }
2570 assert!(link_tag_count > 0);
2571 }
2572
2573 #[test]
2574 fn code_block_kind_check_fenced() {
2575 let parser = Parser::new("hello\n```test\ntadam\n```");
2576 let mut found = 0;
2577 for (ev, _range) in parser.into_offset_iter() {
2578 match ev {
2579 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(syntax))) => {
2580 assert_eq!(syntax.as_ref(), "test");
2581 found += 1;
2582 }
2583 _ => {}
2584 }
2585 }
2586 assert_eq!(found, 1);
2587 }
2588
2589 #[test]
2590 fn code_block_kind_check_indented() {
2591 let parser = Parser::new("hello\n\n ```test\n tadam\nhello");
2592 let mut found = 0;
2593 for (ev, _range) in parser.into_offset_iter() {
2594 match ev {
2595 Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
2596 found += 1;
2597 }
2598 _ => {}
2599 }
2600 }
2601 assert_eq!(found, 1);
2602 }
2603
2604 #[test]
2605 fn ref_defs() {
2606 let input = r###"[a B c]: http://example.com
2607[another]: https://google.com
2608
2609text
2610
2611[final ONE]: http://wikipedia.org
2612"###;
2613 let mut parser = Parser::new(input);
2614
2615 assert!(parser.reference_definitions().get("a b c").is_some());
2616 assert!(parser.reference_definitions().get("nope").is_none());
2617
2618 if let Some(_event) = parser.next() {
2619 let s = "final one".to_owned();
2621 let link_def = parser.reference_definitions().get(&s).unwrap();
2622 let span = &input[link_def.span.clone()];
2623 assert_eq!(span, "[final ONE]: http://wikipedia.org");
2624 }
2625 }
2626
2627 #[test]
2628 fn common_lifetime_patterns_allowed<'b>() {
2629 let temporary_str = String::from("xyz");
2630
2631 let mut closure = |link: BrokenLink<'b>| Some(("#".into(), link.reference));
2635
2636 fn function(link: BrokenLink<'_>) -> Option<(CowStr<'_>, CowStr<'_>)> {
2637 Some(("#".into(), link.reference))
2638 }
2639
2640 for _ in Parser::new_with_broken_link_callback(
2641 "static lifetime",
2642 Options::empty(),
2643 Some(&mut closure),
2644 ) {}
2645 for _ in Parser::new_with_broken_link_callback(
2654 "static lifetime",
2655 Options::empty(),
2656 Some(&mut function),
2657 ) {}
2658 for _ in Parser::new_with_broken_link_callback(
2659 &temporary_str,
2660 Options::empty(),
2661 Some(&mut function),
2662 ) {}
2663 }
2664
2665 #[test]
2666 fn inline_html_inside_blockquote() {
2667 let input = "> <foo\n> bar>";
2669 let events: Vec<_> = Parser::new(input).collect();
2670 let expected = [
2671 Event::Start(Tag::BlockQuote(None)),
2672 Event::Start(Tag::Paragraph),
2673 Event::InlineHtml(CowStr::Boxed("<foo\nbar>".to_string().into())),
2674 Event::End(TagEnd::Paragraph),
2675 Event::End(TagEnd::BlockQuote(None)),
2676 ];
2677 assert_eq!(&events, &expected);
2678 }
2679}