1use std::cmp::max;
5use std::ops::Range;
6
7use crate::parse::{
8 scan_containers, Allocations, FootnoteDef, HeadingAttributes, Item, ItemBody, LinkDef,
9 LINK_MAX_NESTED_PARENS,
10};
11use crate::strings::CowStr;
12use crate::tree::{Tree, TreeIndex};
13use crate::Options;
14use crate::{
15 linklabel::{scan_link_label_rest, LinkLabel},
16 HeadingLevel,
17};
18use crate::{scanners::*, MetadataBlockKind};
19
20use unicase::UniCase;
21
22pub(crate) fn run_first_pass(text: &str, options: Options) -> (Tree<Item>, Allocations<'_>) {
25 let start_capacity = max(128, text.len() / 32);
28 let lookup_table = &create_lut(&options);
29 let first_pass = FirstPass {
30 text,
31 tree: Tree::with_capacity(start_capacity),
32 begin_list_item: None,
33 last_line_blank: false,
34 allocs: Allocations::new(),
35 options,
36 lookup_table,
37 next_paragraph_task: None,
38 brace_context_next: 0,
39 brace_context_stack: Vec::new(),
40 };
41 first_pass.run()
42}
43
/// Length of `FirstPass::brace_context_stack` beyond which `parse_line` stops pushing and
/// popping an entry per brace and instead only counts `{` / `}` in `brace_context_next`.
const MATH_BRACE_CONTEXT_MAX_NESTING: usize = 25;
54
/// Mutable state carried through the first parsing pass.
///
/// `'a` is the lifetime of the input text; `'b` is the lifetime of the borrowed lookup table.
struct FirstPass<'a, 'b> {
    /// The complete input being parsed; all indices used by the methods are byte offsets
    /// into this string.
    text: &'a str,
    /// The item tree being built.
    tree: Tree<Item>,
    /// Set to the offset just past a list / definition marker (and optional task marker)
    /// when the rest of that line is blank; consulted when a blank line is handled in
    /// `parse_block`.
    begin_list_item: Option<usize>,
    /// Whether the most recently processed line was recorded as blank.
    last_line_blank: bool,
    /// Side storage filled during the pass (reference definitions, table alignments,
    /// synthesized strings, heading attributes).
    allocs: Allocations<'a>,
    /// Parser options that enable or disable extensions.
    options: Options,
    /// Table handed to `iterate_special_bytes` in `parse_line`.
    lookup_table: &'b LookupTable,
    /// A task-list marker item held back until the next paragraph is opened, where it is
    /// appended as the paragraph's first child.
    next_paragraph_task: Option<Item>,
    /// Stack of brace contexts used to tag `$` math delimiters; cleared at the start of
    /// every `parse_block` call.
    brace_context_stack: Vec<u8>,
    /// Next brace context value / brace counter; reset to 0 at the start of every
    /// `parse_block` call.
    brace_context_next: usize,
}
71
72impl<'a, 'b> FirstPass<'a, 'b> {
73 fn run(mut self) -> (Tree<Item>, Allocations<'a>) {
74 let mut ix = 0;
75 while ix < self.text.len() {
76 ix = self.parse_block(ix);
77 }
78 while self.tree.spine_len() > 0 {
79 self.pop(ix);
80 }
81 (self.tree, self.allocs)
82 }
83
    /// Parses one block starting at byte offset `start_ix` and returns the byte offset
    /// at which parsing should resume.
    ///
    /// The steps, in order:
    /// 1. scan the containers already open on the tree spine and pop those the line does
    ///    not match;
    /// 2. open any new containers found at the start of the line (footnote definitions,
    ///    list items, definition-list definitions, block quotes);
    /// 3. handle a blank line;
    /// 4. dispatch to the leaf-block parsers (indented code, metadata block, HTML block,
    ///    thematic break, ATX heading, fenced code, reference definitions), falling back
    ///    to `parse_paragraph`.
    fn parse_block(&mut self, mut start_ix: usize) -> usize {
        let bytes = self.text.as_bytes();
        let mut line_start = LineStart::new(&bytes[start_ix..]);

        // Brace tracking for `$` math delimiters is reset for every block.
        self.brace_context_stack.clear();
        self.brace_context_next = 0;

        // `i` is presumably the number of open containers this line continues (TODO confirm
        // against `scan_containers`); everything deeper on the spine is popped.
        let i = scan_containers(
            &self.tree,
            &mut line_start,
            self.options.has_gfm_footnotes(),
        );
        for _ in i..self.tree.spine_len() {
            self.pop(start_ix);
        }

        if self.options.contains(Options::ENABLE_OLD_FOOTNOTES) {
            // With old-style footnotes, a footnote definition that is still open is closed
            // when the previous line was blank.
            if let Some(node_ix) = self.tree.peek_up() {
                if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
                    if self.last_line_blank {
                        self.pop(start_ix);
                    }
                }
            }

            // Try a footnote definition right here; if one is found, also skip a blank
            // remainder of its line and restart line scanning after it.
            let container_start = start_ix + line_start.bytes_scanned();
            if let Some(bytecount) = self.parse_footnote(container_start) {
                start_ix = container_start + bytecount;
                start_ix += scan_blank_line(&bytes[start_ix..]).unwrap_or(0);
                line_start = LineStart::new(&bytes[start_ix..]);
            }
        }

        // Open new containers, one per iteration, until none is found.
        loop {
            // Kept so the scan can be rewound when no container starts here.
            let save = line_start.clone();
            let outer_indent = line_start.scan_space_upto(4);
            if outer_indent >= 4 {
                // Four columns of indentation: not a container start.
                line_start = save;
                break;
            }
            if self.options.has_gfm_footnotes()
                || self.options.contains(Options::ENABLE_OLD_FOOTNOTES)
            {
                // Footnote definition: restart the scan after it and look for more containers.
                let container_start = start_ix + line_start.bytes_scanned();
                if let Some(bytecount) = self.parse_footnote(container_start) {
                    start_ix = container_start + bytecount;
                    line_start = LineStart::new(&bytes[start_ix..]);
                    continue;
                }
            }
            let container_start = start_ix + line_start.bytes_scanned();
            if let Some((ch, index, indent)) = line_start.scan_list_marker_with_indent(outer_indent)
            {
                // List item.
                let after_marker_index = start_ix + line_start.bytes_scanned();
                self.continue_list(container_start - outer_indent, ch, index);
                self.tree.append(Item {
                    start: container_start - outer_indent,
                    end: after_marker_index,
                    body: ItemBody::ListItem(indent),
                });
                self.tree.push();
                if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
                    // Nothing but blank space after the marker: remember where the item's
                    // content would begin and finish this line.
                    self.begin_list_item = Some(after_marker_index + n);
                    return after_marker_index + n;
                }
                if self.options.contains(Options::ENABLE_TASKLISTS) {
                    let task_list_marker =
                        line_start.scan_task_list_marker().map(|is_checked| Item {
                            start: after_marker_index,
                            end: start_ix + line_start.bytes_scanned(),
                            body: ItemBody::TaskListMarker(is_checked),
                        });
                    if let Some(task_list_marker) = task_list_marker {
                        if let Some(n) = scan_blank_line(&bytes[task_list_marker.end..]) {
                            // Marker followed by a blank rest of line: append it directly.
                            self.tree.append(task_list_marker);
                            self.begin_list_item = Some(task_list_marker.end + n);
                            return task_list_marker.end + n;
                        } else {
                            // Otherwise hold it until `parse_paragraph` opens the paragraph.
                            self.next_paragraph_task = Some(task_list_marker);
                        }
                    }
                }
            } else if let Some((indent, child, item)) = self
                .options
                .contains(Options::ENABLE_DEFINITION_LIST)
                .then(|| {
                    self.tree
                        .cur()
                        .map(|cur| (self.tree[cur].child, &mut self.tree[cur].item))
                })
                .flatten()
                .filter(|(_, item)| {
                    matches!(
                        item,
                        Item {
                            body: ItemBody::Paragraph
                                | ItemBody::MaybeDefinitionListTitle
                                | ItemBody::DefinitionListDefinition(_),
                            ..
                        }
                    )
                })
                .and_then(|item| {
                    Some((
                        line_start
                            .scan_definition_list_definition_marker_with_indent(outer_indent)?,
                        item.0,
                        item.1,
                    ))
                })
            {
                // Definition-list definition. Only considered when the extension is enabled,
                // the current node is a paragraph, a possible title, or a previous
                // definition, and a definition marker scans at this position.
                match item.body {
                    ItemBody::Paragraph => {
                        // Turn the paragraph node into the list itself and move its span and
                        // children into a freshly created title node beneath it.
                        item.body = ItemBody::DefinitionList(true);
                        let Item { start, end, .. } = *item;
                        let list_idx = self.tree.cur().unwrap();
                        let title_idx = self.tree.create_node(Item {
                            start,
                            end,
                            body: ItemBody::DefinitionListTitle,
                        });
                        self.tree[title_idx].child = child;
                        self.tree[list_idx].child = Some(title_idx);
                        self.tree.push();
                    }
                    ItemBody::MaybeDefinitionListTitle => {
                        // The tentative title is now confirmed.
                        item.body = ItemBody::DefinitionListTitle;
                    }
                    ItemBody::DefinitionListDefinition(_) => {}
                    // The `filter` above admits only the three variants handled here.
                    _ => unreachable!(),
                }
                let after_marker_index = start_ix + line_start.bytes_scanned();
                self.tree.append(Item {
                    start: container_start - outer_indent,
                    end: after_marker_index,
                    body: ItemBody::DefinitionListDefinition(indent),
                });
                // A blank line before this definition makes the enclosing list loose.
                if let Some(ItemBody::DefinitionList(ref mut is_tight)) =
                    self.tree.peek_up().map(|cur| &mut self.tree[cur].item.body)
                {
                    if self.last_line_blank {
                        *is_tight = false;
                        self.last_line_blank = false;
                    }
                }
                self.tree.push();
                if let Some(n) = scan_blank_line(&bytes[after_marker_index..]) {
                    self.begin_list_item = Some(after_marker_index + n);
                    return after_marker_index + n;
                }
            } else if line_start.scan_blockquote_marker() {
                // Block quote; the tag is only scanned when the GFM option is enabled.
                let kind = if self.options.contains(Options::ENABLE_GFM) {
                    line_start.scan_blockquote_tag()
                } else {
                    None
                };
                self.finish_list(start_ix);
                self.tree.append(Item {
                    start: container_start,
                    end: 0,
                    body: ItemBody::BlockQuote(kind),
                });
                self.tree.push();
                if kind.is_some() {
                    // A tag was scanned: inspect what follows it as if it were a lazy
                    // continuation line of the quote just opened.
                    let ix = start_ix + line_start.bytes_scanned();
                    let mut lazy_line_start = LineStart::new(&bytes[ix..]);
                    let current_container = scan_containers(
                        &self.tree,
                        &mut lazy_line_start,
                        self.options.has_gfm_footnotes(),
                    ) == self.tree.spine_len();
                    if !lazy_line_start.scan_space(4)
                        && self.scan_paragraph_interrupt(
                            &bytes[ix + lazy_line_start.bytes_scanned()..],
                            current_container,
                        )
                    {
                        // Something that interrupts a paragraph follows: let the next
                        // `parse_block` call handle it.
                        return ix;
                    } else {
                        // Otherwise continue parsing this block from the following content.
                        line_start = lazy_line_start;
                        line_start.scan_all_space();
                        start_ix = ix;
                        break;
                    }
                }
            } else {
                // No container starts here: rewind the indentation scan and stop.
                line_start = save;
                break;
            }
        }

        let ix = start_ix + line_start.bytes_scanned();

        // Blank line.
        if let Some(n) = scan_blank_line(&bytes[ix..]) {
            if let Some(node_ix) = self.tree.peek_up() {
                match &mut self.tree[node_ix].item.body {
                    // Directly inside a block quote the blank line is not recorded.
                    ItemBody::BlockQuote(..) => (),
                    ItemBody::ListItem(indent) | ItemBody::DefinitionListDefinition(indent)
                        if self.begin_list_item.is_some() =>
                    {
                        // Blank line inside an item whose marker line was itself blank:
                        // record it and zero the item's stored indent.
                        self.last_line_blank = true;
                        *indent = 0;
                    }
                    _ => {
                        self.last_line_blank = true;
                    }
                }
            } else {
                self.last_line_blank = true;
            }
            return ix + n;
        }

        // Captured before the indentation scan below; passed on to the HTML block parsers.
        let remaining_space = line_start.remaining_space();

        let indent = line_start.scan_space_upto(4);
        if indent == 4 {
            // Indented code block.
            self.finish_list(start_ix);
            let ix = start_ix + line_start.bytes_scanned();
            let remaining_space = line_start.remaining_space();
            return self.parse_indented_code_block(ix, remaining_space);
        }

        let ix = start_ix + line_start.bytes_scanned();

        // Metadata blocks are only tried on unindented lines.
        if indent == 0 {
            if let Some((_n, metadata_block_ch)) = scan_metadata_block(
                &bytes[ix..],
                self.options
                    .contains(Options::ENABLE_YAML_STYLE_METADATA_BLOCKS),
                self.options
                    .contains(Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS),
            ) {
                self.finish_list(start_ix);
                return self.parse_metadata_block(ix, metadata_block_ch);
            }
        }

        // HTML blocks.
        if bytes[ix] == b'<' {
            // Types 1 to 5: the helper also yields the text that ends the block.
            if let Some(html_end_tag) = get_html_end_tag(&bytes[(ix + 1)..]) {
                self.finish_list(start_ix);
                return self.parse_html_block_type_1_to_5(
                    ix,
                    html_end_tag,
                    remaining_space,
                    indent,
                );
            }

            // Type 6.
            if starts_html_block_type_6(&bytes[(ix + 1)..]) {
                self.finish_list(start_ix);
                return self.parse_html_block_type_6_or_7(ix, remaining_space, indent);
            }

            // Type 7.
            if let Some(_html_bytes) = scan_html_type_7(&bytes[ix..]) {
                self.finish_list(start_ix);
                return self.parse_html_block_type_6_or_7(ix, remaining_space, indent);
            }
        }

        // Thematic break.
        if let Ok(n) = scan_hrule(&bytes[ix..]) {
            self.finish_list(start_ix);
            return self.parse_hrule(n, ix);
        }

        // ATX heading.
        if let Some(atx_size) = scan_atx_heading(&bytes[ix..]) {
            self.finish_list(start_ix);
            return self.parse_atx_heading(ix, atx_size);
        }

        // Fenced code block.
        if let Some((n, fence_ch)) = scan_code_fence(&bytes[ix..]) {
            self.finish_list(start_ix);
            return self.parse_fenced_code_block(ix, indent, fence_ch, n);
        }

        // Link reference definitions; several may follow one another.
        while let Some((bytecount, label, link_def)) =
            self.parse_refdef_total(start_ix + line_start.bytes_scanned())
        {
            // An already registered label keeps its first definition.
            self.allocs.refdefs.0.entry(label).or_insert(link_def);
            let container_start = start_ix + line_start.bytes_scanned();
            let mut ix = container_start + bytecount;
            if let Some(nl) = scan_blank_line(&bytes[ix..]) {
                // The definition ends its line. Inspect the next line with the same
                // lazy-continuation check used for block-quote tags above.
                ix += nl;
                let mut lazy_line_start = LineStart::new(&bytes[ix..]);
                let current_container = scan_containers(
                    &self.tree,
                    &mut lazy_line_start,
                    self.options.has_gfm_footnotes(),
                ) == self.tree.spine_len();
                if !lazy_line_start.scan_space(4)
                    && self.scan_paragraph_interrupt(
                        &bytes[ix + lazy_line_start.bytes_scanned()..],
                        current_container,
                    )
                {
                    self.finish_list(start_ix);
                    return ix;
                } else {
                    // Keep going from the next line: it may hold another definition or
                    // the start of a paragraph.
                    line_start = lazy_line_start;
                    line_start.scan_all_space();
                    start_ix = ix;
                }
            } else {
                self.finish_list(start_ix);
                return ix;
            }
        }

        let ix = start_ix + line_start.bytes_scanned();

        // Nothing else matched: paragraph.
        self.parse_paragraph(ix)
    }
438
439 fn parse_table(
443 &mut self,
444 table_cols: usize,
445 head_start: usize,
446 body_start: usize,
447 ) -> Option<usize> {
448 let mut missing_empty_cells = 0;
451 let (_sep_start, thead_ix) =
453 self.parse_table_row_inner(head_start, table_cols, &mut missing_empty_cells)?;
454 self.tree[thead_ix].item.body = ItemBody::TableHead;
455
456 let mut ix = body_start;
458 while let Some((next_ix, _row_ix)) =
459 self.parse_table_row(ix, table_cols, &mut missing_empty_cells)
460 {
461 ix = next_ix;
462 }
463
464 self.pop(ix);
465 Some(ix)
466 }
467
468 fn parse_table_row_inner(
471 &mut self,
472 mut ix: usize,
473 row_cells: usize,
474 missing_empty_cells: &mut usize,
475 ) -> Option<(usize, TreeIndex)> {
476 const MAX_AUTOCOMPLETED_CELLS: usize = 1 << 18; let bytes = self.text.as_bytes();
480 let mut cells = 0;
481 let mut final_cell_ix = None;
482
483 let old_cur = self.tree.cur();
484 let row_ix = self.tree.append(Item {
485 start: ix,
486 end: 0, body: ItemBody::TableRow,
488 });
489 self.tree.push();
490
491 loop {
492 ix += scan_ch(&bytes[ix..], b'|');
493 let start_ix = ix;
494 ix += scan_whitespace_no_nl(&bytes[ix..]);
495
496 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
497 ix += eol_bytes;
498 break;
499 }
500
501 let cell_ix = self.tree.append(Item {
502 start: start_ix,
503 end: ix,
504 body: ItemBody::TableCell,
505 });
506 self.tree.push();
507 let (next_ix, _brk) = self.parse_line(ix, None, TableParseMode::Active);
508
509 self.tree[cell_ix].item.end = next_ix;
510 self.tree.pop();
511
512 ix = next_ix;
513 cells += 1;
514
515 if cells == row_cells {
516 final_cell_ix = Some(cell_ix);
517 }
518 }
519
520 if let (Some(cur), 0) = (old_cur, cells) {
521 self.pop(ix);
522 self.tree[cur].next = None;
523 return None;
524 }
525
526 for _ in cells..row_cells {
530 if *missing_empty_cells >= MAX_AUTOCOMPLETED_CELLS {
531 return None;
532 }
533 *missing_empty_cells += 1;
534 self.tree.append(Item {
535 start: ix,
536 end: ix,
537 body: ItemBody::TableCell,
538 });
539 }
540
541 if let Some(cell_ix) = final_cell_ix {
543 self.tree[cell_ix].next = None;
544 }
545
546 self.pop(ix);
547
548 Some((ix, row_ix))
549 }
550
551 fn parse_table_row(
553 &mut self,
554 mut ix: usize,
555 row_cells: usize,
556 missing_empty_cells: &mut usize,
557 ) -> Option<(usize, TreeIndex)> {
558 let bytes = self.text.as_bytes();
559 let mut line_start = LineStart::new(&bytes[ix..]);
560 let current_container = scan_containers(
561 &self.tree,
562 &mut line_start,
563 self.options.has_gfm_footnotes(),
564 ) == self.tree.spine_len();
565 if !current_container {
566 return None;
567 }
568 line_start.scan_all_space();
569 ix += line_start.bytes_scanned();
570 if scan_paragraph_interrupt_no_table(
571 &bytes[ix..],
572 current_container,
573 self.options.contains(Options::ENABLE_FOOTNOTES),
574 self.options.contains(Options::ENABLE_DEFINITION_LIST),
575 &self.tree,
576 ) {
577 return None;
578 }
579
580 let (ix, row_ix) = self.parse_table_row_inner(ix, row_cells, missing_empty_cells)?;
581 Some((ix, row_ix))
582 }
583
    /// Parses a paragraph starting at `start_ix` and returns the offset where block
    /// parsing should resume.
    ///
    /// The node that is opened may end up as something other than a paragraph: it is
    /// retagged as a table when the first line turns out to be a table header, or as a
    /// heading when a setext underline follows. Directly inside a definition list it is
    /// opened as a tentative definition-list title instead.
    fn parse_paragraph(&mut self, start_ix: usize) -> usize {
        // Choose the body for the new node. Inside a definition list, a node that is the
        // first child or follows a definition is opened as a possible title; in every
        // other case the open list (if any) is finished and a plain paragraph is used.
        let body = if let Some(ItemBody::DefinitionList(_)) =
            self.tree.peek_up().map(|idx| self.tree[idx].item.body)
        {
            if self.tree.cur().map_or(true, |idx| {
                matches!(
                    &self.tree[idx].item.body,
                    ItemBody::DefinitionListDefinition(..)
                )
            }) {
                // A blank line before a possible title is forgotten here.
                self.last_line_blank = false;
                ItemBody::MaybeDefinitionListTitle
            } else {
                self.finish_list(start_ix);
                ItemBody::Paragraph
            }
        } else {
            self.finish_list(start_ix);
            ItemBody::Paragraph
        };
        let node_ix = self.tree.append(Item {
            start: start_ix,
            end: 0,
            body,
        });
        self.tree.push();

        // A task-list marker stashed by `parse_block` becomes the first child.
        if let Some(item) = self.next_paragraph_task {
            self.tree.append(item);
            self.next_paragraph_task = None;
        }

        let bytes = self.text.as_bytes();
        let mut ix = start_ix;
        // One iteration per line of the paragraph.
        loop {
            // Table detection is attempted on the first line only, and only when enabled.
            let scan_mode = if self.options.contains(Options::ENABLE_TABLES) && ix == start_ix {
                TableParseMode::Scan
            } else {
                TableParseMode::Disabled
            };
            let (next_ix, brk) = self.parse_line(ix, None, scan_mode);

            // `parse_line` reports a table through the break item.
            if let Some(Item {
                body: ItemBody::Table(alignment_ix),
                ..
            }) = brk
            {
                let table_cols = self.allocs[alignment_ix].len();
                self.tree[node_ix].item.body = ItemBody::Table(alignment_ix);
                // Drop the inline children appended so far; the header line is parsed
                // again as a table row by `parse_table`.
                self.tree[node_ix].child = None;
                // Step out of the node so `finish_list` runs at the parent level, then
                // step back in.
                self.tree.pop();
                if body == ItemBody::MaybeDefinitionListTitle {
                    self.finish_list(ix);
                }
                self.tree.push();
                if let Some(ix) = self.parse_table(table_cols, ix, next_ix) {
                    return ix;
                }
            }

            ix = next_ix;
            let mut line_start = LineStart::new(&bytes[ix..]);
            // Whether the next line matches every container currently open.
            let current_container = scan_containers(
                &self.tree,
                &mut line_start,
                self.options.has_gfm_footnotes(),
            ) == self.tree.spine_len();
            // Position of the backslash when the line ended in a backslash hard break.
            // If the paragraph ends here, that backslash is emitted as literal text
            // instead of the break.
            let trailing_backslash_pos = match brk {
                Some(Item {
                    start,
                    body: ItemBody::HardBreak(true),
                    ..
                }) if bytes[start] == b'\\' => Some(start),
                _ => None,
            };
            // Lines indented by four spaces are neither setext underlines nor interrupts.
            if !line_start.scan_space(4) {
                let ix_new = ix + line_start.bytes_scanned();
                if current_container {
                    if let Some(ix_setext) =
                        self.parse_setext_heading(ix_new, node_ix, trailing_backslash_pos.is_some())
                    {
                        // Setext underline found: the node has been retagged as a heading.
                        if let Some(pos) = trailing_backslash_pos {
                            self.tree.append_text(pos, pos + 1, false);
                        }
                        self.pop(ix_setext);
                        if body == ItemBody::MaybeDefinitionListTitle {
                            self.finish_list(ix);
                        }
                        return ix_setext;
                    }
                }
                let suffix = &bytes[ix_new..];
                // The next line starts something that interrupts the paragraph.
                if self.scan_paragraph_interrupt(suffix, current_container) {
                    if let Some(pos) = trailing_backslash_pos {
                        self.tree.append_text(pos, pos + 1, false);
                    }
                    break;
                }
            }
            line_start.scan_all_space();
            // A line with nothing after its leading space ends the paragraph.
            if line_start.is_at_eol() {
                if let Some(pos) = trailing_backslash_pos {
                    self.tree.append_text(pos, pos + 1, false);
                }
                break;
            }
            // The paragraph continues: skip the container prefix and indentation of the
            // next line, and only now emit the line break between the two lines.
            ix = next_ix + line_start.bytes_scanned();
            if let Some(item) = brk {
                self.tree.append(item);
            }
        }

        self.pop(ix);
        ix
    }
705
706 fn parse_setext_heading(
708 &mut self,
709 ix: usize,
710 node_ix: TreeIndex,
711 has_trailing_content: bool,
712 ) -> Option<usize> {
713 let bytes = self.text.as_bytes();
714 let (n, level) = scan_setext_heading(&bytes[ix..])?;
715 let mut attrs = None;
716
717 if let Some(cur_ix) = self.tree.cur() {
718 let parent_ix = self.tree.peek_up().unwrap();
719 let header_start = self.tree[parent_ix].item.start;
720 let header_end = self.tree[cur_ix].item.end;
724
725 let (content_end, attrs_) =
727 self.extract_and_parse_heading_attribute_block(header_start, header_end);
728 attrs = attrs_;
729
730 let new_end = if has_trailing_content {
732 content_end
733 } else {
734 let mut last_line_start = header_start;
735 if attrs.is_some() {
736 loop {
737 let next_line_start =
738 last_line_start + scan_nextline(&bytes[last_line_start..content_end]);
739 if next_line_start >= content_end {
740 break;
741 }
742 let mut line_start = LineStart::new(&bytes[next_line_start..content_end]);
743 if scan_containers(
744 &self.tree,
745 &mut line_start,
746 self.options.has_gfm_footnotes(),
747 ) != self.tree.spine_len()
748 {
749 break;
750 }
751 last_line_start = next_line_start + line_start.bytes_scanned();
752 }
753 }
754 let trailing_ws = scan_rev_while(
755 &bytes[last_line_start..content_end],
756 is_ascii_whitespace_no_nl,
757 );
758 content_end - trailing_ws
759 };
760
761 if attrs.is_some() {
762 self.tree.truncate_siblings(new_end);
764 }
765
766 if let Some(cur_ix) = self.tree.cur() {
767 self.tree[cur_ix].item.end = new_end;
768 }
769 }
770
771 self.tree[node_ix].item.body = ItemBody::Heading(
772 level,
773 attrs.map(|attrs| self.allocs.allocate_heading(attrs)),
774 );
775
776 Some(ix + n)
777 }
778
    /// Scans one line of inline content starting at `start`, appending text nodes and
    /// tentative inline markers (`Maybe*` items) to the tree.
    ///
    /// * `end` - when `Some`, only the bytes before that offset are considered.
    /// * `mode` - table handling: `Active` while parsing a table cell (the scan stops at an
    ///   unescaped `|` or at the line ending), `Scan` to detect a table header on this
    ///   line, `Disabled` otherwise.
    ///
    /// Returns the offset where scanning stopped and an optional item describing how the
    /// line ended (soft break, hard break, or a detected table). That item is NOT appended
    /// to the tree here; the caller decides whether to append it.
    fn parse_line(
        &mut self,
        start: usize,
        end: Option<usize>,
        mode: TableParseMode,
    ) -> (usize, Option<Item>) {
        let bytes = self.text.as_bytes();
        let bytes = match end {
            Some(end) => &bytes[..end],
            None => bytes,
        };
        let bytes_len = bytes.len();
        // Number of pipes seen and position of the last one (table detection only).
        let mut pipes = 0;
        let mut last_pipe_ix = start;
        // Start of the pending run of plain text that has not been appended yet.
        let mut begin_text = start;
        // Whether the pending text run begins right after a backslash escape.
        let mut backslash_escaped = false;

        // The callback receives the offset and value of a byte of interest; presumably
        // these are the bytes flagged in `self.lookup_table` (TODO confirm against
        // `iterate_special_bytes`).
        let (final_ix, brk) = iterate_special_bytes(self.lookup_table, bytes, start, |ix, byte| {
            match byte {
                b'\n' | b'\r' => {
                    // Inside a table cell the line ending simply stops the scan.
                    if let TableParseMode::Active = mode {
                        return LoopInstruction::BreakAtWith(ix, None);
                    }

                    let mut i = ix;
                    let eol_bytes = scan_eol(&bytes[ix..]).unwrap();

                    let end_ix = ix + eol_bytes;
                    // An odd number of backslashes before the line ending leaves one
                    // unescaped backslash: a hard break, unless the input ends here.
                    let trailing_backslashes = scan_rev_while(&bytes[..ix], |b| b == b'\\');
                    if trailing_backslashes % 2 == 1 && end_ix < bytes_len {
                        i -= 1;
                        self.tree.append_text(begin_text, i, backslash_escaped);
                        backslash_escaped = false;
                        return LoopInstruction::BreakAtWith(
                            end_ix,
                            Some(Item {
                                start: i,
                                end: end_ix,
                                body: ItemBody::HardBreak(true),
                            }),
                        );
                    }

                    if mode == TableParseMode::Scan && pipes > 0 {
                        // The line contained pipes: check whether the next line, within
                        // the same containers, is a table delimiter row.
                        let next_line_ix = ix + eol_bytes;
                        let mut line_start = LineStart::new(&bytes[next_line_ix..]);
                        if scan_containers(
                            &self.tree,
                            &mut line_start,
                            self.options.has_gfm_footnotes(),
                        ) == self.tree.spine_len()
                        {
                            let table_head_ix = next_line_ix + line_start.bytes_scanned();
                            let (table_head_bytes, alignment) =
                                scan_table_head(&bytes[table_head_ix..]);

                            if table_head_bytes > 0 {
                                let header_count =
                                    count_header_cols(bytes, pipes, start, last_pipe_ix);

                                // The header must have as many columns as the delimiter row.
                                if alignment.len() == header_count {
                                    let alignment_ix = self.allocs.allocate_alignment(alignment);
                                    let end_ix = table_head_ix + table_head_bytes;
                                    return LoopInstruction::BreakAtWith(
                                        end_ix,
                                        Some(Item {
                                            start: i,
                                            end: end_ix,
                                            body: ItemBody::Table(alignment_ix),
                                        }),
                                    );
                                }
                            }
                        }
                    }

                    // Two or more trailing whitespace bytes before the line ending make a
                    // hard break; the whitespace is excluded from the text.
                    let trailing_whitespace =
                        scan_rev_while(&bytes[..ix], is_ascii_whitespace_no_nl);
                    if trailing_whitespace >= 2 {
                        i -= trailing_whitespace;
                        self.tree.append_text(begin_text, i, backslash_escaped);
                        backslash_escaped = false;
                        return LoopInstruction::BreakAtWith(
                            end_ix,
                            Some(Item {
                                start: i,
                                end: end_ix,
                                body: ItemBody::HardBreak(false),
                            }),
                        );
                    }

                    // Otherwise a soft break; trailing whitespace is still trimmed from
                    // the text.
                    self.tree
                        .append_text(begin_text, ix - trailing_whitespace, backslash_escaped);
                    backslash_escaped = false;

                    LoopInstruction::BreakAtWith(
                        end_ix,
                        Some(Item {
                            start: i,
                            end: end_ix,
                            body: ItemBody::SoftBreak,
                        }),
                    )
                }
                b'\\' => {
                    // Only a backslash followed by ASCII punctuation is an escape.
                    if ix + 1 < bytes_len && is_ascii_punctuation(bytes[ix + 1]) {
                        self.tree.append_text(begin_text, ix, backslash_escaped);
                        if bytes[ix + 1] == b'`' {
                            // Escaped backtick run: still recorded as a possible code
                            // delimiter, flagged (second field `true`) as preceded by a
                            // backslash.
                            let count = 1 + scan_ch_repeat(&bytes[(ix + 2)..], b'`');
                            self.tree.append(Item {
                                start: ix + 1,
                                end: ix + count + 1,
                                body: ItemBody::MaybeCode(count, true),
                            });
                            begin_text = ix + 1 + count;
                            backslash_escaped = false;
                            LoopInstruction::ContinueAndSkip(count)
                        } else if bytes[ix + 1] == b'|' && TableParseMode::Active == mode {
                            // Escaped pipe inside a table cell: the backslash is dropped
                            // and the following text is not flagged as escaped.
                            begin_text = ix + 1;
                            backslash_escaped = false;
                            LoopInstruction::ContinueAndSkip(1)
                        } else if ix + 2 < bytes_len
                            && bytes[ix + 1] == b'\\'
                            && bytes[ix + 2] == b'|'
                            && TableParseMode::Active == mode
                        {
                            // `\\|` inside a table cell: both backslashes are dropped and
                            // text resumes at the pipe.
                            begin_text = ix + 2;
                            backslash_escaped = true;
                            LoopInstruction::ContinueAndSkip(2)
                        } else {
                            // Ordinary escape: text resumes at the escaped character.
                            begin_text = ix + 1;
                            backslash_escaped = true;
                            LoopInstruction::ContinueAndSkip(1)
                        }
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                c @ b'*' | c @ b'_' | c @ b'~' => {
                    // Emphasis / strikethrough delimiter run of `count` identical bytes.
                    let string_suffix = &self.text[ix..];
                    let count = 1 + scan_ch_repeat(&string_suffix.as_bytes()[1..], c);
                    let can_open = delim_run_can_open(
                        &self.text[start..],
                        string_suffix,
                        count,
                        ix - start,
                        mode,
                    );
                    let can_close = delim_run_can_close(
                        &self.text[start..],
                        string_suffix,
                        count,
                        ix - start,
                        mode,
                    );
                    // Runs of more than two tildes are never delimiters.
                    let is_valid_seq = c != b'~' || count <= 2;

                    if (can_open || can_close) && is_valid_seq {
                        self.tree.append_text(begin_text, ix, backslash_escaped);
                        backslash_escaped = false;
                        // One node per delimiter byte, each carrying the number of
                        // delimiter bytes from its position to the end of the run.
                        for i in 0..count {
                            self.tree.append(Item {
                                start: ix + i,
                                end: ix + i + 1,
                                body: ItemBody::MaybeEmphasis(count - i, can_open, can_close),
                            });
                        }
                        begin_text = ix + count;
                    }
                    LoopInstruction::ContinueAndSkip(count - 1)
                }
                b'$' => {
                    // Possible math delimiter. It can open when the next byte exists and
                    // is not ASCII whitespace; it can close when it is not the first byte
                    // of the line and the previous byte is not ASCII whitespace.
                    let string_suffix = &self.text[ix..];
                    let can_open = !string_suffix[1..]
                        .as_bytes()
                        .first()
                        .copied()
                        .map_or(true, is_ascii_whitespace);
                    let can_close = ix > start
                        && !self.text[..ix]
                            .as_bytes()
                            .last()
                            .copied()
                            .map_or(true, is_ascii_whitespace);

                    // Brace context recorded on the delimiter: the top of the stack, or
                    // the running counter once the stack is past its nesting limit. On the
                    // first `$` of a block the stack is empty and `!0` is pushed as the
                    // initial context.
                    let brace_context =
                        if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
                            self.brace_context_next as u8
                        } else {
                            self.brace_context_stack.last().copied().unwrap_or_else(|| {
                                self.brace_context_stack.push(!0);
                                !0
                            })
                        };

                    // NOTE(review): this is the only arm that appends text without
                    // resetting `backslash_escaped` afterwards - confirm this is intended.
                    self.tree.append_text(begin_text, ix, backslash_escaped);
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeMath(can_open, can_close, brace_context),
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'{' => {
                    if self.brace_context_stack.len() == MATH_BRACE_CONTEXT_MAX_NESTING {
                        // Reaching the limit: push one last entry and turn
                        // `brace_context_next` into a plain depth counter.
                        self.brace_context_stack.push(self.brace_context_next as u8);
                        self.brace_context_next = MATH_BRACE_CONTEXT_MAX_NESTING;
                    } else if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
                        // Past the limit: only count.
                        self.brace_context_next += 1;
                    } else if !self.brace_context_stack.is_empty() {
                        // Below the limit, and a `$` has already been seen in this block
                        // (the stack is non-empty): open a new brace context.
                        self.brace_context_stack.push(self.brace_context_next as u8);
                        self.brace_context_next += 1;
                    }
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'}' => {
                    if let &mut [ref mut top_level_context] = &mut self.brace_context_stack[..] {
                        // Only the initial context is on the stack, so this `}` has no
                        // matching `{`: switch to a different initial context value.
                        *top_level_context = top_level_context.wrapping_sub(1);
                    } else if self.brace_context_stack.len() > MATH_BRACE_CONTEXT_MAX_NESTING {
                        // Past the limit: decrement the counter, and pop once it is back
                        // down at the limit.
                        if self.brace_context_next <= MATH_BRACE_CONTEXT_MAX_NESTING {
                            self.brace_context_stack.pop();
                        } else {
                            self.brace_context_next -= 1;
                        }
                    } else {
                        // Popping an empty stack (no `$` seen yet) is a no-op.
                        self.brace_context_stack.pop();
                    }
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'`' => {
                    // Backtick run: possible code span delimiter.
                    self.tree.append_text(begin_text, ix, backslash_escaped);
                    backslash_escaped = false;
                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'`');
                    self.tree.append(Item {
                        start: ix,
                        end: ix + count,
                        body: ItemBody::MaybeCode(count, false),
                    });
                    begin_text = ix + count;
                    LoopInstruction::ContinueAndSkip(count - 1)
                }
                // A `<` immediately followed by a backslash is not handled by this arm.
                b'<' if bytes.get(ix + 1) != Some(&b'\\') => {
                    self.tree.append_text(begin_text, ix, backslash_escaped);
                    backslash_escaped = false;
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeHtml,
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b'!' => {
                    // `![` may open an image; a lone `!` stays part of the text.
                    if ix + 1 < bytes_len && bytes[ix + 1] == b'[' {
                        self.tree.append_text(begin_text, ix, backslash_escaped);
                        backslash_escaped = false;
                        self.tree.append(Item {
                            start: ix,
                            end: ix + 2,
                            body: ItemBody::MaybeImage,
                        });
                        begin_text = ix + 2;
                        LoopInstruction::ContinueAndSkip(1)
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'[' => {
                    self.tree.append_text(begin_text, ix, backslash_escaped);
                    backslash_escaped = false;
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeLinkOpen,
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                b']' => {
                    self.tree.append_text(begin_text, ix, backslash_escaped);
                    backslash_escaped = false;
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeLinkClose(true),
                    });
                    begin_text = ix + 1;
                    LoopInstruction::ContinueAndSkip(0)
                }
                // Entity reference: replaced by its value when it scans successfully.
                b'&' => match scan_entity(&bytes[ix..]) {
                    (n, Some(value)) => {
                        self.tree.append_text(begin_text, ix, backslash_escaped);
                        backslash_escaped = false;
                        self.tree.append(Item {
                            start: ix,
                            end: ix + n,
                            body: ItemBody::SynthesizeText(self.allocs.allocate_cow(value)),
                        });
                        begin_text = ix + n;
                        LoopInstruction::ContinueAndSkip(n - 1)
                    }
                    _ => LoopInstruction::ContinueAndSkip(0),
                },
                b'|' => {
                    if ix != 0 && bytes[ix - 1] == b'\\' {
                        // A pipe directly preceded by a backslash is ignored here.
                        LoopInstruction::ContinueAndSkip(0)
                    } else if let TableParseMode::Active = mode {
                        // In a table cell a pipe ends the cell.
                        LoopInstruction::BreakAtWith(ix, None)
                    } else {
                        // Otherwise it is counted for table detection at the line ending.
                        last_pipe_ix = ix;
                        pipes += 1;
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'.' => {
                    // Three consecutive dots become an ellipsis character.
                    if ix + 2 < bytes.len() && bytes[ix + 1] == b'.' && bytes[ix + 2] == b'.' {
                        self.tree.append_text(begin_text, ix, backslash_escaped);
                        backslash_escaped = false;
                        self.tree.append(Item {
                            start: ix,
                            end: ix + 3,
                            body: ItemBody::SynthesizeChar('…'),
                        });
                        begin_text = ix + 3;
                        LoopInstruction::ContinueAndSkip(2)
                    } else {
                        LoopInstruction::ContinueAndSkip(0)
                    }
                }
                b'-' => {
                    // Runs of hyphens become dashes: two make an en dash, three an em dash.
                    let count = 1 + scan_ch_repeat(&bytes[(ix + 1)..], b'-');
                    if count == 1 {
                        LoopInstruction::ContinueAndSkip(0)
                    } else {
                        let itembody = if count == 2 {
                            ItemBody::SynthesizeChar('–')
                        } else if count == 3 {
                            ItemBody::SynthesizeChar('—')
                        } else {
                            // Longer runs are split into em dashes followed by en dashes
                            // such that 3 * ems + 2 * ens == count.
                            let (ems, ens) = match count % 6 {
                                0 | 3 => (count / 3, 0),
                                2 | 4 => (0, count / 2),
                                1 => (count / 3 - 1, 2),
                                _ => (count / 3, 1),
                            };
                            // Both dash characters take three bytes in UTF-8.
                            let mut buf = String::with_capacity(3 * (ems + ens));
                            for _ in 0..ems {
                                buf.push('—');
                            }
                            for _ in 0..ens {
                                buf.push('–');
                            }
                            ItemBody::SynthesizeText(self.allocs.allocate_cow(buf.into()))
                        };

                        self.tree.append_text(begin_text, ix, backslash_escaped);
                        backslash_escaped = false;
                        self.tree.append(Item {
                            start: ix,
                            end: ix + count,
                            body: itembody,
                        });
                        begin_text = ix + count;
                        LoopInstruction::ContinueAndSkip(count - 1)
                    }
                }
                c @ b'\'' | c @ b'"' => {
                    // Quote character: recorded as a possible smart quote together with
                    // whether it can open and/or close.
                    let string_suffix = &self.text[ix..];
                    let can_open =
                        delim_run_can_open(&self.text[start..], string_suffix, 1, ix - start, mode);
                    let can_close = delim_run_can_close(
                        &self.text[start..],
                        string_suffix,
                        1,
                        ix - start,
                        mode,
                    );

                    self.tree.append_text(begin_text, ix, backslash_escaped);
                    backslash_escaped = false;
                    self.tree.append(Item {
                        start: ix,
                        end: ix + 1,
                        body: ItemBody::MaybeSmartQuote(c, can_open, can_close),
                    });
                    begin_text = ix + 1;

                    LoopInstruction::ContinueAndSkip(0)
                }
                _ => LoopInstruction::ContinueAndSkip(0),
            }
        });

        // The scan ended without a break item (end of the considered bytes, or a table
        // cell boundary): flush the pending text, minus trailing whitespace.
        if brk.is_none() {
            let trailing_whitespace =
                scan_rev_while(&bytes[begin_text..final_ix], is_ascii_whitespace_no_nl);
            self.tree.append_text(
                begin_text,
                final_ix - trailing_whitespace,
                backslash_escaped,
            );
        }
        (final_ix, brk)
    }
1230
1231 fn parse_html_block_type_1_to_5(
1237 &mut self,
1238 start_ix: usize,
1239 html_end_tag: &str,
1240 mut remaining_space: usize,
1241 mut indent: usize,
1242 ) -> usize {
1243 self.tree.append(Item {
1244 start: start_ix,
1245 end: 0, body: ItemBody::HtmlBlock,
1247 });
1248 self.tree.push();
1249
1250 let bytes = self.text.as_bytes();
1251 let mut ix = start_ix;
1252 let end_ix;
1253 loop {
1254 let line_start_ix = ix;
1255 ix += scan_nextline(&bytes[ix..]);
1256 self.append_html_line(remaining_space.max(indent), line_start_ix, ix);
1257
1258 let mut line_start = LineStart::new(&bytes[ix..]);
1259 let n_containers = scan_containers(
1260 &self.tree,
1261 &mut line_start,
1262 self.options.has_gfm_footnotes(),
1263 );
1264 if n_containers < self.tree.spine_len() {
1265 end_ix = ix;
1266 break;
1267 }
1268
1269 if self.text[line_start_ix..ix].contains(html_end_tag) {
1270 end_ix = ix;
1271 break;
1272 }
1273
1274 let next_line_ix = ix + line_start.bytes_scanned();
1275 if next_line_ix == self.text.len() {
1276 end_ix = next_line_ix;
1277 break;
1278 }
1279 ix = next_line_ix;
1280 remaining_space = line_start.remaining_space();
1281 indent = 0;
1282 }
1283 self.pop(end_ix);
1284 ix
1285 }
1286
1287 fn parse_html_block_type_6_or_7(
1291 &mut self,
1292 start_ix: usize,
1293 mut remaining_space: usize,
1294 mut indent: usize,
1295 ) -> usize {
1296 self.tree.append(Item {
1297 start: start_ix,
1298 end: 0, body: ItemBody::HtmlBlock,
1300 });
1301 self.tree.push();
1302
1303 let bytes = self.text.as_bytes();
1304 let mut ix = start_ix;
1305 let end_ix;
1306 loop {
1307 let line_start_ix = ix;
1308 ix += scan_nextline(&bytes[ix..]);
1309 self.append_html_line(remaining_space.max(indent), line_start_ix, ix);
1310
1311 let mut line_start = LineStart::new(&bytes[ix..]);
1312 let n_containers = scan_containers(
1313 &self.tree,
1314 &mut line_start,
1315 self.options.has_gfm_footnotes(),
1316 );
1317 if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
1318 end_ix = ix;
1319 break;
1320 }
1321
1322 let next_line_ix = ix + line_start.bytes_scanned();
1323 if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
1324 {
1325 end_ix = next_line_ix;
1326 break;
1327 }
1328 ix = next_line_ix;
1329 remaining_space = line_start.remaining_space();
1330 indent = 0;
1331 }
1332 self.pop(end_ix);
1333 ix
1334 }
1335
1336 fn parse_indented_code_block(&mut self, start_ix: usize, mut remaining_space: usize) -> usize {
1337 self.tree.append(Item {
1338 start: start_ix,
1339 end: 0, body: ItemBody::IndentCodeBlock,
1341 });
1342 self.tree.push();
1343 let bytes = self.text.as_bytes();
1344 let mut last_nonblank_child = None;
1345 let mut last_nonblank_ix = 0;
1346 let mut end_ix = 0;
1347 self.last_line_blank = false;
1348
1349 let mut ix = start_ix;
1350 loop {
1351 let line_start_ix = ix;
1352 ix += scan_nextline(&bytes[ix..]);
1353 self.append_code_text(remaining_space, line_start_ix, ix);
1354 if !self.last_line_blank {
1357 last_nonblank_child = self.tree.cur();
1358 last_nonblank_ix = ix;
1359 end_ix = ix;
1360 }
1361
1362 let mut line_start = LineStart::new(&bytes[ix..]);
1363 let n_containers = scan_containers(
1364 &self.tree,
1365 &mut line_start,
1366 self.options.has_gfm_footnotes(),
1367 );
1368 if n_containers < self.tree.spine_len()
1369 || !(line_start.scan_space(4) || line_start.is_at_eol())
1370 {
1371 break;
1372 }
1373 let next_line_ix = ix + line_start.bytes_scanned();
1374 if next_line_ix == self.text.len() {
1375 break;
1376 }
1377 ix = next_line_ix;
1378 remaining_space = line_start.remaining_space();
1379 self.last_line_blank = scan_blank_line(&bytes[ix..]).is_some();
1380 }
1381
1382 if let Some(child) = last_nonblank_child {
1384 self.tree[child].next = None;
1385 self.tree[child].item.end = last_nonblank_ix;
1386 }
1387 self.pop(end_ix);
1388 ix
1389 }
1390
1391 fn parse_fenced_code_block(
1392 &mut self,
1393 start_ix: usize,
1394 indent: usize,
1395 fence_ch: u8,
1396 n_fence_char: usize,
1397 ) -> usize {
1398 let bytes = self.text.as_bytes();
1399 let mut info_start = start_ix + n_fence_char;
1400 info_start += scan_whitespace_no_nl(&bytes[info_start..]);
1401 let mut ix = info_start + scan_nextline(&bytes[info_start..]);
1404 let info_end = ix - scan_rev_while(&bytes[info_start..ix], is_ascii_whitespace);
1405 let info_string = unescape(&self.text[info_start..info_end], self.tree.is_in_table());
1406 self.tree.append(Item {
1407 start: start_ix,
1408 end: 0, body: ItemBody::FencedCodeBlock(self.allocs.allocate_cow(info_string)),
1410 });
1411 self.tree.push();
1412 loop {
1413 let mut line_start = LineStart::new(&bytes[ix..]);
1414 let n_containers = scan_containers(
1415 &self.tree,
1416 &mut line_start,
1417 self.options.has_gfm_footnotes(),
1418 );
1419 if n_containers < self.tree.spine_len() {
1420 self.pop(ix);
1423 return ix;
1424 }
1425 line_start.scan_space(indent);
1426 let mut close_line_start = line_start.clone();
1427 if !close_line_start.scan_space(4 - indent) {
1428 let close_ix = ix + close_line_start.bytes_scanned();
1429 if let Some(n) = scan_closing_code_fence(&bytes[close_ix..], fence_ch, n_fence_char)
1430 {
1431 ix = close_ix + n;
1432 self.pop(ix);
1433 return ix + scan_blank_line(&bytes[ix..]).unwrap_or(0);
1435 }
1436 }
1437 let remaining_space = line_start.remaining_space();
1438 ix += line_start.bytes_scanned();
1439 let next_ix = ix + scan_nextline(&bytes[ix..]);
1440 self.append_code_text(remaining_space, ix, next_ix);
1441 ix = next_ix;
1442 }
1443 }
1444
1445 fn parse_metadata_block(&mut self, start_ix: usize, metadata_block_ch: u8) -> usize {
1446 let bytes = self.text.as_bytes();
1447 let metadata_block_kind = match metadata_block_ch {
1448 b'-' => MetadataBlockKind::YamlStyle,
1449 b'+' => MetadataBlockKind::PlusesStyle,
1450 _ => panic!("Erroneous metadata block character when parsing metadata block"),
1451 };
1452 let mut ix = start_ix + 3 + scan_nextline(&bytes[start_ix + 3..]);
1454 self.tree.append(Item {
1455 start: start_ix,
1456 end: 0, body: ItemBody::MetadataBlock(metadata_block_kind),
1458 });
1459 self.tree.push();
1460 loop {
1461 let mut line_start = LineStart::new(&bytes[ix..]);
1462 let n_containers = scan_containers(
1463 &self.tree,
1464 &mut line_start,
1465 self.options.has_gfm_footnotes(),
1466 );
1467 if n_containers < self.tree.spine_len() {
1468 break;
1469 }
1470 if let (_, 0) = calc_indent(&bytes[ix..], 4) {
1471 if let Some(n) = scan_closing_metadata_block(&bytes[ix..], metadata_block_ch) {
1472 ix += n;
1473 break;
1474 }
1475 }
1476 let remaining_space = line_start.remaining_space();
1477 ix += line_start.bytes_scanned();
1478 let next_ix = ix + scan_nextline(&bytes[ix..]);
1479 self.append_code_text(remaining_space, ix, next_ix);
1480 ix = next_ix;
1481 }
1482
1483 self.pop(ix);
1484
1485 ix + scan_blank_line(&bytes[ix..]).unwrap_or(0)
1487 }
1488
1489 fn append_code_text(&mut self, remaining_space: usize, start: usize, end: usize) {
1490 if remaining_space > 0 {
1491 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1492 self.tree.append(Item {
1493 start,
1494 end: start,
1495 body: ItemBody::SynthesizeText(cow_ix),
1496 });
1497 }
1498 if self.text.as_bytes()[end - 2] == b'\r' {
1499 self.tree.append_text(start, end - 2, false);
1501 self.tree.append_text(end - 1, end, false);
1502 } else {
1503 self.tree.append_text(start, end, false);
1504 }
1505 }
1506
1507 fn append_html_line(&mut self, remaining_space: usize, start: usize, end: usize) {
1509 if remaining_space > 0 {
1510 let cow_ix = self.allocs.allocate_cow(" "[..remaining_space].into());
1511 self.tree.append(Item {
1512 start,
1513 end: start,
1514 body: ItemBody::SynthesizeText(cow_ix),
1515 });
1516 }
1517 if self.text.as_bytes()[end - 2] == b'\r' {
1518 self.tree.append(Item {
1520 start,
1521 end: end - 2,
1522 body: ItemBody::Html,
1523 });
1524 self.tree.append(Item {
1525 start: end - 1,
1526 end,
1527 body: ItemBody::Html,
1528 });
1529 } else {
1530 self.tree.append(Item {
1531 start,
1532 end,
1533 body: ItemBody::Html,
1534 });
1535 }
1536 }
1537
1538 fn pop(&mut self, ix: usize) {
1540 let cur_ix = self.tree.pop().unwrap();
1541 self.tree[cur_ix].item.end = ix;
1542 if let ItemBody::DefinitionList(_) = self.tree[cur_ix].item.body {
1543 fixup_end_of_definition_list(&mut self.tree, cur_ix);
1544 self.begin_list_item = None;
1545 }
1546 if let ItemBody::List(true, _, _) | ItemBody::DefinitionList(true) =
1547 self.tree[cur_ix].item.body
1548 {
1549 surgerize_tight_list(&mut self.tree, cur_ix);
1550 self.begin_list_item = None;
1551 }
1552 }
1553
1554 fn finish_list(&mut self, ix: usize) {
1557 self.finish_empty_list_item();
1558 if let Some(node_ix) = self.tree.peek_up() {
1559 if let ItemBody::List(_, _, _) | ItemBody::DefinitionList(_) =
1560 self.tree[node_ix].item.body
1561 {
1562 self.pop(ix);
1563 }
1564 }
1565 if self.last_line_blank {
1566 if let Some(node_ix) = self.tree.peek_grandparent() {
1567 if let ItemBody::List(ref mut is_tight, _, _)
1568 | ItemBody::DefinitionList(ref mut is_tight) = self.tree[node_ix].item.body
1569 {
1570 *is_tight = false;
1571 }
1572 }
1573 self.last_line_blank = false;
1574 }
1575 }
1576
1577 fn finish_empty_list_item(&mut self) {
1578 if let Some(begin_list_item) = self.begin_list_item {
1579 if self.last_line_blank {
1580 if let Some(node_ix) = self.tree.peek_up() {
1582 if let ItemBody::ListItem(_) | ItemBody::DefinitionListDefinition(_) =
1583 self.tree[node_ix].item.body
1584 {
1585 self.pop(begin_list_item);
1586 }
1587 }
1588 }
1589 }
1590 self.begin_list_item = None;
1591 }
1592
1593 fn continue_list(&mut self, start: usize, ch: u8, index: u64) {
1596 self.finish_empty_list_item();
1597 if let Some(node_ix) = self.tree.peek_up() {
1598 if let ItemBody::List(ref mut is_tight, existing_ch, _) = self.tree[node_ix].item.body {
1599 if existing_ch == ch {
1600 if self.last_line_blank {
1601 *is_tight = false;
1602 self.last_line_blank = false;
1603 }
1604 return;
1605 }
1606 }
1607 self.finish_list(start);
1609 }
1610 self.tree.append(Item {
1611 start,
1612 end: 0, body: ItemBody::List(true, ch, index),
1614 });
1615 self.tree.push();
1616 self.last_line_blank = false;
1617 }
1618
1619 fn parse_hrule(&mut self, hrule_size: usize, ix: usize) -> usize {
1623 self.tree.append(Item {
1624 start: ix,
1625 end: ix + hrule_size,
1626 body: ItemBody::Rule,
1627 });
1628 ix + hrule_size
1629 }
1630
1631 fn parse_atx_heading(&mut self, start: usize, atx_level: HeadingLevel) -> usize {
1635 let mut ix = start;
1636 let heading_ix = self.tree.append(Item {
1637 start,
1638 end: 0, body: ItemBody::default(), });
1641 ix += atx_level as usize;
1642 let bytes = self.text.as_bytes();
1644 if let Some(eol_bytes) = scan_eol(&bytes[ix..]) {
1645 self.tree[heading_ix].item.end = ix + eol_bytes;
1646 self.tree[heading_ix].item.body = ItemBody::Heading(atx_level, None);
1647 return ix + eol_bytes;
1648 }
1649 let skip_spaces = scan_whitespace_no_nl(&bytes[ix..]);
1651 ix += skip_spaces;
1652
1653 let header_start = ix;
1655 let header_node_idx = self.tree.push(); let (end, content_end, attrs) = if self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES)
1659 {
1660 let header_end = header_start + scan_nextline(&bytes[header_start..]);
1663 let (content_end, attrs) =
1664 self.extract_and_parse_heading_attribute_block(header_start, header_end);
1665 self.parse_line(ix, Some(content_end), TableParseMode::Disabled);
1666 (header_end, content_end, attrs)
1667 } else {
1668 let (line_ix, line_brk) = self.parse_line(ix, None, TableParseMode::Disabled);
1669 ix = line_ix;
1670 if let Some(Item {
1672 start,
1673 end,
1674 body: ItemBody::HardBreak(true),
1675 }) = line_brk
1676 {
1677 self.tree.append_text(start, end, false);
1678 }
1679 (ix, ix, None)
1680 };
1681 self.tree[header_node_idx].item.end = end;
1682
1683 let mut empty_text_node = false;
1685 if let Some(cur_ix) = self.tree.cur() {
1686 let header_text = &bytes[header_start..content_end];
1688 let mut limit = header_text
1689 .iter()
1690 .rposition(|&b| !(b == b'\n' || b == b'\r' || b == b' '))
1691 .map_or(0, |i| i + 1);
1692 let closer = header_text[..limit]
1693 .iter()
1694 .rposition(|&b| b != b'#')
1695 .map_or(0, |i| i + 1);
1696 if closer == 0 {
1697 limit = closer;
1698 } else {
1699 let spaces = scan_rev_while(&header_text[..closer], |b| b == b' ');
1700 if spaces > 0 {
1701 limit = closer - spaces;
1702 }
1703 }
1704 self.tree[cur_ix].item.end = limit + header_start;
1706
1707 if limit == 0 {
1709 empty_text_node = true;
1710 }
1711 }
1712
1713 if empty_text_node {
1714 self.tree.remove_node();
1715 } else {
1716 self.tree.pop();
1717 }
1718 self.tree[heading_ix].item.body = ItemBody::Heading(
1719 atx_level,
1720 attrs.map(|attrs| self.allocs.allocate_heading(attrs)),
1721 );
1722
1723 end
1724 }
1725
1726 fn parse_footnote(&mut self, start: usize) -> Option<usize> {
1728 let bytes = &self.text.as_bytes()[start..];
1729 if !bytes.starts_with(b"[^") {
1730 return None;
1731 }
1732 let (mut i, label) = if self.options.has_gfm_footnotes() {
1733 scan_link_label_rest(&self.text[start + 2..], &|_| None, self.tree.is_in_table())?
1737 } else {
1738 self.parse_refdef_label(start + 2)?
1739 };
1740 if self.options.has_gfm_footnotes() && label.bytes().any(|b| b == b'\r' || b == b'\n') {
1741 return None;
1744 }
1745 i += 2;
1746 if scan_ch(&bytes[i..], b':') == 0 {
1747 return None;
1748 }
1749 i += 1;
1750 self.finish_list(start);
1751 if let Some(node_ix) = self.tree.peek_up() {
1752 if let ItemBody::FootnoteDefinition(..) = self.tree[node_ix].item.body {
1753 self.pop(start);
1755 }
1756 }
1757 if self.options.has_gfm_footnotes() {
1758 i += scan_whitespace_no_nl(&bytes[i..]);
1759 }
1760 self.allocs
1761 .footdefs
1762 .0
1763 .insert(UniCase::new(label.clone()), FootnoteDef { use_count: 0 });
1764 self.tree.append(Item {
1765 start,
1766 end: 0, body: ItemBody::FootnoteDefinition(self.allocs.allocate_cow(label)),
1769 });
1770 self.tree.push();
1771 Some(i)
1772 }
1773
1774 fn parse_refdef_label(&self, start: usize) -> Option<(usize, CowStr<'a>)> {
1777 scan_link_label_rest(
1778 &self.text[start..],
1779 &|bytes| {
1780 let mut line_start = LineStart::new(bytes);
1781 let current_container = scan_containers(
1782 &self.tree,
1783 &mut line_start,
1784 self.options.has_gfm_footnotes(),
1785 ) == self.tree.spine_len();
1786 if line_start.scan_space(4) {
1787 return Some(line_start.bytes_scanned());
1788 }
1789 let bytes_scanned = line_start.bytes_scanned();
1790 let suffix = &bytes[bytes_scanned..];
1791 if self.scan_paragraph_interrupt(suffix, current_container)
1792 || (current_container && scan_setext_heading(suffix).is_some())
1793 {
1794 None
1795 } else {
1796 Some(bytes_scanned)
1797 }
1798 },
1799 self.tree.is_in_table(),
1800 )
1801 }
1802
1803 fn parse_refdef_total(&mut self, start: usize) -> Option<(usize, LinkLabel<'a>, LinkDef<'a>)> {
1805 let bytes = &self.text.as_bytes()[start..];
1806 if scan_ch(bytes, b'[') == 0 {
1807 return None;
1808 }
1809 let (mut i, label) = self.parse_refdef_label(start + 1)?;
1810 i += 1;
1811 if scan_ch(&bytes[i..], b':') == 0 {
1812 return None;
1813 }
1814 i += 1;
1815 let (bytecount, link_def) = self.scan_refdef(start, start + i)?;
1816 Some((bytecount + i, UniCase::new(label), link_def))
1817 }
1818
1819 fn scan_refdef_space(&self, bytes: &[u8], mut i: usize) -> Option<(usize, usize)> {
1821 let mut newlines = 0;
1822 loop {
1823 let whitespaces = scan_whitespace_no_nl(&bytes[i..]);
1824 i += whitespaces;
1825 if let Some(eol_bytes) = scan_eol(&bytes[i..]) {
1826 i += eol_bytes;
1827 newlines += 1;
1828 if newlines > 1 {
1829 return None;
1830 }
1831 } else {
1832 break;
1833 }
1834 let mut line_start = LineStart::new(&bytes[i..]);
1835 let current_container = scan_containers(
1836 &self.tree,
1837 &mut line_start,
1838 self.options.has_gfm_footnotes(),
1839 ) == self.tree.spine_len();
1840 if !line_start.scan_space(4) {
1841 let suffix = &bytes[i + line_start.bytes_scanned()..];
1842 if self.scan_paragraph_interrupt(suffix, current_container)
1843 || scan_setext_heading(suffix).is_some()
1844 {
1845 return None;
1846 }
1847 }
1848 i += line_start.bytes_scanned();
1849 }
1850 Some((i, newlines))
1851 }
1852
1853 fn scan_refdef_title<'t>(&self, text: &'t str) -> Option<(usize, CowStr<'t>)> {
1855 let bytes = text.as_bytes();
1856 let closing_delim = match bytes.first()? {
1857 b'\'' => b'\'',
1858 b'"' => b'"',
1859 b'(' => b')',
1860 _ => return None,
1861 };
1862 let mut bytecount = 1;
1863 let mut linestart = 1;
1864
1865 let mut linebuf = None;
1866
1867 while let Some(&c) = bytes.get(bytecount) {
1868 match c {
1869 b'(' if closing_delim == b')' => {
1870 return None;
1874 }
1875 b'\n' | b'\r' => {
1876 let linebuf = if let Some(linebuf) = &mut linebuf {
1884 linebuf
1885 } else {
1886 linebuf = Some(String::new());
1887 linebuf.as_mut().unwrap()
1888 };
1889 linebuf.push_str(&text[linestart..bytecount]);
1890 linebuf.push('\n'); bytecount += 1;
1893 if c == b'\r' && bytes.get(bytecount) == Some(&b'\n') {
1894 bytecount += 1;
1895 }
1896 let mut line_start = LineStart::new(&bytes[bytecount..]);
1897 let current_container = scan_containers(
1898 &self.tree,
1899 &mut line_start,
1900 self.options.has_gfm_footnotes(),
1901 ) == self.tree.spine_len();
1902 if !line_start.scan_space(4) {
1903 let suffix = &bytes[bytecount + line_start.bytes_scanned()..];
1904 if self.scan_paragraph_interrupt(suffix, current_container)
1905 || scan_setext_heading(suffix).is_some()
1906 {
1907 return None;
1908 }
1909 }
1910 line_start.scan_all_space();
1911 bytecount += line_start.bytes_scanned();
1912 linestart = bytecount;
1913 if scan_blank_line(&bytes[bytecount..]).is_some() {
1914 return None;
1916 }
1917 }
1918 b'\\' => {
1919 bytecount += 1;
1920 if let Some(c) = bytes.get(bytecount) {
1921 if c != &b'\r' && c != &b'\n' {
1922 bytecount += 1;
1923 }
1924 }
1925 }
1926 c if c == closing_delim => {
1927 let cow = if let Some(mut linebuf) = linebuf {
1928 linebuf.push_str(&text[linestart..bytecount]);
1929 CowStr::from(linebuf)
1930 } else {
1931 CowStr::from(&text[linestart..bytecount])
1932 };
1933 return Some((bytecount + 1, cow));
1934 }
1935 _ => {
1936 bytecount += 1;
1937 }
1938 }
1939 }
1940 None
1941 }
1942
1943 fn scan_refdef(&self, span_start: usize, start: usize) -> Option<(usize, LinkDef<'a>)> {
1946 let bytes = self.text.as_bytes();
1947
1948 let (mut i, _newlines) = self.scan_refdef_space(bytes, start)?;
1950
1951 let (dest_length, dest) = scan_link_dest(self.text, i, LINK_MAX_NESTED_PARENS)?;
1953 if dest_length == 0 {
1954 return None;
1955 }
1956 let dest = unescape(dest, self.tree.is_in_table());
1957 i += dest_length;
1958
1959 let mut backup = (
1961 i - start,
1962 LinkDef {
1963 dest,
1964 title: None,
1965 span: span_start..i,
1966 },
1967 );
1968
1969 let (mut i, newlines) =
1971 if let Some((new_i, mut newlines)) = self.scan_refdef_space(bytes, i) {
1972 if i == self.text.len() {
1973 newlines += 1;
1974 }
1975 if new_i == i && newlines == 0 {
1976 return None;
1977 }
1978 if newlines > 1 {
1979 return Some(backup);
1980 };
1981 (new_i, newlines)
1982 } else {
1983 return Some(backup);
1984 };
1985
1986 if let Some((title_length, title)) = self.scan_refdef_title(&self.text[i..]) {
1989 i += title_length;
1990 if scan_blank_line(&bytes[i..]).is_some() {
1991 backup.0 = i - start;
1992 backup.1.span = span_start..i;
1993 backup.1.title = Some(unescape(title, self.tree.is_in_table()));
1994 return Some(backup);
1995 }
1996 }
1997 if newlines > 0 {
1998 Some(backup)
1999 } else {
2000 None
2001 }
2002 }
2003
2004 fn scan_paragraph_interrupt(&self, bytes: &[u8], current_container: bool) -> bool {
2006 if scan_paragraph_interrupt_no_table(
2007 bytes,
2008 current_container,
2009 self.options.contains(Options::ENABLE_FOOTNOTES),
2010 self.options.contains(Options::ENABLE_DEFINITION_LIST),
2011 &self.tree,
2012 ) {
2013 return true;
2014 }
2015 if !self.options.contains(Options::ENABLE_TABLES) || !bytes.starts_with(b"|") {
2030 return false;
2031 }
2032
2033 let mut pipes = 0;
2036 let mut next_line_ix = 0;
2037 let mut bsesc = false;
2038 let mut last_pipe_ix = 0;
2039 for (i, &byte) in bytes.iter().enumerate() {
2040 match byte {
2041 b'\\' => {
2042 bsesc = true;
2043 continue;
2044 }
2045 b'|' if !bsesc => {
2046 pipes += 1;
2047 last_pipe_ix = i;
2048 }
2049 b'\r' | b'\n' => {
2050 next_line_ix = i + scan_eol(&bytes[i..]).unwrap();
2051 break;
2052 }
2053 _ => {}
2054 }
2055 bsesc = false;
2056 }
2057
2058 if next_line_ix == 0 {
2060 return false;
2061 }
2062
2063 let mut line_start = LineStart::new(&bytes[next_line_ix..]);
2076 if scan_containers(
2077 &self.tree,
2078 &mut line_start,
2079 self.options.has_gfm_footnotes(),
2080 ) != self.tree.spine_len()
2081 {
2082 return false;
2083 }
2084 let table_head_ix = next_line_ix + line_start.bytes_scanned();
2085 let (table_head_bytes, alignment) = scan_table_head(&bytes[table_head_ix..]);
2086
2087 if table_head_bytes == 0 {
2088 return false;
2089 }
2090
2091 let header_count = count_header_cols(bytes, pipes, 0, last_pipe_ix);
2093
2094 alignment.len() == header_count
2096 }
2097
2098 fn extract_and_parse_heading_attribute_block(
2105 &mut self,
2106 header_start: usize,
2107 header_end: usize,
2108 ) -> (usize, Option<HeadingAttributes<'a>>) {
2109 if !self.options.contains(Options::ENABLE_HEADING_ATTRIBUTES) {
2110 return (header_end, None);
2111 }
2112
2113 let header_bytes = &self.text.as_bytes()[header_start..header_end];
2115 let (content_len, attr_block_range_rel) =
2116 extract_attribute_block_content_from_header_text(header_bytes);
2117 let content_end = header_start + content_len;
2118 let attrs = attr_block_range_rel.and_then(|r| {
2119 parse_inside_attribute_block(
2120 &self.text[(header_start + r.start)..(header_start + r.end)],
2121 )
2122 });
2123 (content_end, attrs)
2124 }
2125}
2126
/// Selects how inline parsing treats table syntax on a line.
#[derive(PartialEq, Eq, Copy, Clone)]
enum TableParseMode {
    /// NOTE(review): presumably "scan the line for a possible table header"; no
    /// use of this variant is visible in this part of the file, so confirm
    /// against `parse_line`.
    Scan,
    /// Table handling is active. In this mode the delimiter-run checks treat an
    /// adjacent unescaped `|` like a boundary of the text.
    Active,
    /// Table handling is switched off; ATX heading text is parsed in this mode.
    Disabled,
}
2137
2138fn count_header_cols(
2141 bytes: &[u8],
2142 mut pipes: usize,
2143 mut start: usize,
2144 last_pipe_ix: usize,
2145) -> usize {
2146 start += scan_whitespace_no_nl(&bytes[start..]);
2148 if bytes[start] == b'|' {
2149 pipes -= 1;
2150 }
2151
2152 if scan_blank_line(&bytes[(last_pipe_ix + 1)..]).is_some() {
2154 pipes
2155 } else {
2156 pipes + 1
2157 }
2158}
2159
2160fn scan_paragraph_interrupt_no_table(
2165 bytes: &[u8],
2166 current_container: bool,
2167 has_footnote: bool,
2168 definition_list: bool,
2169 tree: &Tree<Item>,
2170) -> bool {
2171 scan_eol(bytes).is_some()
2172 || scan_hrule(bytes).is_ok()
2173 || scan_atx_heading(bytes).is_some()
2174 || scan_code_fence(bytes).is_some()
2175 || scan_blockquote_start(bytes).is_some()
2176 || scan_listitem(bytes).map_or(false, |(ix, delim, index, _)| {
2177 ! current_container ||
2178 tree.is_in_table() ||
2179 (delim == b'*' || delim == b'-' || delim == b'+' || index == 1)
2182 && (scan_blank_line(&bytes[ix..]).is_none())
2183 })
2184 || bytes.starts_with(b"<")
2185 && (get_html_end_tag(&bytes[1..]).is_some() || starts_html_block_type_6(&bytes[1..]))
2186 || definition_list && bytes.starts_with(b":")
2187 || (has_footnote
2188 && bytes.starts_with(b"[^")
2189 && scan_link_label_rest(
2190 std::str::from_utf8(&bytes[2..]).unwrap(),
2191 &|_| None,
2192 tree.is_in_table(),
2193 )
2194 .map_or(false, |(len, _)| bytes.get(2 + len) == Some(&b':')))
2195}
2196
2197fn get_html_end_tag(text_bytes: &[u8]) -> Option<&'static str> {
2199 static BEGIN_TAGS: &[&[u8]; 4] = &[b"pre", b"style", b"script", b"textarea"];
2200 static ST_BEGIN_TAGS: &[&[u8]; 3] = &[b"!--", b"?", b"![CDATA["];
2201
2202 for (beg_tag, end_tag) in BEGIN_TAGS
2203 .iter()
2204 .zip(["</pre>", "</style>", "</script>", "</textarea>"].iter())
2205 {
2206 let tag_len = beg_tag.len();
2207
2208 if text_bytes.len() < tag_len {
2209 break;
2211 }
2212
2213 if !text_bytes[..tag_len].eq_ignore_ascii_case(beg_tag) {
2214 continue;
2215 }
2216
2217 if text_bytes.len() == tag_len {
2219 return Some(end_tag);
2220 }
2221
2222 let s = text_bytes[tag_len];
2224 if is_ascii_whitespace(s) || s == b'>' {
2225 return Some(end_tag);
2226 }
2227 }
2228
2229 for (beg_tag, end_tag) in ST_BEGIN_TAGS.iter().zip(["-->", "?>", "]]>"].iter()) {
2230 if text_bytes.starts_with(beg_tag) {
2231 return Some(end_tag);
2232 }
2233 }
2234
2235 if text_bytes.len() > 1 && text_bytes[0] == b'!' && text_bytes[1].is_ascii_alphabetic() {
2236 Some(">")
2237 } else {
2238 None
2239 }
2240}
2241
2242fn surgerize_tight_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
2244 let mut list_item = tree[list_ix].child;
2245 while let Some(listitem_ix) = list_item {
2246 let list_item_firstborn = tree[listitem_ix].child;
2248
2249 if let Some(firstborn_ix) = list_item_firstborn {
2251 if let ItemBody::Paragraph = tree[firstborn_ix].item.body {
2252 tree[listitem_ix].child = tree[firstborn_ix].child;
2253 }
2254
2255 let mut list_item_child = Some(firstborn_ix);
2256 let mut node_to_repoint = None;
2257 while let Some(child_ix) = list_item_child {
2258 let repoint_ix = if let ItemBody::Paragraph = tree[child_ix].item.body {
2260 if let Some(child_firstborn) = tree[child_ix].child {
2261 if let Some(repoint_ix) = node_to_repoint {
2262 tree[repoint_ix].next = Some(child_firstborn);
2263 }
2264 let mut child_lastborn = child_firstborn;
2265 while let Some(lastborn_next_ix) = tree[child_lastborn].next {
2266 child_lastborn = lastborn_next_ix;
2267 }
2268 child_lastborn
2269 } else {
2270 child_ix
2271 }
2272 } else {
2273 child_ix
2274 };
2275
2276 node_to_repoint = Some(repoint_ix);
2277 tree[repoint_ix].next = tree[child_ix].next;
2278 list_item_child = tree[child_ix].next;
2279 }
2280 }
2281
2282 list_item = tree[listitem_ix].next;
2283 }
2284}
2285
2286fn fixup_end_of_definition_list(tree: &mut Tree<Item>, list_ix: TreeIndex) {
2287 let mut list_item = tree[list_ix].child;
2288 let mut previous_list_item = None;
2289 while let Some(listitem_ix) = list_item {
2290 match &mut tree[listitem_ix].item.body {
2291 ItemBody::DefinitionListTitle | ItemBody::DefinitionListDefinition(_) => {
2292 previous_list_item = list_item;
2293 list_item = tree[listitem_ix].next;
2294 }
2295 body @ ItemBody::MaybeDefinitionListTitle => {
2296 *body = ItemBody::Paragraph;
2297 break;
2298 }
2299 _ => break,
2300 }
2301 }
2302 if let Some(previous_list_item) = previous_list_item {
2303 tree.truncate_to_parent(previous_list_item);
2304 }
2305}
2306
2307fn delim_run_can_open(
2313 s: &str,
2314 suffix: &str,
2315 run_len: usize,
2316 ix: usize,
2317 mode: TableParseMode,
2318) -> bool {
2319 let next_char = if let Some(c) = suffix[run_len..].chars().next() {
2320 c
2321 } else {
2322 return false;
2323 };
2324 if next_char.is_whitespace() {
2325 return false;
2326 }
2327 if ix == 0 {
2328 return true;
2329 }
2330 if mode == TableParseMode::Active {
2331 if s.as_bytes()[..ix].ends_with(b"|") && !s.as_bytes()[..ix].ends_with(br"\|") {
2332 return true;
2333 }
2334 if next_char == '|' {
2335 return false;
2336 }
2337 }
2338 let delim = suffix.bytes().next().unwrap();
2339 if delim == b'*' && !is_punctuation(next_char) {
2341 return true;
2342 }
2343 if delim == b'~' && run_len > 1 {
2344 return true;
2345 }
2346 let prev_char = s[..ix].chars().last().unwrap();
2347 if delim == b'~' && prev_char == '~' && !is_punctuation(next_char) {
2348 return true;
2349 }
2350
2351 prev_char.is_whitespace()
2352 || is_punctuation(prev_char) && (delim != b'\'' || ![']', ')'].contains(&prev_char))
2353}
2354
2355fn delim_run_can_close(
2359 s: &str,
2360 suffix: &str,
2361 run_len: usize,
2362 ix: usize,
2363 mode: TableParseMode,
2364) -> bool {
2365 if ix == 0 {
2366 return false;
2367 }
2368 let prev_char = s[..ix].chars().last().unwrap();
2369 if prev_char.is_whitespace() {
2370 return false;
2371 }
2372 let next_char = if let Some(c) = suffix[run_len..].chars().next() {
2373 c
2374 } else {
2375 return true;
2376 };
2377 if mode == TableParseMode::Active {
2378 if s.as_bytes()[..ix].ends_with(b"|") && !s.as_bytes()[..ix].ends_with(br"\|") {
2379 return false;
2380 }
2381 if next_char == '|' {
2382 return true;
2383 }
2384 }
2385 let delim = suffix.bytes().next().unwrap();
2386 if (delim == b'*' || (delim == b'~' && run_len > 1)) && !is_punctuation(prev_char) {
2388 return true;
2389 }
2390 if delim == b'~' && prev_char == '~' {
2391 return true;
2392 }
2393
2394 next_char.is_whitespace() || is_punctuation(next_char)
2395}
2396
2397fn create_lut(options: &Options) -> LookupTable {
2398 #[cfg(all(target_arch = "x86_64", feature = "simd"))]
2399 {
2400 LookupTable {
2401 simd: simd::compute_lookup(options),
2402 scalar: special_bytes(options),
2403 }
2404 }
2405 #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2406 {
2407 special_bytes(options)
2408 }
2409}
2410
2411fn special_bytes(options: &Options) -> [bool; 256] {
2412 let mut bytes = [false; 256];
2413 let standard_bytes = [
2414 b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
2415 ];
2416
2417 for &byte in &standard_bytes {
2418 bytes[byte as usize] = true;
2419 }
2420 if options.contains(Options::ENABLE_TABLES) {
2421 bytes[b'|' as usize] = true;
2422 }
2423 if options.contains(Options::ENABLE_STRIKETHROUGH) {
2424 bytes[b'~' as usize] = true;
2425 }
2426 if options.contains(Options::ENABLE_MATH) {
2427 bytes[b'$' as usize] = true;
2428 bytes[b'{' as usize] = true;
2429 bytes[b'}' as usize] = true;
2430 }
2431 if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
2432 for &byte in &[b'.', b'-', b'"', b'\''] {
2433 bytes[byte as usize] = true;
2434 }
2435 }
2436
2437 bytes
2438}
2439
/// What a callback tells the special-byte iteration to do next.
enum LoopInstruction<T> {
    /// Keep iterating, skipping the given number of bytes in addition to the byte
    /// that was just handled.
    ContinueAndSkip(usize),
    /// Stop iterating at once and make the iteration return the given index and
    /// value.
    BreakAtWith(usize, T),
}
2446
/// Lookup table used to find special bytes (x86_64 build with the `simd` feature).
#[cfg(all(target_arch = "x86_64", feature = "simd"))]
struct LookupTable {
    /// Compact 16-byte bitmap consumed by the SIMD scanner.
    simd: [u8; 16],
    /// Per-byte table used by the scalar fallback.
    scalar: [bool; 256],
}

/// Lookup table used to find special bytes (builds without SIMD support): one
/// flag per byte value.
#[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
type LookupTable = [bool; 256];
2455
2456fn iterate_special_bytes<F, T>(
2467 lut: &LookupTable,
2468 bytes: &[u8],
2469 ix: usize,
2470 callback: F,
2471) -> (usize, Option<T>)
2472where
2473 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2474{
2475 #[cfg(all(target_arch = "x86_64", feature = "simd"))]
2476 {
2477 simd::iterate_special_bytes(lut, bytes, ix, callback)
2478 }
2479 #[cfg(not(all(target_arch = "x86_64", feature = "simd")))]
2480 {
2481 scalar_iterate_special_bytes(lut, bytes, ix, callback)
2482 }
2483}
2484
2485fn scalar_iterate_special_bytes<F, T>(
2486 lut: &[bool; 256],
2487 bytes: &[u8],
2488 mut ix: usize,
2489 mut callback: F,
2490) -> (usize, Option<T>)
2491where
2492 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2493{
2494 while ix < bytes.len() {
2495 let b = bytes[ix];
2496 if lut[b as usize] {
2497 match callback(ix, b) {
2498 LoopInstruction::ContinueAndSkip(skip) => {
2499 ix += skip;
2500 }
2501 LoopInstruction::BreakAtWith(ix, val) => {
2502 return (ix, val);
2503 }
2504 }
2505 }
2506 ix += 1;
2507 }
2508
2509 (ix, None)
2510}
2511
2512fn extract_attribute_block_content_from_header_text(
2525 heading: &[u8],
2526) -> (usize, Option<Range<usize>>) {
2527 let heading_len = heading.len();
2528 let mut ix = heading_len;
2529 ix -= scan_rev_while(heading, |b| {
2530 b == b'\n' || b == b'\r' || b == b' ' || b == b'\t'
2531 });
2532 if ix == 0 {
2533 return (heading_len, None);
2534 }
2535
2536 let attr_block_close = ix - 1;
2537 if heading.get(attr_block_close) != Some(&b'}') {
2538 return (heading_len, None);
2540 }
2541 ix -= 1;
2543
2544 ix -= scan_rev_while(&heading[..ix], |b| {
2545 !matches!(b, b'{' | b'}' | b'<' | b'>' | b'\\' | b'\n' | b'\r')
2560 });
2561 if ix == 0 {
2562 return (heading_len, None);
2564 }
2565 let attr_block_open = ix - 1;
2566 if heading[attr_block_open] != b'{' {
2567 return (heading_len, None);
2569 }
2570
2571 (attr_block_open, Some(ix..attr_block_close))
2572}
2573
2574fn parse_inside_attribute_block(inside_attr_block: &str) -> Option<HeadingAttributes<'_>> {
2594 let mut id = None;
2595 let mut classes = Vec::new();
2596 let mut attrs = Vec::new();
2597
2598 for attr in inside_attr_block.split_ascii_whitespace() {
2599 if attr.len() > 1 {
2602 let first_byte = attr.as_bytes()[0];
2603 if first_byte == b'#' {
2604 id = Some(attr[1..].into());
2605 } else if first_byte == b'.' {
2606 classes.push(attr[1..].into());
2607 } else {
2608 let split = attr.split_once('=');
2609 if let Some((key, value)) = split {
2610 attrs.push((key.into(), Some(value.into())));
2611 } else {
2612 attrs.push((attr.into(), None));
2613 }
2614 }
2615 }
2616 }
2617
2618 Some(HeadingAttributes { id, classes, attrs })
2619}
2620
2621#[cfg(all(target_arch = "x86_64", feature = "simd"))]
2622mod simd {
2623 use super::{LookupTable, LoopInstruction};
2642 use crate::Options;
2643 use core::arch::x86_64::*;
2644
/// Size in bytes of one `__m128i` vector, i.e. the number of input bytes that are
/// classified per SIMD step.
const VECTOR_SIZE: usize = std::mem::size_of::<__m128i>();
2646
2647 pub(super) fn compute_lookup(options: &Options) -> [u8; 16] {
2652 let mut lookup = [0u8; 16];
2653 let standard_bytes = [
2654 b'\n', b'\r', b'*', b'_', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
2655 ];
2656
2657 for &byte in &standard_bytes {
2658 add_lookup_byte(&mut lookup, byte);
2659 }
2660 if options.contains(Options::ENABLE_TABLES) {
2661 add_lookup_byte(&mut lookup, b'|');
2662 }
2663 if options.contains(Options::ENABLE_STRIKETHROUGH) {
2664 add_lookup_byte(&mut lookup, b'~');
2665 }
2666 if options.contains(Options::ENABLE_MATH) {
2667 add_lookup_byte(&mut lookup, b'$');
2668 add_lookup_byte(&mut lookup, b'{');
2669 add_lookup_byte(&mut lookup, b'}');
2670 }
2671 if options.contains(Options::ENABLE_SMART_PUNCTUATION) {
2672 for &byte in &[b'.', b'-', b'"', b'\''] {
2673 add_lookup_byte(&mut lookup, byte);
2674 }
2675 }
2676
2677 lookup
2678 }
2679
/// Registers `byte` in the bitmap: the low nibble selects the entry and the high
/// nibble selects the bit inside that entry.
fn add_lookup_byte(lookup: &mut [u8; 16], byte: u8) {
    let entry = usize::from(byte & 0x0f);
    let bit = byte >> 4;
    lookup[entry] |= 1 << bit;
}
2683
/// Classifies the `VECTOR_SIZE` bytes of `bytes` starting at `ix` against the
/// bitmap `lut` and returns a bitmask in which bit `i` is set when byte `ix + i`
/// is registered in the bitmap.
///
/// # Safety
///
/// The CPU must support SSSE3, and `bytes` must hold at least `VECTOR_SIZE` bytes
/// from offset `ix` on: the load below is unchecked, and the length is only
/// verified by a debug assertion.
#[target_feature(enable = "ssse3")]
#[inline]
unsafe fn compute_mask(lut: &[u8; 16], bytes: &[u8], ix: usize) -> i32 {
    debug_assert!(bytes.len() >= ix + VECTOR_SIZE);

    let bitmap = _mm_loadu_si128(lut.as_ptr() as *const __m128i);
    // Maps a high nibble `n` in 0..=7 to the single bit `1 << n`; nibbles 8..=15
    // (bytes with the top bit set) map to all ones.
    let bitmask_lookup =
        _mm_setr_epi8(1, 2, 4, 8, 16, 32, 64, -128, -1, -1, -1, -1, -1, -1, -1, -1);

    // Unaligned load of the 16 input bytes.
    let raw_ptr = bytes.as_ptr().add(ix) as *const __m128i;
    let input = _mm_loadu_si128(raw_ptr);
    // Select the bitmap entry by each byte's low nibble.
    let bitset = _mm_shuffle_epi8(bitmap, input);
    // Isolate each byte's high nibble (16-bit shift, then mask per byte).
    let higher_nibbles = _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0f));
    // Turn the high nibble into the bit to test.
    let bitmask = _mm_shuffle_epi8(bitmask_lookup, higher_nibbles);
    // Keep only the tested bit of the selected bitmap entry.
    let tmp = _mm_and_si128(bitset, bitmask);
    // A byte matches when its tested bit survived, i.e. when `tmp == bitmask`.
    let result = _mm_cmpeq_epi8(tmp, bitmask);

    // Collapse the per-byte comparison results into one bit per byte.
    _mm_movemask_epi8(result)
}
2726
2727 pub(super) fn iterate_special_bytes<F, T>(
2732 lut: &LookupTable,
2733 bytes: &[u8],
2734 ix: usize,
2735 callback: F,
2736 ) -> (usize, Option<T>)
2737 where
2738 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2739 {
2740 if is_x86_feature_detected!("ssse3") && bytes.len() >= VECTOR_SIZE {
2741 unsafe { simd_iterate_special_bytes(&lut.simd, bytes, ix, callback) }
2742 } else {
2743 super::scalar_iterate_special_bytes(&lut.scalar, bytes, ix, callback)
2744 }
2745 }
2746
2747 unsafe fn process_mask<F, T>(
2752 mut mask: i32,
2753 bytes: &[u8],
2754 mut offset: usize,
2755 callback: &mut F,
2756 ) -> Result<usize, (usize, Option<T>)>
2757 where
2758 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2759 {
2760 while mask != 0 {
2761 let mask_ix = mask.trailing_zeros() as usize;
2762 offset += mask_ix;
2763 match callback(offset, *bytes.get_unchecked(offset)) {
2764 LoopInstruction::ContinueAndSkip(skip) => {
2765 offset += skip + 1;
2766 mask = mask.wrapping_shr((skip + 1 + mask_ix) as u32);
2767 }
2768 LoopInstruction::BreakAtWith(ix, val) => return Err((ix, val)),
2769 }
2770 }
2771 Ok(offset)
2772 }
2773
2774 #[target_feature(enable = "ssse3")]
2775 unsafe fn simd_iterate_special_bytes<F, T>(
2778 lut: &[u8; 16],
2779 bytes: &[u8],
2780 mut ix: usize,
2781 mut callback: F,
2782 ) -> (usize, Option<T>)
2783 where
2784 F: FnMut(usize, u8) -> LoopInstruction<Option<T>>,
2785 {
2786 debug_assert!(bytes.len() >= VECTOR_SIZE);
2787 let upperbound = bytes.len() - VECTOR_SIZE;
2788
2789 while ix < upperbound {
2790 let mask = compute_mask(lut, bytes, ix);
2791 let block_start = ix;
2792 ix = match process_mask(mask, bytes, ix, &mut callback) {
2793 Ok(ix) => std::cmp::max(ix, VECTOR_SIZE + block_start),
2794 Err((end_ix, val)) => return (end_ix, val),
2795 };
2796 }
2797
2798 if bytes.len() > ix {
2799 let mask = compute_mask(lut, bytes, upperbound) >> ix - upperbound;
2801 if let Err((end_ix, val)) = process_mask(mask, bytes, ix, &mut callback) {
2802 return (end_ix, val);
2803 }
2804 }
2805
2806 (bytes.len(), None)
2807 }
2808
#[cfg(test)]
mod simd_test {
    use super::super::create_lut;
    use super::{iterate_special_bytes, LoopInstruction};
    use crate::Options;

    /// Scans `bytes` from index 0 with a lookup table built for a fixed set
    /// of extensions and asserts that the callback was invoked at exactly the
    /// indices in `expected`. After every hit the callback asks the scanner
    /// to skip `skip` further bytes.
    fn check_expected_indices(bytes: &[u8], expected: &[usize], skip: usize) {
        let mut opts = Options::empty();
        for flag in [
            Options::ENABLE_MATH,
            Options::ENABLE_TABLES,
            Options::ENABLE_FOOTNOTES,
            Options::ENABLE_STRIKETHROUGH,
            Options::ENABLE_TASKLISTS,
        ] {
            opts.insert(flag);
        }
        let lut = create_lut(&opts);

        let mut visited = Vec::new();
        iterate_special_bytes::<_, i32>(&lut, bytes, 0, |ix, _byte| {
            visited.push(ix);
            LoopInstruction::ContinueAndSkip(skip)
        });

        assert_eq!(visited.as_slice(), expected);
    }

    /// Exactly one vector of ordinary bytes yields no hits.
    #[test]
    fn simple_no_match() {
        check_expected_indices(b"abcdef0123456789", &[], 0);
    }

    /// Special bytes inside a single vector are reported in order.
    #[test]
    fn simple_match() {
        check_expected_indices(b"*bcd&f0123456789", &[0, 4], 0);
    }

    /// Input shorter than one vector.
    #[test]
    fn single_open_fish() {
        check_expected_indices(b"<", &[0], 0);
    }

    /// Input longer than one vector, with hits at and after index 15.
    #[test]
    fn long_match() {
        check_expected_indices(b"0123456789abcde~*bcd&f0", &[15, 16, 20], 0);
    }

    /// A skip requested at the last byte of the first vector suppresses the
    /// special bytes that immediately follow it.
    #[test]
    fn border_skip() {
        check_expected_indices(b"0123456789abcde~~~~d&f0", &[15, 20], 3);
    }

    /// Every special byte is found at both planted positions no matter which
    /// non-special byte value fills the rest of the 18-byte buffer.
    #[test]
    fn exhaustive_search() {
        let specials = [
            b'\n', b'\r', b'*', b'_', b'~', b'|', b'&', b'\\', b'[', b']', b'<', b'!', b'`',
            b'$', b'{', b'}',
        ];

        for &special in &specials {
            for filler in (0u8..=255).filter(|candidate| !specials.contains(candidate)) {
                let mut buf = [filler; 18];
                buf[3] = special;
                buf[6] = special;

                check_expected_indices(&buf, &[3, 6], 0);
            }
        }
    }
}
2880}