pulldown_cmark/
utils.rs

1//! Miscellaneous utilities to increase comfort.
2//! Special thanks to:
3//!
4//! - <https://github.com/BenjaminRi/Redwood-Wiki/blob/master/src/markdown_utils.rs>.
5//! Its author authorized the use of this GPL code in this project in
6//! <https://github.com/raphlinus/pulldown-cmark/issues/507>.
7//!
8//! - <https://gist.github.com/rambip/a507c312ed61c99c24b2a54f98325721>.
9//! Its author proposed the solution in
10//! <https://github.com/raphlinus/pulldown-cmark/issues/708>.
11
12use crate::{CowStr, Event};
13use std::ops::Range;
14
15/// Merge consecutive `Event::Text` events into only one.
16#[derive(Debug)]
17pub struct TextMergeStream<'a, I> {
18    inner: TextMergeWithOffset<'a, DummyOffsets<I>>,
19}
20
21impl<'a, I> TextMergeStream<'a, I>
22where
23    I: Iterator<Item = Event<'a>>,
24{
25    pub fn new(iter: I) -> Self {
26        Self {
27            inner: TextMergeWithOffset::new(DummyOffsets(iter)),
28        }
29    }
30}
31
32impl<'a, I> Iterator for TextMergeStream<'a, I>
33where
34    I: Iterator<Item = Event<'a>>,
35{
36    type Item = Event<'a>;
37
38    fn next(&mut self) -> Option<Self::Item> {
39        self.inner.next().map(|(event, _)| event)
40    }
41}
42
43#[derive(Debug)]
44struct DummyOffsets<I>(I);
45
46impl<'a, I> Iterator for DummyOffsets<I>
47where
48    I: Iterator<Item = Event<'a>>,
49{
50    type Item = (Event<'a>, Range<usize>);
51
52    fn next(&mut self) -> Option<Self::Item> {
53        self.0.next().map(|event| (event, 0..0))
54    }
55}
56
57/// Merge consecutive `Event::Text` events into only one, with offsets.
58///
59/// Compatible with with [`OffsetIter`](crate::OffsetIter).
60#[derive(Debug)]
61pub struct TextMergeWithOffset<'a, I> {
62    iter: I,
63    last_event: Option<(Event<'a>, Range<usize>)>,
64}
65
66impl<'a, I> TextMergeWithOffset<'a, I>
67where
68    I: Iterator<Item = (Event<'a>, Range<usize>)>,
69{
70    pub fn new(iter: I) -> Self {
71        Self {
72            iter,
73            last_event: None,
74        }
75    }
76}
77
78impl<'a, I> Iterator for TextMergeWithOffset<'a, I>
79where
80    I: Iterator<Item = (Event<'a>, Range<usize>)>,
81{
82    type Item = (Event<'a>, Range<usize>);
83
84    fn next(&mut self) -> Option<Self::Item> {
85        match (self.last_event.take(), self.iter.next()) {
86            (
87                Some((Event::Text(last_text), last_offset)),
88                Some((Event::Text(next_text), next_offset)),
89            ) => {
90                // We need to start merging consecutive text events together into one
91                let mut string_buf: String = last_text.into_string();
92                string_buf.push_str(&next_text);
93                let mut offset = last_offset;
94                offset.end = next_offset.end;
95                loop {
96                    // Avoid recursion to avoid stack overflow and to optimize concatenation
97                    match self.iter.next() {
98                        Some((Event::Text(next_text), next_offset)) => {
99                            string_buf.push_str(&next_text);
100                            offset.end = next_offset.end;
101                        }
102                        next_event => {
103                            self.last_event = next_event;
104                            if string_buf.is_empty() {
105                                // Discard text event(s) altogether if there is no text
106                                break self.next();
107                            } else {
108                                break Some((
109                                    Event::Text(CowStr::Boxed(string_buf.into_boxed_str())),
110                                    offset,
111                                ));
112                            }
113                        }
114                    }
115                }
116            }
117            (None, Some(next_event)) => {
118                // This only happens once during the first iteration and if there are items
119                self.last_event = Some(next_event);
120                self.next()
121            }
122            (None, None) => {
123                // This happens when the iterator is depleted
124                None
125            }
126            (last_event, next_event) => {
127                // The ordinary case, emit one event after the other without modification
128                self.last_event = next_event;
129                last_event
130            }
131        }
132    }
133}
134
#[cfg(test)]
mod test {
    use super::*;
    use crate::Parser;

    /// Markdown fixture: two lines, each indented by four spaces.
    const INDENTED_LINES: &str = r#"
    first line
    second line
"#;

    /// Without offsets, the text of both lines comes out as one text event.
    #[test]
    fn text_merge_stream_indent() {
        let merged: Vec<_> = TextMergeStream::new(Parser::new(INDENTED_LINES))
            .filter(|event| matches!(event, Event::Text(_)))
            .collect();
        let expected = [Event::Text("first line\nsecond line\n".into())];
        assert_eq!(merged, expected);
    }

    /// With offsets, the merged text event covers the range of all its parts.
    #[test]
    fn text_merge_with_offset_indent() {
        let events = Parser::new(INDENTED_LINES).into_offset_iter();
        let merged: Vec<_> = TextMergeWithOffset::new(events)
            .filter(|(event, _)| matches!(event, Event::Text(_)))
            .collect();
        let expected = [(Event::Text("first line\nsecond line\n".into()), 5..32)];
        assert_eq!(merged, expected);
    }

    /// A run of text events that merges to the empty string is dropped.
    #[test]
    fn text_merge_empty_is_discarded() {
        let input = [
            Event::Rule,
            Event::Text("".into()),
            Event::Text("".into()),
            Event::Rule,
        ];
        let output: Vec<_> = TextMergeStream::new(input.into_iter()).collect();
        assert_eq!(output, [Event::Rule, Event::Rule]);
    }
}