ropey/
rope_builder.rs

1use std::sync::Arc;
2
3use smallvec::SmallVec;
4
5use crate::crlf;
6use crate::rope::Rope;
7use crate::tree::{Node, NodeChildren, NodeText, MAX_BYTES, MAX_CHILDREN, MIN_BYTES};
8
9/// An efficient incremental `Rope` builder.
10///
11/// This is used to efficiently build ropes from sequences of text
12/// chunks.  It is useful for creating ropes from:
13///
14/// - ...large text files, without pre-loading their entire contents into
15///   memory (but see [`from_reader()`](Rope::from_reader) for a convenience
16///   function that does this for casual use-cases).
17/// - ...streaming data sources.
18/// - ...non-utf8 text data, doing the encoding conversion incrementally
19///   as you go.
20///
21/// Unlike repeatedly calling `Rope::insert()` on the end of a rope,
22/// this API runs in time linear to the amount of data fed to it, and
23/// is overall much faster.
24///
25/// # Example
26/// ```
27/// # use ropey::RopeBuilder;
28/// #
29/// let mut builder = RopeBuilder::new();
30///
31/// builder.append("Hello ");
32/// builder.append("world!\n");
33/// builder.append("How's ");
34/// builder.append("it goin");
35/// builder.append("g?");
36///
37/// let rope = builder.finish();
38///
39/// assert_eq!(rope, "Hello world!\nHow's it going?");
40/// ```
41#[derive(Debug, Clone)]
42pub struct RopeBuilder {
43    stack: SmallVec<[Arc<Node>; 4]>,
44    buffer: String,
45    last_chunk_len_bytes: usize,
46}
47
48impl RopeBuilder {
49    /// Creates a new RopeBuilder, ready for input.
50    pub fn new() -> Self {
51        RopeBuilder {
52            stack: {
53                let mut stack = SmallVec::new();
54                stack.push(Arc::new(Node::new()));
55                stack
56            },
57            buffer: String::new(),
58            last_chunk_len_bytes: 0,
59        }
60    }
61
62    /// Appends `chunk` to the end of the in-progress `Rope`.
63    ///
64    /// Call this method repeatedly to incrementally build up a
65    /// `Rope`.  The passed text chunk can be as large or small as
66    /// desired, but larger chunks are more efficient.
67    ///
68    /// `chunk` must be valid utf8 text.
69    pub fn append(&mut self, chunk: &str) {
70        self.append_internal(chunk, false);
71    }
72
73    /// Finishes the build, and returns the `Rope`.
74    ///
75    /// Note: this method consumes the builder.  If you want to continue
76    /// building other ropes with the same prefix, you can clone the builder
77    /// before calling `finish()`.
78    pub fn finish(mut self) -> Rope {
79        // Append the last leaf
80        self.append_internal("", true);
81        self.finish_internal(true)
82    }
83
84    /// Builds a rope all at once from a single string slice.
85    ///
86    /// This avoids the creation and use of the internal buffer.  This is
87    /// for internal use only, because the public-facing API has
88    /// Rope::from_str(), which actually uses this for its implementation.
89    pub(crate) fn build_at_once(mut self, chunk: &str) -> Rope {
90        self.append_internal(chunk, true);
91        self.finish_internal(true)
92    }
93
94    /// NOT PART OF THE PUBLIC API (hidden from docs for a reason!).
95    ///
96    /// Appends `contents` to the in-progress rope as a single leaf
97    /// node (chunk).  This is useful for building ropes with specific
98    /// chunk configurations for testing purposes.  It will happily append
99    /// both empty and more-than-max-size chunks.
100    ///
101    /// This makes no attempt to be consistent with the standard `append()`
102    /// method, and should not be used in conjunction with it.
103    #[doc(hidden)]
104    pub fn _append_chunk(&mut self, contents: &str) {
105        self.append_leaf_node(Arc::new(Node::Leaf(NodeText::from_str(contents))));
106    }
107
108    /// NOT PART OF THE PUBLIC API (hidden from docs for a reason!).
109    ///
110    /// Finishes the build without doing any tree fixing to adhere
111    /// to the btree invariants. To be used with `_append_chunk()` to
112    /// construct ropes with specific chunk boundaries for testing.
113    #[doc(hidden)]
114    pub fn _finish_no_fix(self) -> Rope {
115        self.finish_internal(false)
116    }
117
118    //-----------------------------------------------------------------
119
120    // Internal workings of `append()`.
121    fn append_internal(&mut self, chunk: &str, is_last_chunk: bool) {
122        let mut chunk = chunk;
123
124        // Repeatedly chop text off the end of the input, creating
125        // leaf nodes out of them and appending them to the tree.
126        while !chunk.is_empty() || (!self.buffer.is_empty() && is_last_chunk) {
127            // Get the text for the next leaf
128            let (leaf_text, remainder) = self.get_next_leaf_text(chunk, is_last_chunk);
129            chunk = remainder;
130
131            self.last_chunk_len_bytes = chunk.len();
132
133            // Append the leaf to the rope
134            match leaf_text {
135                NextText::None => break,
136                NextText::UseBuffer => {
137                    let leaf_text = NodeText::from_str(&self.buffer);
138                    self.append_leaf_node(Arc::new(Node::Leaf(leaf_text)));
139                    self.buffer.clear();
140                }
141                NextText::String(s) => {
142                    self.append_leaf_node(Arc::new(Node::Leaf(NodeText::from_str(s))));
143                }
144            }
145        }
146    }
147
148    // Internal workings of `finish()`.
149    //
150    // When `fix_tree` is false, the resulting node tree is NOT fixed up
151    // to adhere to the btree invariants.  This is useful for some testing
152    // code.  But generally, `fix_tree` should be set to true.
153    fn finish_internal(mut self, fix_tree: bool) -> Rope {
154        // Zip up all the remaining nodes on the stack
155        let mut stack_idx = self.stack.len() - 1;
156        while stack_idx >= 1 {
157            let node = self.stack.pop().unwrap();
158            if let Node::Internal(ref mut children) = *Arc::make_mut(&mut self.stack[stack_idx - 1])
159            {
160                children.push((node.text_info(), node));
161            } else {
162                unreachable!();
163            }
164            stack_idx -= 1;
165        }
166
167        // Create the rope.
168        let mut rope = Rope {
169            root: self.stack.pop().unwrap(),
170        };
171
172        // Fix up the tree to be well-formed.
173        if fix_tree {
174            Arc::make_mut(&mut rope.root).zip_fix_right();
175            if self.last_chunk_len_bytes < MIN_BYTES
176                && self.last_chunk_len_bytes != rope.len_bytes()
177            {
178                // Merge the last chunk if it was too small.
179                let idx = rope.len_chars()
180                    - rope.byte_to_char(rope.len_bytes() - self.last_chunk_len_bytes);
181                Arc::make_mut(&mut rope.root).fix_tree_seam(idx);
182            }
183            rope.pull_up_singular_nodes();
184        }
185
186        return rope;
187    }
188
189    // Returns (next_leaf_text, remaining_text)
190    #[inline(always)]
191    fn get_next_leaf_text<'a>(
192        &mut self,
193        text: &'a str,
194        is_last_chunk: bool,
195    ) -> (NextText<'a>, &'a str) {
196        assert!(
197            self.buffer.len() < MAX_BYTES,
198            "RopeBuilder: buffer is already full when receiving a chunk! \
199             This should never happen!",
200        );
201
202        // Simplest case: empty buffer and enough in `text` for a full
203        // chunk, so just chop a chunk off from `text` and use that.
204        if self.buffer.is_empty() && text.len() >= MAX_BYTES {
205            let split_idx = crlf::find_good_split(
206                MAX_BYTES.min(text.len() - 1), // - 1 to avoid CRLF split.
207                text.as_bytes(),
208                true,
209            );
210            return (NextText::String(&text[..split_idx]), &text[split_idx..]);
211        }
212        // If the buffer + `text` is enough for a full chunk, push enough
213        // of `text` onto the buffer to fill it and use that.
214        else if (text.len() + self.buffer.len()) >= MAX_BYTES {
215            let mut split_idx =
216                crlf::find_good_split(MAX_BYTES - self.buffer.len(), text.as_bytes(), true);
217            if split_idx == text.len() && text.as_bytes()[text.len() - 1] == 0x0D {
218                // Avoid CRLF split.
219                split_idx -= 1;
220            };
221            self.buffer.push_str(&text[..split_idx]);
222            return (NextText::UseBuffer, &text[split_idx..]);
223        }
224        // If we don't have enough text for a full chunk.
225        else {
226            // If it's our last chunk, wrap it all up!
227            if is_last_chunk {
228                if self.buffer.is_empty() {
229                    return if text.is_empty() {
230                        (NextText::None, "")
231                    } else {
232                        (NextText::String(text), "")
233                    };
234                } else {
235                    self.buffer.push_str(text);
236                    return (NextText::UseBuffer, "");
237                }
238            }
239            // Otherwise, just push to the buffer.
240            else {
241                self.buffer.push_str(text);
242                return (NextText::None, "");
243            }
244        }
245    }
246
247    fn append_leaf_node(&mut self, leaf: Arc<Node>) {
248        let last = self.stack.pop().unwrap();
249        match *last {
250            Node::Leaf(_) => {
251                if last.leaf_text().is_empty() {
252                    self.stack.push(leaf);
253                } else {
254                    let mut children = NodeChildren::new();
255                    children.push((last.text_info(), last));
256                    children.push((leaf.text_info(), leaf));
257                    self.stack.push(Arc::new(Node::Internal(children)));
258                }
259            }
260
261            Node::Internal(_) => {
262                self.stack.push(last);
263                let mut left = leaf;
264                let mut stack_idx = (self.stack.len() - 1) as isize;
265                loop {
266                    if stack_idx < 0 {
267                        // We're above the root, so do a root split.
268                        let mut children = NodeChildren::new();
269                        children.push((left.text_info(), left));
270                        self.stack.insert(0, Arc::new(Node::Internal(children)));
271                        break;
272                    } else if self.stack[stack_idx as usize].child_count() < (MAX_CHILDREN - 1) {
273                        // There's room to add a child, so do that.
274                        Arc::make_mut(&mut self.stack[stack_idx as usize])
275                            .children_mut()
276                            .push((left.text_info(), left));
277                        break;
278                    } else {
279                        // Not enough room to fit a child, so split.
280                        left = Arc::new(Node::Internal(
281                            Arc::make_mut(&mut self.stack[stack_idx as usize])
282                                .children_mut()
283                                .push_split((left.text_info(), left)),
284                        ));
285                        std::mem::swap(&mut left, &mut self.stack[stack_idx as usize]);
286                        stack_idx -= 1;
287                    }
288                }
289            }
290        }
291    }
292}
293
294impl Default for RopeBuilder {
295    fn default() -> Self {
296        Self::new()
297    }
298}
299
300enum NextText<'a> {
301    None,
302    UseBuffer,
303    String(&'a str),
304}
305
306//===========================================================================
307
308#[cfg(test)]
309mod tests {
310    use super::*;
311
312    // 127 bytes, 103 chars, 4 lines
313    const TEXT: &str = "Hello there!  How're you doing?\r\nIt's \
314                        a fine day, isn't it?\r\nAren't you glad \
315                        we're alive?\r\nこんにちは、みんなさん!";
316
317    #[test]
318    fn rope_builder_01() {
319        let mut b = RopeBuilder::new();
320
321        b.append("Hello there!  How're you doing?\r");
322        b.append("\nIt's a fine ");
323        b.append("d");
324        b.append("a");
325        b.append("y,");
326        b.append(" ");
327        b.append("isn't it?");
328        b.append("\r");
329        b.append("\nAren't you ");
330        b.append("glad we're alive?\r");
331        b.append("\n");
332        b.append("こんにち");
333        b.append("は、みんなさ");
334        b.append("ん!");
335
336        let r = b.finish();
337
338        assert_eq!(r, TEXT);
339
340        r.assert_integrity();
341        r.assert_invariants();
342    }
343
344    #[test]
345    fn rope_builder_default_01() {
346        let mut b = RopeBuilder::default();
347
348        b.append("Hello there!  How're you doing?\r");
349        b.append("\nIt's a fine day, isn't it?\r\nAren't you ");
350        b.append("glad we're alive?\r\nこんにちは、みんなさん!");
351
352        let r = b.finish();
353
354        assert_eq!(r, TEXT);
355
356        r.assert_integrity();
357        r.assert_invariants();
358    }
359}