ropey/tree/
node.rs

1use std::sync::Arc;
2
3use crate::str_utils::{
4    byte_to_char_idx, byte_to_line_idx, byte_to_utf16_surrogate_idx, char_to_byte_idx,
5};
6use crate::tree::node_text::fix_segment_seam;
7use crate::tree::{
8    Count, NodeChildren, NodeText, TextInfo, MAX_BYTES, MAX_CHILDREN, MIN_BYTES, MIN_CHILDREN,
9};
10
11#[derive(Debug, Clone)]
12#[repr(u8, C)]
13pub(crate) enum Node {
14    Leaf(NodeText),
15    Internal(NodeChildren),
16}
17
18impl Node {
19    /// Creates an empty node.
20    #[inline(always)]
21    pub fn new() -> Self {
22        Node::Leaf(NodeText::from_str(""))
23    }
24
25    /// Total number of bytes in the Rope.
26    #[inline(always)]
27    pub fn byte_count(&self) -> usize {
28        self.text_info().bytes as usize
29    }
30
31    /// Total number of chars in the Rope.
32    #[inline(always)]
33    pub fn char_count(&self) -> usize {
34        self.text_info().chars as usize
35    }
36
37    /// Total number of line breaks in the Rope.
38    #[inline(always)]
39    pub fn line_break_count(&self) -> usize {
40        self.text_info().line_breaks as usize
41    }
42
43    /// Total number of line breaks in the Rope.
44    #[inline(always)]
45    pub fn utf16_surrogate_count(&self) -> usize {
46        self.text_info().utf16_surrogates as usize
47    }
48
49    /// Fetches a chunk mutably, and allows it to be edited via a closure.
50    ///
51    /// There are three parameters:
52    /// - char_idx: the chunk that contains this char is fetched,
53    /// - node_info: this is the text info of the node it's being called on.
54    ///              This makes it a little awkward to call, but is needed since
55    ///              it's actually the parent node that contains the text info,
56    ///              so the info needs to be passed in.
57    /// - edit: the closure that receives the chunk and does the edits.
58    ///
59    /// The closure is effectively the termination case for the recursion,
60    /// and takes essentially same parameters and returns the same things as
61    /// the method itself.  In particular, the closure receives the char offset
62    /// of char_idx within the given chunk and the TextInfo of the chunk.
63    /// The main difference is that it receives a NodeText instead of a node.
64    ///
65    /// The closure is expected to return the updated text info of the node,
66    /// and if the node had to be split, then it also returns the right-hand
67    /// node along with its TextInfo as well.
68    ///
69    /// The main method call will then return the total updated TextInfo for
70    /// the whole tree, and a new node only if the whole tree had to be split.
71    /// It is up to the caller to check for that new node, and handle it by
72    /// creating a new root with both the original node and the new node as
73    /// children.
74    pub fn edit_chunk_at_char<F>(
75        &mut self,
76        char_idx: usize,
77        node_info: TextInfo,
78        mut edit: F,
79    ) -> (TextInfo, Option<(TextInfo, Arc<Node>)>)
80    where
81        F: FnMut(usize, TextInfo, &mut NodeText) -> (TextInfo, Option<(TextInfo, Arc<Node>)>),
82    {
83        match *self {
84            Node::Leaf(ref mut leaf_text) => edit(char_idx, node_info, leaf_text),
85            Node::Internal(ref mut children) => {
86                // Compact leaf children if we're very close to maximum leaf
87                // fragmentation.  This basically guards against excessive memory
88                // ballooning when repeatedly appending to the end of a rope.
89                // The constant here was arrived at experimentally, and is otherwise
90                // fairly arbitrary.
91                const FRAG_MIN_BYTES: usize = (MAX_BYTES * MIN_CHILDREN) + (MAX_BYTES / 32);
92                if children.is_full()
93                    && children.nodes()[0].is_leaf()
94                    && (children.combined_info().bytes as usize) < FRAG_MIN_BYTES
95                {
96                    children.compact_leaves();
97                }
98
99                // Find the child we care about.
100                let (child_i, acc_char_idx) = children.search_char_idx_only(char_idx);
101                let info = children.info()[child_i];
102
103                // Recurse into the child.
104                let (l_info, residual) = Arc::make_mut(&mut children.nodes_mut()[child_i])
105                    .edit_chunk_at_char(char_idx - acc_char_idx, info, edit);
106                children.info_mut()[child_i] = l_info;
107
108                // Handle the residual node if there is one and return.
109                if let Some((r_info, r_node)) = residual {
110                    if children.len() < MAX_CHILDREN {
111                        children.insert(child_i + 1, (r_info, r_node));
112                        (node_info - info + l_info + r_info, None)
113                    } else {
114                        let r = children.insert_split(child_i + 1, (r_info, r_node));
115                        let r_info = r.combined_info();
116                        (
117                            children.combined_info(),
118                            Some((r_info, Arc::new(Node::Internal(r)))),
119                        )
120                    }
121                } else {
122                    (node_info - info + l_info, None)
123                }
124            }
125        }
126    }
127
128    /// Removes chars in the range `start_idx..end_idx`.
129    ///
130    /// Returns (in this order):
131    /// - The updated TextInfo for the node.
132    /// - Whether there's a possible CRLF seam that needs fixing.
133    /// - Whether fix_tree_seam() needs to be run after this.
134    ///
135    /// WARNING: does not correctly handle all text being removed.  That
136    /// should be special-cased in calling code.
137    pub fn remove_char_range(
138        &mut self,
139        start_idx: usize,
140        end_idx: usize,
141        node_info: TextInfo,
142    ) -> (TextInfo, bool, bool) {
143        if start_idx == end_idx {
144            return (node_info, false, false);
145        }
146
147        match *self {
148            // If it's a leaf
149            Node::Leaf(ref mut leaf_text) => {
150                let byte_start = char_to_byte_idx(leaf_text, start_idx);
151                let byte_end =
152                    byte_start + char_to_byte_idx(&leaf_text[byte_start..], end_idx - start_idx);
153
154                // Remove text and calculate new info & seam info
155                if byte_start > 0 || byte_end < leaf_text.len() {
156                    let seam = (byte_start == 0 && leaf_text.as_bytes()[byte_end] == 0x0A)
157                        || (byte_end == leaf_text.len()
158                            && leaf_text.as_bytes()[byte_start - 1] == 0x0D);
159
160                    let seg_len = byte_end - byte_start; // Length of removal segement
161                    if seg_len < (leaf_text.len() - seg_len) {
162                        #[allow(unused_mut)]
163                        let mut info =
164                            node_info - TextInfo::from_str(&leaf_text[byte_start..byte_end]);
165
166                        // Check for CRLF pairs on the removal seams, and
167                        // adjust line break counts accordingly.
168                        #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
169                        {
170                            if byte_end < leaf_text.len()
171                                && leaf_text.as_bytes()[byte_end - 1] == 0x0D
172                                && leaf_text.as_bytes()[byte_end] == 0x0A
173                            {
174                                info.line_breaks += 1;
175                            }
176                            if byte_start > 0 && leaf_text.as_bytes()[byte_start - 1] == 0x0D {
177                                if leaf_text.as_bytes()[byte_start] == 0x0A {
178                                    info.line_breaks += 1;
179                                }
180                                if byte_end < leaf_text.len()
181                                    && leaf_text.as_bytes()[byte_end] == 0x0A
182                                {
183                                    info.line_breaks -= 1;
184                                }
185                            }
186                        }
187
188                        // Remove the text
189                        leaf_text.remove_range(byte_start, byte_end);
190
191                        (info, seam, false)
192                    } else {
193                        // Remove the text
194                        leaf_text.remove_range(byte_start, byte_end);
195
196                        (TextInfo::from_str(leaf_text), seam, false)
197                    }
198                } else {
199                    // Remove all of the text
200                    leaf_text.remove_range(byte_start, byte_end);
201
202                    (TextInfo::new(), true, false)
203                }
204            }
205
206            // If it's internal, it's much more complicated
207            Node::Internal(ref mut children) => {
208                // Shared code for handling children.
209                // Returns (in this order):
210                // - Whether there's a possible CRLF seam that needs fixing.
211                // - Whether the tree may need invariant fixing.
212                // - Updated TextInfo of the node.
213                let handle_child = |children: &mut NodeChildren,
214                                    child_i: usize,
215                                    c_char_acc: usize|
216                 -> (bool, bool, TextInfo) {
217                    // Recurse into child
218                    let tmp_info = children.info()[child_i];
219                    let tmp_chars = children.info()[child_i].chars as usize;
220                    let (new_info, seam, needs_fix) =
221                        Arc::make_mut(&mut children.nodes_mut()[child_i]).remove_char_range(
222                            start_idx - c_char_acc.min(start_idx),
223                            (end_idx - c_char_acc).min(tmp_chars),
224                            tmp_info,
225                        );
226
227                    // Handle result
228                    if new_info.bytes == 0 {
229                        children.remove(child_i);
230                    } else {
231                        children.info_mut()[child_i] = new_info;
232                    }
233
234                    (seam, needs_fix, new_info)
235                };
236
237                // Shared code for merging children
238                let merge_child = |children: &mut NodeChildren, child_i: usize| {
239                    if child_i < children.len()
240                        && children.len() > 1
241                        && children.nodes()[child_i].is_undersized()
242                    {
243                        if child_i == 0 {
244                            children.merge_distribute(child_i, child_i + 1);
245                        } else {
246                            children.merge_distribute(child_i - 1, child_i);
247                        }
248                    }
249                };
250
251                // Get child info for the two char indices
252                let ((l_child_i, l_char_acc), (r_child_i, r_char_acc)) =
253                    children.search_char_idx_range(start_idx, end_idx);
254
255                // Both indices point into the same child
256                if l_child_i == r_child_i {
257                    let info = children.info()[l_child_i];
258                    let (seam, mut needs_fix, new_info) =
259                        handle_child(children, l_child_i, l_char_acc);
260
261                    if children.len() > 0 {
262                        merge_child(children, l_child_i);
263
264                        // If we couldn't get all children >= minimum size, then
265                        // we'll need to fix that later.
266                        if children.nodes()[l_child_i.min(children.len() - 1)].is_undersized() {
267                            needs_fix = true;
268                        }
269                    }
270
271                    return (node_info - info + new_info, seam, needs_fix);
272                }
273                // We're dealing with more than one child.
274                else {
275                    let mut needs_fix = false;
276
277                    // Calculate the start..end range of nodes to be removed.
278                    let r_child_exists: bool;
279                    let start_i = l_child_i + 1;
280                    let end_i = if r_char_acc + children.info()[r_child_i].chars as usize == end_idx
281                    {
282                        r_child_exists = false;
283                        r_child_i + 1
284                    } else {
285                        r_child_exists = true;
286                        r_child_i
287                    };
288
289                    // Remove the children
290                    for _ in start_i..end_i {
291                        children.remove(start_i);
292                    }
293
294                    // Handle right child
295                    if r_child_exists {
296                        let (_, fix, _) = handle_child(children, l_child_i + 1, r_char_acc);
297                        needs_fix |= fix;
298                    }
299
300                    // Handle left child
301                    let (seam, fix, _) = handle_child(children, l_child_i, l_char_acc);
302                    needs_fix |= fix;
303
304                    if children.len() > 0 {
305                        // Handle merging
306                        let merge_extent = 1 + if r_child_exists { 1 } else { 0 };
307                        for i in (l_child_i..(l_child_i + merge_extent)).rev() {
308                            merge_child(children, i);
309                        }
310
311                        // If we couldn't get all children >= minimum size, then
312                        // we'll need to fix that later.
313                        if children.nodes()[l_child_i.min(children.len() - 1)].is_undersized() {
314                            needs_fix = true;
315                        }
316                    }
317
318                    // Return
319                    return (children.combined_info(), seam, needs_fix);
320                }
321            }
322        }
323    }
324
325    pub fn append_at_depth(&mut self, other: Arc<Node>, depth: usize) -> Option<Arc<Node>> {
326        if depth == 0 {
327            match *self {
328                Node::Leaf(_) => {
329                    if !other.is_leaf() {
330                        panic!("Tree-append siblings have differing types.");
331                    } else {
332                        return Some(other);
333                    }
334                }
335                Node::Internal(ref mut children_l) => {
336                    let mut other = other;
337                    if let Node::Internal(ref mut children_r) = *Arc::make_mut(&mut other) {
338                        if (children_l.len() + children_r.len()) <= MAX_CHILDREN {
339                            for _ in 0..children_r.len() {
340                                children_l.push(children_r.remove(0));
341                            }
342                            return None;
343                        } else {
344                            children_l.distribute_with(children_r);
345                            // Return lower down, to avoid borrow-checker.
346                        }
347                    } else {
348                        panic!("Tree-append siblings have differing types.");
349                    }
350                    return Some(other);
351                }
352            }
353        } else if let Node::Internal(ref mut children) = *self {
354            let last_i = children.len() - 1;
355            let residual =
356                Arc::make_mut(&mut children.nodes_mut()[last_i]).append_at_depth(other, depth - 1);
357            children.update_child_info(last_i);
358            if let Some(extra_node) = residual {
359                if children.len() < MAX_CHILDREN {
360                    children.push((extra_node.text_info(), extra_node));
361                    return None;
362                } else {
363                    let r_children = children.push_split((extra_node.text_info(), extra_node));
364                    return Some(Arc::new(Node::Internal(r_children)));
365                }
366            } else {
367                return None;
368            }
369        } else {
370            panic!("Reached leaf before getting to target depth.");
371        }
372    }
373
374    pub fn prepend_at_depth(&mut self, other: Arc<Node>, depth: usize) -> Option<Arc<Node>> {
375        if depth == 0 {
376            match *self {
377                Node::Leaf(_) => {
378                    if !other.is_leaf() {
379                        panic!("Tree-append siblings have differing types.");
380                    } else {
381                        return Some(other);
382                    }
383                }
384                Node::Internal(ref mut children_r) => {
385                    let mut other = other;
386                    if let Node::Internal(ref mut children_l) = *Arc::make_mut(&mut other) {
387                        if (children_l.len() + children_r.len()) <= MAX_CHILDREN {
388                            for _ in 0..children_l.len() {
389                                children_r.insert(0, children_l.pop());
390                            }
391                            return None;
392                        } else {
393                            children_l.distribute_with(children_r);
394                            // Return lower down, to avoid borrow-checker.
395                        }
396                    } else {
397                        panic!("Tree-append siblings have differing types.");
398                    }
399                    return Some(other);
400                }
401            }
402        } else if let Node::Internal(ref mut children) = *self {
403            let residual =
404                Arc::make_mut(&mut children.nodes_mut()[0]).prepend_at_depth(other, depth - 1);
405            children.update_child_info(0);
406            if let Some(extra_node) = residual {
407                if children.len() < MAX_CHILDREN {
408                    children.insert(0, (extra_node.text_info(), extra_node));
409                    return None;
410                } else {
411                    let mut r_children =
412                        children.insert_split(0, (extra_node.text_info(), extra_node));
413                    std::mem::swap(children, &mut r_children);
414                    return Some(Arc::new(Node::Internal(r_children)));
415                }
416            } else {
417                return None;
418            }
419        } else {
420            panic!("Reached leaf before getting to target depth.");
421        }
422    }
423
424    /// Splits the `Node` at char index `char_idx`, returning
425    /// the right side of the split.
426    pub fn split(&mut self, char_idx: usize) -> Node {
427        debug_assert!(char_idx != 0);
428        debug_assert!(char_idx != (self.text_info().chars as usize));
429        match *self {
430            Node::Leaf(ref mut text) => {
431                let byte_idx = char_to_byte_idx(text, char_idx);
432                Node::Leaf(text.split_off(byte_idx))
433            }
434            Node::Internal(ref mut children) => {
435                let (child_i, acc_info) = children.search_char_idx(char_idx);
436                let child_info = children.info()[child_i];
437
438                if char_idx == acc_info.chars as usize {
439                    Node::Internal(children.split_off(child_i))
440                } else if char_idx == (acc_info.chars as usize + child_info.chars as usize) {
441                    Node::Internal(children.split_off(child_i + 1))
442                } else {
443                    let mut r_children = children.split_off(child_i + 1);
444
445                    // Recurse
446                    let r_node = Arc::make_mut(&mut children.nodes_mut()[child_i])
447                        .split(char_idx - acc_info.chars as usize);
448
449                    r_children.insert(0, (r_node.text_info(), Arc::new(r_node)));
450
451                    children.update_child_info(child_i);
452                    r_children.update_child_info(0);
453
454                    Node::Internal(r_children)
455                }
456            }
457        }
458    }
459
460    /// Returns the chunk that contains the given byte, and the TextInfo
461    /// corresponding to the start of the chunk.
462    pub fn get_chunk_at_byte(&self, byte_idx: usize) -> (&str, TextInfo) {
463        let mut node = self;
464        let mut byte_idx = byte_idx;
465        let mut info = TextInfo::new();
466
467        loop {
468            match *node {
469                Node::Leaf(ref text) => {
470                    return (text, info);
471                }
472                Node::Internal(ref children) => {
473                    let (child_i, acc_info) = children.search_byte_idx(byte_idx);
474                    info += acc_info;
475                    node = &*children.nodes()[child_i];
476                    byte_idx -= acc_info.bytes as usize;
477                }
478            }
479        }
480    }
481
482    /// Returns the chunk that contains the given char, and the TextInfo
483    /// corresponding to the start of the chunk.
484    pub fn get_chunk_at_char(&self, char_idx: usize) -> (&str, TextInfo) {
485        let mut node = self;
486        let mut char_idx = char_idx;
487        let mut info = TextInfo::new();
488
489        loop {
490            match *node {
491                Node::Leaf(ref text) => {
492                    return (text, info);
493                }
494                Node::Internal(ref children) => {
495                    let (child_i, acc_info) = children.search_char_idx(char_idx);
496                    info += acc_info;
497                    node = &*children.nodes()[child_i];
498                    char_idx -= acc_info.chars as usize;
499                }
500            }
501        }
502    }
503
504    /// Returns the chunk that contains the given utf16 code unit, and the
505    /// TextInfo corresponding to the start of the chunk.
506    pub fn get_chunk_at_utf16_code_unit(&self, utf16_idx: usize) -> (&str, TextInfo) {
507        let mut node = self;
508        let mut utf16_idx = utf16_idx;
509        let mut info = TextInfo::new();
510
511        loop {
512            match *node {
513                Node::Leaf(ref text) => {
514                    return (text, info);
515                }
516                Node::Internal(ref children) => {
517                    let (child_i, acc_info) = children.search_utf16_code_unit_idx(utf16_idx);
518                    info += acc_info;
519                    node = &*children.nodes()[child_i];
520                    utf16_idx -= (acc_info.chars + acc_info.utf16_surrogates) as usize;
521                }
522            }
523        }
524    }
525
526    /// Returns the chunk that contains the given line break, and the TextInfo
527    /// corresponding to the start of the chunk.
528    ///
529    /// Note: for convenience, both the beginning and end of the rope are
530    /// considered line breaks for indexing.
531    pub fn get_chunk_at_line_break(&self, line_break_idx: usize) -> (&str, TextInfo) {
532        let mut node = self;
533        let mut line_break_idx = line_break_idx;
534        let mut info = TextInfo::new();
535
536        loop {
537            match *node {
538                Node::Leaf(ref text) => {
539                    return (text, info);
540                }
541                Node::Internal(ref children) => {
542                    let (child_i, acc_info) = children.search_line_break_idx(line_break_idx);
543                    info += acc_info;
544                    node = &*children.nodes()[child_i];
545                    line_break_idx -= acc_info.line_breaks as usize;
546                }
547            }
548        }
549    }
550
551    /// Returns the TextInfo at the given char index.
552    #[inline(always)]
553    pub fn char_to_text_info(&self, char_idx: usize) -> TextInfo {
554        let (chunk, info) = self.get_chunk_at_char(char_idx);
555        let bi = char_to_byte_idx(chunk, char_idx - info.chars as usize);
556        TextInfo {
557            bytes: info.bytes + bi as Count,
558            chars: char_idx as Count,
559            utf16_surrogates: info.utf16_surrogates
560                + byte_to_utf16_surrogate_idx(chunk, bi) as Count,
561            line_breaks: info.line_breaks + byte_to_line_idx(chunk, bi) as Count,
562        }
563    }
564
565    /// Returns the TextInfo at the given byte index.
566    #[inline(always)]
567    pub fn byte_to_text_info(&self, byte_idx: usize) -> TextInfo {
568        let (chunk, info) = self.get_chunk_at_byte(byte_idx);
569        let bi = byte_idx - info.bytes as usize;
570        let ci = byte_to_char_idx(chunk, byte_idx - info.bytes as usize);
571        TextInfo {
572            bytes: byte_idx as Count,
573            chars: info.chars + ci as Count,
574            utf16_surrogates: info.utf16_surrogates
575                + byte_to_utf16_surrogate_idx(chunk, bi) as Count,
576            line_breaks: info.line_breaks + byte_to_line_idx(chunk, bi) as Count,
577        }
578    }
579
580    pub fn text_info(&self) -> TextInfo {
581        match *self {
582            Node::Leaf(ref text) => TextInfo::from_str(text),
583            Node::Internal(ref children) => children.combined_info(),
584        }
585    }
586
587    pub fn is_char_boundary(&self, byte_idx: usize) -> bool {
588        let (chunk, info) = self.get_chunk_at_byte(byte_idx);
589        chunk.is_char_boundary(byte_idx - info.bytes as usize)
590    }
591
592    #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
593    pub fn is_crlf_split(&self, char_idx: usize) -> bool {
594        let (chunk, info) = self.get_chunk_at_char(char_idx);
595        let idx = char_to_byte_idx(chunk, char_idx - info.chars as usize);
596        if idx == 0 || idx == chunk.len() {
597            false
598        } else {
599            let chunk = chunk.as_bytes();
600            chunk[idx - 1] == 0x0D && chunk[idx] == 0x0A
601        }
602    }
603
604    //-----------------------------------------
605
606    pub fn child_count(&self) -> usize {
607        if let Node::Internal(ref children) = *self {
608            children.len()
609        } else {
610            panic!()
611        }
612    }
613
614    pub fn children(&self) -> &NodeChildren {
615        match *self {
616            Node::Internal(ref children) => children,
617            _ => panic!(),
618        }
619    }
620
621    pub fn children_mut(&mut self) -> &mut NodeChildren {
622        match *self {
623            Node::Internal(ref mut children) => children,
624            _ => panic!(),
625        }
626    }
627
628    pub fn leaf_text(&self) -> &str {
629        if let Node::Leaf(ref text) = *self {
630            text
631        } else {
632            panic!()
633        }
634    }
635
636    pub fn leaf_text_mut(&mut self) -> &mut NodeText {
637        if let Node::Leaf(ref mut text) = *self {
638            text
639        } else {
640            panic!()
641        }
642    }
643
644    pub fn is_leaf(&self) -> bool {
645        match *self {
646            Node::Leaf(_) => true,
647            Node::Internal(_) => false,
648        }
649    }
650
651    pub fn is_undersized(&self) -> bool {
652        match *self {
653            Node::Leaf(ref text) => text.len() < MIN_BYTES,
654            Node::Internal(ref children) => children.len() < MIN_CHILDREN,
655        }
656    }
657
658    /// How many nodes deep the tree is.
659    ///
660    /// This counts root and leafs.  For example, a single leaf node
661    /// has depth 1.
662    pub fn depth(&self) -> usize {
663        let mut node = self;
664        let mut depth = 0;
665
666        loop {
667            match *node {
668                Node::Leaf(_) => return depth,
669                Node::Internal(ref children) => {
670                    depth += 1;
671                    node = &*children.nodes()[0];
672                }
673            }
674        }
675    }
676
677    /// Debugging tool to make sure that all of the meta-data of the
678    /// tree is consistent with the actual data.
679    pub fn assert_integrity(&self) {
680        match *self {
681            Node::Leaf(_) => {}
682            Node::Internal(ref children) => {
683                for (info, node) in children.iter() {
684                    if *info != node.text_info() {
685                        assert_eq!(*info, node.text_info());
686                    }
687                    node.assert_integrity();
688                }
689            }
690        }
691    }
692
693    /// Checks that the entire tree is the same height everywhere.
694    pub fn assert_balance(&self) -> usize {
695        // Depth, child count, and leaf node emptiness
696        match *self {
697            Node::Leaf(_) => 1,
698            Node::Internal(ref children) => {
699                let first_depth = children.nodes()[0].assert_balance();
700                for node in &children.nodes()[1..] {
701                    assert_eq!(node.assert_balance(), first_depth);
702                }
703                first_depth + 1
704            }
705        }
706    }
707
708    /// Checks that all internal nodes have the minimum number of
709    /// children and all non-root leaf nodes are non-empty.
710    pub fn assert_node_size(&self, is_root: bool) {
711        match *self {
712            Node::Leaf(ref text) => {
713                // Leaf size
714                if !is_root {
715                    assert!(text.len() > 0);
716                }
717            }
718            Node::Internal(ref children) => {
719                // Child count
720                if is_root {
721                    assert!(children.len() > 1);
722                } else {
723                    assert!(children.len() >= MIN_CHILDREN);
724                }
725
726                for node in children.nodes() {
727                    node.assert_node_size(false);
728                }
729            }
730        }
731    }
732
733    /// Checks to make sure that a boundary between leaf nodes (given as a byte
734    /// position in the rope) doesn't split a CRLF pair, and fixes it if it does.
735    ///
736    /// If `must_be_boundary` is true, panics if the given byte position is
737    /// not on the boundary between two leaf nodes.
738    ///
739    /// TODO: theoretically this can leave an internal node with fewer than
740    /// MIN_CHILDREN children, although it is exceedingly unlikely with any
741    /// remotely sane text.  In the mean time, right now no code actually
742    /// depends on there being at least MIN_CHILDREN in an internal node.
743    /// But this should nevertheless get addressed at some point.
744    /// Probably the most straight-forward way to address this is via the
745    /// `fix_info_*` methods below, but I'm not totally sure.
746    pub fn fix_crlf_seam(&mut self, byte_pos: Count, must_be_boundary: bool) {
747        if let Node::Internal(ref mut children) = *self {
748            if byte_pos == 0 {
749                // Special-case 1
750                Arc::make_mut(&mut children.nodes_mut()[0])
751                    .fix_crlf_seam(byte_pos, must_be_boundary);
752            } else if byte_pos == children.combined_info().bytes {
753                // Special-case 2
754                let (info, nodes) = children.data_mut();
755                Arc::make_mut(nodes.last_mut().unwrap())
756                    .fix_crlf_seam(info.last().unwrap().bytes, must_be_boundary);
757            } else {
758                // Find the child to navigate into
759                let (child_i, start_info) = children.search_byte_idx(byte_pos as usize);
760                let start_byte = start_info.bytes;
761
762                let pos_in_child = byte_pos - start_byte;
763                let child_len = children.info()[child_i].bytes;
764
765                if pos_in_child == 0 || pos_in_child == child_len {
766                    // Left or right edge, get neighbor and fix seam
767                    let l_child_i = if pos_in_child == 0 {
768                        debug_assert!(child_i != 0);
769                        child_i - 1
770                    } else {
771                        debug_assert!(child_i < children.len());
772                        child_i
773                    };
774
775                    // Scope for borrow
776                    {
777                        // Fetch the two children
778                        let (l_child, r_child) = children.get_two_mut(l_child_i, l_child_i + 1);
779                        let l_child_bytes = l_child.0.bytes;
780                        let l_child = Arc::make_mut(l_child.1);
781                        let r_child = Arc::make_mut(r_child.1);
782
783                        // Get the text of the two children and fix
784                        // the seam between them.
785                        // Scope for borrow.
786                        {
787                            let (l_text, l_offset) =
788                                l_child.get_chunk_at_byte_mut(l_child_bytes as usize);
789                            let (r_text, r_offset) = r_child.get_chunk_at_byte_mut(0);
790                            if must_be_boundary {
791                                assert!(l_offset == 0 || l_offset == l_text.len());
792                                assert!(r_offset == 0 || r_offset == r_text.len());
793                            }
794                            fix_segment_seam(l_text, r_text);
795                        }
796
797                        // Fix up the children's metadata after the change
798                        // to their text.
799                        l_child.fix_info_right();
800                        r_child.fix_info_left();
801                    }
802
803                    // Fix up this node's metadata for those
804                    // two children.
805                    children.update_child_info(l_child_i);
806                    children.update_child_info(l_child_i + 1);
807
808                    // Remove the children if empty.
809                    if children.info()[l_child_i + 1].bytes == 0 {
810                        children.remove(l_child_i + 1);
811                    } else if children.info()[l_child_i].bytes == 0 {
812                        children.remove(l_child_i);
813                    }
814                } else {
815                    // Internal to child
816                    Arc::make_mut(&mut children.nodes_mut()[child_i])
817                        .fix_crlf_seam(pos_in_child, must_be_boundary);
818
819                    children.update_child_info(child_i);
820
821                    if children.info()[child_i].bytes == 0 {
822                        children.remove(child_i);
823                    }
824                }
825            }
826        }
827    }
828
829    /// Returns the chunk that contains the given byte, and the offset
830    /// of that byte within the chunk.
831    pub fn get_chunk_at_byte_mut(&mut self, byte_idx: usize) -> (&mut NodeText, usize) {
832        match *self {
833            Node::Leaf(ref mut text) => return (text, byte_idx),
834            Node::Internal(ref mut children) => {
835                let (child_i, acc_info) = children.search_byte_idx(byte_idx);
836                Arc::make_mut(&mut children.nodes_mut()[child_i])
837                    .get_chunk_at_byte_mut(byte_idx - acc_info.bytes as usize)
838            }
839        }
840    }
841
842    /// Updates the tree meta-data down the left side of the tree, and removes empty
843    /// children as it goes as well.
844    fn fix_info_left(&mut self) {
845        match *self {
846            Node::Leaf(_) => {}
847            Node::Internal(ref mut children) => {
848                Arc::make_mut(&mut children.nodes_mut()[0]).fix_info_left();
849                children.update_child_info(0);
850                if children.info()[0].bytes == 0 {
851                    children.remove(0);
852                }
853            }
854        }
855    }
856
857    /// Updates the tree meta-data down the right side of the tree, and removes empty
858    /// children as it goes as well.
859    fn fix_info_right(&mut self) {
860        match *self {
861            Node::Leaf(_) => {}
862            Node::Internal(ref mut children) => {
863                let idx = children.len() - 1;
864                Arc::make_mut(&mut children.nodes_mut()[idx]).fix_info_right();
865                children.update_child_info(idx);
866                if children.info()[idx].bytes == 0 {
867                    children.remove(idx);
868                }
869            }
870        }
871    }
872
873    /// Fixes dangling nodes down the left side of the tree.
874    ///
875    /// Returns whether it did anything or not that would affect the
876    /// parent.
877    pub fn zip_fix_left(&mut self) -> bool {
878        if let Node::Internal(ref mut children) = *self {
879            let mut did_stuff = false;
880            loop {
881                let do_merge = (children.len() > 1)
882                    && match *children.nodes()[0] {
883                        Node::Leaf(ref text) => text.len() < MIN_BYTES,
884                        Node::Internal(ref children2) => children2.len() < MIN_CHILDREN,
885                    };
886
887                if do_merge {
888                    did_stuff |= children.merge_distribute(0, 1);
889                }
890
891                if !Arc::make_mut(&mut children.nodes_mut()[0]).zip_fix_left() {
892                    break;
893                }
894            }
895            did_stuff
896        } else {
897            false
898        }
899    }
900
901    /// Fixes dangling nodes down the right side of the tree.
902    ///
903    /// Returns whether it did anything or not that would affect the
904    /// parent. True: did stuff, false: didn't do stuff
905    pub fn zip_fix_right(&mut self) -> bool {
906        if let Node::Internal(ref mut children) = *self {
907            let mut did_stuff = false;
908            loop {
909                let last_i = children.len() - 1;
910                let do_merge = (children.len() > 1)
911                    && match *children.nodes()[last_i] {
912                        Node::Leaf(ref text) => text.len() < MIN_BYTES,
913                        Node::Internal(ref children2) => children2.len() < MIN_CHILDREN,
914                    };
915
916                if do_merge {
917                    did_stuff |= children.merge_distribute(last_i - 1, last_i);
918                }
919
920                if !Arc::make_mut(children.nodes_mut().last_mut().unwrap()).zip_fix_right() {
921                    break;
922                }
923            }
924            did_stuff
925        } else {
926            false
927        }
928    }
929
930    /// Fixes up the tree after remove_char_range() or Rope::append().
931    ///
932    /// Takes the char index of the start of the removal range.
933    ///
934    /// Returns whether it did anything or not that would affect the
935    /// parent. True: did stuff, false: didn't do stuff
936    pub fn fix_tree_seam(&mut self, char_idx: usize) -> bool {
937        if let Node::Internal(ref mut children) = *self {
938            let mut did_stuff = false;
939            loop {
940                // Do merging
941                if children.len() > 1 {
942                    let (child_i, start_info) = children.search_char_idx(char_idx);
943                    let mut do_merge = match *children.nodes()[child_i] {
944                        Node::Leaf(ref text) => text.len() < MIN_BYTES,
945                        Node::Internal(ref children2) => children2.len() < MIN_CHILDREN,
946                    };
947
948                    if child_i == 0 {
949                        if do_merge {
950                            did_stuff |= children.merge_distribute(0, 1);
951                        }
952                    } else {
953                        do_merge = do_merge
954                            || (start_info.chars as usize == char_idx
955                                && match *children.nodes()[child_i - 1] {
956                                    Node::Leaf(ref text) => text.len() < MIN_BYTES,
957                                    Node::Internal(ref children2) => children2.len() < MIN_CHILDREN,
958                                });
959                        if do_merge {
960                            let res = children.merge_distribute(child_i - 1, child_i);
961                            did_stuff |= res
962                        }
963                    }
964                }
965
966                // Do recursion
967                let (child_i, start_info) = children.search_char_idx(char_idx);
968
969                if start_info.chars as usize == char_idx && child_i != 0 {
970                    let tmp = children.info()[child_i - 1].chars as usize;
971                    let effect_1 =
972                        Arc::make_mut(&mut children.nodes_mut()[child_i - 1]).fix_tree_seam(tmp);
973                    let effect_2 =
974                        Arc::make_mut(&mut children.nodes_mut()[child_i]).fix_tree_seam(0);
975                    if (!effect_1) && (!effect_2) {
976                        break;
977                    }
978                } else if !Arc::make_mut(&mut children.nodes_mut()[child_i])
979                    .fix_tree_seam(char_idx - start_info.chars as usize)
980                {
981                    break;
982                }
983            }
984            debug_assert!(children.is_info_accurate());
985            did_stuff
986        } else {
987            false
988        }
989    }
990}
991
992//===========================================================================
993
994#[cfg(test)]
995mod tests {
996    use crate::Rope;
997
998    // 133 chars, 209 bytes
999    const TEXT: &str = "\r\nHello there!  How're you doing?  It's a fine day, \
1000                        isn't it?  Aren't you glad we're alive?\r\n\
1001                        こんにちは!元気ですか?日はいいですね。\
1002                        私たちが生きだって嬉しいではないか?\r\n";
1003
1004    #[test]
1005    fn line_to_byte_01() {
1006        let r = Rope::from_str(TEXT);
1007
1008        assert_eq!(3, r.root.line_break_count());
1009        assert_eq!(0, r.line_to_byte(0));
1010        assert_eq!(2, r.line_to_byte(1));
1011        assert_eq!(93, r.line_to_byte(2));
1012        assert_eq!(209, r.line_to_byte(3));
1013    }
1014
1015    #[test]
1016    fn line_to_char_01() {
1017        let r = Rope::from_str(TEXT);
1018
1019        assert_eq!(3, r.root.line_break_count());
1020        assert_eq!(0, r.line_to_char(0));
1021        assert_eq!(2, r.line_to_char(1));
1022        assert_eq!(93, r.line_to_char(2));
1023        assert_eq!(133, r.line_to_char(3));
1024    }
1025
1026    #[test]
1027    fn crlf_corner_case_01() {
1028        use super::Node;
1029        use crate::tree::{NodeChildren, NodeText, MAX_BYTES};
1030        use std::sync::Arc;
1031
1032        // Construct the corner case
1033        let nodel = Node::Leaf(NodeText::from_str(&"\n".repeat(MAX_BYTES - 1)));
1034        let noder = Node::Leaf(NodeText::from_str(&"\n".repeat(MAX_BYTES)));
1035        let mut children = NodeChildren::new();
1036        children.push((nodel.text_info(), Arc::new(nodel)));
1037        children.push((noder.text_info(), Arc::new(noder)));
1038        let root = Node::Internal(children);
1039        let mut rope = Rope {
1040            root: Arc::new(root),
1041        };
1042        assert_eq!(rope.char(0), '\n');
1043        assert_eq!(rope.len_chars(), MAX_BYTES * 2 - 1);
1044
1045        // Do the potentially problematic insertion
1046        rope.insert(MAX_BYTES - 1, "\r");
1047    }
1048
1049    #[test]
1050    fn crlf_corner_case_02() {
1051        use super::Node;
1052        use crate::tree::{NodeChildren, NodeText, MAX_BYTES};
1053        use std::sync::Arc;
1054
1055        // Construct the corner case
1056        let nodel = Node::Leaf(NodeText::from_str(&"\r".repeat(MAX_BYTES)));
1057        let noder = Node::Leaf(NodeText::from_str(&"\r".repeat(MAX_BYTES - 1)));
1058        let mut children = NodeChildren::new();
1059        children.push((nodel.text_info(), Arc::new(nodel)));
1060        children.push((noder.text_info(), Arc::new(noder)));
1061        let root = Node::Internal(children);
1062        let mut rope = Rope {
1063            root: Arc::new(root),
1064        };
1065        assert_eq!(rope.char(0), '\r');
1066        assert_eq!(rope.len_chars(), MAX_BYTES * 2 - 1);
1067
1068        // Do the potentially problematic insertion
1069        rope.insert(MAX_BYTES, "\n");
1070    }
1071}