ropey/tree/
node_text.rs

1use std::borrow::Borrow;
2use std::ops::Deref;
3use std::str;
4
5use crate::crlf;
6
7/// A custom small string.  The unsafe guts of this are in `NodeSmallString`
8/// further down in this file.
9#[derive(Clone, Default)]
10#[repr(C)]
11pub(crate) struct NodeText(inner::NodeSmallString);
12
13impl NodeText {
14    /// Creates a new empty `NodeText`
15    #[inline(always)]
16    pub fn new() -> Self {
17        NodeText(inner::NodeSmallString::new())
18    }
19
20    /// Creates a new `NodeText` with the same contents as the given `&str`.
21    pub fn from_str(string: &str) -> Self {
22        NodeText(inner::NodeSmallString::from_str(string))
23    }
24
25    /// Inserts a `&str` at byte offset `byte_idx`.
26    pub fn insert_str(&mut self, byte_idx: usize, string: &str) {
27        self.0.insert_str(byte_idx, string);
28    }
29
30    /// Inserts `string` at `byte_idx` and splits the resulting string in half,
31    /// returning the right half.
32    ///
33    /// Only splits on code point boundaries and will never split CRLF pairs,
34    /// so if the whole string is a single code point or CRLF pair, the split
35    /// will fail and the returned string will be empty.
36    pub fn insert_str_split(&mut self, byte_idx: usize, string: &str) -> Self {
37        debug_assert!(self.is_char_boundary(byte_idx));
38
39        let tot_len = self.len() + string.len();
40        let mid_idx = tot_len / 2;
41        let a = byte_idx;
42        let b = byte_idx + string.len();
43
44        // Figure out the split index, accounting for code point
45        // boundaries and CRLF pairs.
46        // We first copy the bytes in the area of the proposed split point into
47        // a small 8-byte buffer.  We then use that buffer to look for the
48        // real split point.
49        let split_idx = {
50            let mut buf = [0u8; 8];
51            let start = mid_idx - 4.min(mid_idx);
52            let end = (mid_idx + 4).min(tot_len);
53            for i in start..end {
54                buf[i - start] = if i < a {
55                    self.as_bytes()[i]
56                } else if i < b {
57                    string.as_bytes()[i - a]
58                } else {
59                    self.as_bytes()[i - string.len()]
60                };
61            }
62
63            crlf::nearest_internal_break(mid_idx - start, &buf[..(end - start)]) + start
64        };
65
66        let mut right = NodeText::new();
67        if split_idx <= a {
68            right.push_str(&self[split_idx..a]);
69            right.push_str(string);
70            right.push_str(&self[a..]);
71            self.truncate(split_idx);
72        } else if split_idx <= b {
73            right.push_str(&string[(split_idx - a)..]);
74            right.push_str(&self[a..]);
75            self.truncate(a);
76            self.push_str(&string[..(split_idx - a)]);
77        } else {
78            right.push_str(&self[(split_idx - string.len())..]);
79            self.truncate(split_idx - string.len());
80            self.insert_str(a, string);
81        }
82
83        self.0.inline_if_possible();
84        right
85    }
86
87    /// Appends a `&str` to end the of the `NodeText`.
88    pub fn push_str(&mut self, string: &str) {
89        let len = self.len();
90        self.0.insert_str(len, string);
91    }
92
93    /// Appends a `&str` and splits the resulting string in half, returning
94    /// the right half.
95    ///
96    /// Only splits on code point boundaries and will never split CRLF pairs,
97    /// so if the whole string is a single code point or CRLF pair, the split
98    /// will fail and the returned string will be empty.
99    pub fn push_str_split(&mut self, string: &str) -> Self {
100        let len = self.len();
101        self.insert_str_split(len, string)
102    }
103
104    /// Drops the text after byte index `byte_idx`.
105    pub fn truncate(&mut self, byte_idx: usize) {
106        self.0.truncate(byte_idx);
107        self.0.inline_if_possible();
108    }
109
110    /// Drops the text before byte index `byte_idx`, shifting the
111    /// rest of the text to fill in the space.
112    pub fn truncate_front(&mut self, byte_idx: usize) {
113        self.0.remove_range(0, byte_idx);
114        self.0.inline_if_possible();
115    }
116
117    /// Removes the text in the byte index interval `[byte_start, byte_end)`.
118    pub fn remove_range(&mut self, byte_start: usize, byte_end: usize) {
119        self.0.remove_range(byte_start, byte_end);
120        self.0.inline_if_possible();
121    }
122
123    /// Splits the `NodeText` at `byte_idx`.
124    ///
125    /// The left part remains in the original, and the right part is
126    /// returned in a new `NodeText`.
127    pub fn split_off(&mut self, byte_idx: usize) -> Self {
128        let other = NodeText(self.0.split_off(byte_idx));
129        self.0.inline_if_possible();
130        other
131    }
132}
133
134impl std::cmp::PartialEq for NodeText {
135    fn eq(&self, other: &Self) -> bool {
136        let (s1, s2): (&str, &str) = (self, other);
137        s1 == s2
138    }
139}
140
141impl<'a> PartialEq<NodeText> for &'a str {
142    fn eq(&self, other: &NodeText) -> bool {
143        *self == (other as &str)
144    }
145}
146
147impl<'a> PartialEq<&'a str> for NodeText {
148    fn eq(&self, other: &&'a str) -> bool {
149        (self as &str) == *other
150    }
151}
152
153impl std::fmt::Display for NodeText {
154    fn fmt(&self, fm: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
155        NodeText::deref(self).fmt(fm)
156    }
157}
158
159impl std::fmt::Debug for NodeText {
160    fn fmt(&self, fm: &mut std::fmt::Formatter) -> std::fmt::Result {
161        NodeText::deref(self).fmt(fm)
162    }
163}
164
165impl<'a> From<&'a str> for NodeText {
166    fn from(s: &str) -> Self {
167        Self::from_str(s)
168    }
169}
170
171impl Deref for NodeText {
172    type Target = str;
173
174    fn deref(&self) -> &str {
175        self.0.as_str()
176    }
177}
178
179impl AsRef<str> for NodeText {
180    fn as_ref(&self) -> &str {
181        self.0.as_str()
182    }
183}
184
185impl Borrow<str> for NodeText {
186    fn borrow(&self) -> &str {
187        self.0.as_str()
188    }
189}
190
191//=======================================================================
192
193/// Takes two `NodeText`s and mends the CRLF break between them, if any.
194///
195/// Note: this will leave one of the strings empty if the entire composite string
196/// is a single CRLF pair.
197pub(crate) fn fix_segment_seam(l: &mut NodeText, r: &mut NodeText) {
198    // Early out, if there's nothing to do.
199    if crlf::seam_is_break(l.as_bytes(), r.as_bytes()) {
200        return;
201    }
202
203    let tot_len = l.len() + r.len();
204
205    // Find the new split position, if any.
206    let new_split_pos = {
207        let l_split = crlf::prev_break(l.len(), l.as_bytes());
208        let r_split = l.len() + crlf::next_break(0, r.as_bytes());
209        if l_split != 0 && (r_split == tot_len || l.len() > r.len()) {
210            l_split
211        } else {
212            r_split
213        }
214    };
215
216    // Move the bytes to create the new split
217    if new_split_pos < l.len() {
218        r.insert_str(0, &l[new_split_pos..]);
219        l.truncate(new_split_pos);
220    } else {
221        let pos = new_split_pos - l.len();
222        l.push_str(&r[..pos]);
223        r.truncate_front(pos);
224    }
225}
226
227//=======================================================================
228
229/// The unsafe guts of NodeText, exposed through a safe API.
230///
231/// Try to keep this as small as possible, and implement functionality on
232/// NodeText via the safe APIs whenever possible.
233mod inner {
234    use crate::tree::MAX_BYTES;
235    use smallvec::{Array, SmallVec};
236    use std::str;
237
238    /// The backing internal buffer type for `NodeText`.
239    #[derive(Copy, Clone)]
240    struct BackingArray([u8; MAX_BYTES]);
241
242    /// We need a very specific size of array, which is not necessarily
243    /// supported directly by the impls in the smallvec crate.  We therefore
244    /// have to implement this unsafe trait for our specific array size.
245    /// TODO: once integer const generics land, and smallvec updates its APIs
246    /// to use them, switch over and get rid of this unsafe impl.
247    unsafe impl Array for BackingArray {
248        type Item = u8;
249        fn size() -> usize {
250            MAX_BYTES
251        }
252    }
253
254    /// Internal small string for `NodeText`.
255    #[derive(Clone, Default)]
256    #[repr(C)]
257    pub struct NodeSmallString {
258        buffer: SmallVec<BackingArray>,
259    }
260
261    impl NodeSmallString {
262        #[inline(always)]
263        pub fn new() -> Self {
264            NodeSmallString {
265                buffer: SmallVec::new(),
266            }
267        }
268
269        #[inline(always)]
270        pub fn with_capacity(capacity: usize) -> Self {
271            NodeSmallString {
272                buffer: SmallVec::with_capacity(capacity),
273            }
274        }
275
276        #[inline(always)]
277        pub fn from_str(string: &str) -> Self {
278            let mut nodetext = NodeSmallString::with_capacity(string.len());
279            nodetext.insert_str(0, string);
280            nodetext
281        }
282
283        #[inline(always)]
284        pub fn len(&self) -> usize {
285            self.buffer.len()
286        }
287
288        #[inline(always)]
289        pub fn as_str(&self) -> &str {
290            // NodeSmallString's methods don't allow `buffer` to become invalid
291            // utf8, so this is safe.
292            unsafe { str::from_utf8_unchecked(self.buffer.as_ref()) }
293        }
294
295        /// Inserts `string` at `byte_idx`.
296        ///
297        /// Panics on out-of-bounds or of `byte_idx` isn't a char boundary.
298        #[inline(always)]
299        pub fn insert_str(&mut self, byte_idx: usize, string: &str) {
300            assert!(self.as_str().is_char_boundary(byte_idx));
301
302            // Copy bytes from `string` into the appropriate space in the
303            // buffer.
304            self.buffer.insert_from_slice(byte_idx, string.as_bytes());
305        }
306
307        /// Removes text in range `[start_byte_idx, end_byte_idx)`
308        ///
309        /// Panics on out-of-bounds or non-char-boundary indices.
310        #[inline(always)]
311        pub fn remove_range(&mut self, start_byte_idx: usize, end_byte_idx: usize) {
312            assert!(start_byte_idx <= end_byte_idx);
313            // Already checked by copy_within/is_char_boundary.
314            debug_assert!(end_byte_idx <= self.len());
315            assert!(self.as_str().is_char_boundary(start_byte_idx));
316            assert!(self.as_str().is_char_boundary(end_byte_idx));
317            let len = self.len();
318            let amt = end_byte_idx - start_byte_idx;
319
320            self.buffer.copy_within(end_byte_idx..len, start_byte_idx);
321
322            self.buffer.truncate(len - amt);
323        }
324
325        /// Removes text after `byte_idx`.
326        #[inline(always)]
327        pub fn truncate(&mut self, byte_idx: usize) {
328            // Already checked by is_char_boundary.
329            debug_assert!(byte_idx <= self.len());
330            assert!(self.as_str().is_char_boundary(byte_idx));
331            self.buffer.truncate(byte_idx);
332        }
333
334        /// Splits at `byte_idx`, returning the right part and leaving the
335        /// left part in the original.
336        ///
337        /// Panics on out-of-bounds or of `byte_idx` isn't a char boundary.
338        #[inline(always)]
339        pub fn split_off(&mut self, byte_idx: usize) -> Self {
340            // Already checked by is_char_boundary.
341            debug_assert!(byte_idx <= self.len());
342            assert!(self.as_str().is_char_boundary(byte_idx));
343            let len = self.len();
344            let mut other = NodeSmallString::with_capacity(len - byte_idx);
345            other.buffer.extend_from_slice(&self.buffer[byte_idx..]);
346            self.buffer.truncate(byte_idx);
347            other
348        }
349
350        /// Re-inlines the data if it's been heap allocated but can
351        /// fit inline.
352        #[inline(always)]
353        pub fn inline_if_possible(&mut self) {
354            if self.buffer.spilled() && (self.buffer.len() <= self.buffer.inline_size()) {
355                self.buffer.shrink_to_fit();
356            }
357        }
358    }
359
360    //-----------------------------------------------------------------------
361
362    #[cfg(test)]
363    mod tests {
364        use super::*;
365
366        #[test]
367        fn small_string_basics() {
368            let s = NodeSmallString::from_str("Hello!");
369            assert_eq!("Hello!", s.as_str());
370            assert_eq!(6, s.len());
371        }
372
373        #[test]
374        fn insert_str_01() {
375            let mut s = NodeSmallString::from_str("Hello!");
376            s.insert_str(3, "oz");
377            assert_eq!("Helozlo!", s.as_str());
378        }
379
380        #[test]
381        #[should_panic]
382        fn insert_str_02() {
383            let mut s = NodeSmallString::from_str("Hello!");
384            s.insert_str(7, "oz");
385        }
386
387        #[test]
388        #[should_panic]
389        fn insert_str_03() {
390            let mut s = NodeSmallString::from_str("こんにちは");
391            s.insert_str(4, "oz");
392        }
393
394        #[test]
395        fn remove_range_01() {
396            let mut s = NodeSmallString::from_str("Hello!");
397            s.remove_range(2, 4);
398            assert_eq!("Heo!", s.as_str());
399        }
400
401        #[test]
402        #[should_panic]
403        fn remove_range_02() {
404            let mut s = NodeSmallString::from_str("Hello!");
405            s.remove_range(4, 2);
406        }
407
408        #[test]
409        #[should_panic]
410        fn remove_range_03() {
411            let mut s = NodeSmallString::from_str("Hello!");
412            s.remove_range(2, 7);
413        }
414
415        #[test]
416        #[should_panic]
417        fn remove_range_04() {
418            let mut s = NodeSmallString::from_str("こんにちは");
419            s.remove_range(2, 4);
420        }
421
422        #[test]
423        fn truncate_01() {
424            let mut s = NodeSmallString::from_str("Hello!");
425            s.truncate(4);
426            assert_eq!("Hell", s.as_str());
427        }
428
429        #[test]
430        #[should_panic]
431        fn truncate_02() {
432            let mut s = NodeSmallString::from_str("Hello!");
433            s.truncate(7);
434        }
435
436        #[test]
437        #[should_panic]
438        fn truncate_03() {
439            let mut s = NodeSmallString::from_str("こんにちは");
440            s.truncate(4);
441        }
442
443        #[test]
444        fn split_off_01() {
445            let mut s1 = NodeSmallString::from_str("Hello!");
446            let s2 = s1.split_off(4);
447            assert_eq!("Hell", s1.as_str());
448            assert_eq!("o!", s2.as_str());
449        }
450
451        #[test]
452        #[should_panic]
453        fn split_off_02() {
454            let mut s1 = NodeSmallString::from_str("Hello!");
455            s1.split_off(7);
456        }
457
458        #[test]
459        #[should_panic]
460        fn split_off_03() {
461            let mut s1 = NodeSmallString::from_str("こんにちは");
462            s1.split_off(4);
463        }
464    }
465}