ropey/
lib.rs

1//! Ropey is a utf8 text rope for Rust.  It is fast, robust, and can handle
2//! huge texts and memory-incoherent edits with ease.
3//!
4//! Ropey's atomic unit of text is Unicode scalar values (or `char`s in Rust)
5//! encoded as utf8.  All of Ropey's editing and slicing operations are done
6//! in terms of char indices, which prevents accidental creation of invalid
7//! utf8 data.
8//!
9//! The library is made up of four main components:
10//!
11//! - [`Rope`]: the main rope type.
12//! - [`RopeSlice`]: an immutable view into part of a
13//!   `Rope`.
14//! - [`iter`]: iterators over `Rope`/`RopeSlice` data.
15//! - [`RopeBuilder`]: an efficient incremental
16//!   `Rope` builder.
17//!
18//!
19//! # A Basic Example
20//!
21//! Let's say we want to open up a text file, replace the 516th line (the
22//! writing was terrible!), and save it back to disk.  It's contrived, but will
23//! give a good sampling of the APIs and how they work together.
24//!
25//! ```no_run
26//! # use std::io::Result;
27//! use std::fs::File;
28//! use std::io::{BufReader, BufWriter};
29//! use ropey::Rope;
30//!
31//! # fn do_stuff() -> Result<()> {
32//! // Load a text file.
33//! let mut text = Rope::from_reader(
34//!     BufReader::new(File::open("my_great_book.txt")?)
35//! )?;
36//!
37//! // Print the 516th line (zero-indexed) to see the terrible
38//! // writing.
39//! println!("{}", text.line(515));
40//!
41//! // Get the start/end char indices of the line.
42//! let start_idx = text.line_to_char(515);
43//! let end_idx = text.line_to_char(516);
44//!
45//! // Remove the line...
46//! text.remove(start_idx..end_idx);
47//!
48//! // ...and replace it with something better.
49//! text.insert(start_idx, "The flowers are... so... dunno.\n");
50//!
51//! // Print the changes, along with the previous few lines for context.
52//! let start_idx = text.line_to_char(511);
53//! let end_idx = text.line_to_char(516);
54//! println!("{}", text.slice(start_idx..end_idx));
55//!
56//! // Write the file back out to disk.
57//! text.write_to(
58//!     BufWriter::new(File::create("my_great_book.txt")?)
59//! )?;
60//! # Ok(())
61//! # }
62//! # do_stuff().unwrap();
63//! ```
64//!
65//! More examples can be found in the `examples` directory of the git
66//! repository.  Many of those examples demonstrate doing non-trivial things
67//! with Ropey such as grapheme handling, search-and-replace, and streaming
68//! loading of non-utf8 text files.
69//!
70//!
71//! # Low-level APIs
72//!
73//! Ropey also provides access to some of its low-level APIs, enabling client
74//! code to efficiently work with a `Rope`'s data and implement new
75//! functionality.  The most important of those APIs are:
76//!
77//! - The [`chunk_at_*()`](Rope::chunk_at_byte)
78//!   chunk-fetching methods of `Rope` and `RopeSlice`.
79//! - The [`Chunks`](iter::Chunks) iterator.
80//! - The functions in [`str_utils`] for operating on
81//!   `&str` slices.
82//!
83//! Internally, each `Rope` stores text as a segmented collection of utf8
84//! strings.  The chunk-fetching methods and `Chunks` iterator provide direct
85//! access to those strings (or "chunks") as `&str` slices, allowing client
86//! code to work directly with the underlying utf8 data.
87//!
88//! The chunk-fetching methods and `str_utils` functions are the basic
89//! building blocks that Ropey itself uses to build much of its functionality.
90//! For example, the [`Rope::byte_to_char()`]
91//! method can be reimplemented as a free function like this:
92//!
93//! ```no_run
94//! use ropey::{
95//!     Rope,
96//!     str_utils::byte_to_char_idx
97//! };
98//!
99//! fn byte_to_char(rope: &Rope, byte_idx: usize) -> usize {
100//!     let (chunk, b, c, _) = rope.chunk_at_byte(byte_idx);
101//!     c + byte_to_char_idx(chunk, byte_idx - b)
102//! }
103//! ```
104//!
105//! And this will be just as efficient as Ropey's implementation.
106//!
107//! The chunk-fetching methods in particular are among the fastest functions
108//! that Ropey provides, generally operating in the sub-hundred nanosecond
109//! range for medium-sized (~200kB) documents on recent-ish computer systems.
110//!
111//!
112//! # A Note About Line Breaks
113//!
114//! Some of Ropey's APIs use the concept of line breaks or lines of text.
115//!
116//! Ropey considers the start of the rope and positions immediately
117//! _after_ line breaks to be the start of new lines.  And it treats
118//! line breaks as being a part of the lines they mark the end of.
119//!
120//! For example, the rope `"Hello"` has a single line: `"Hello"`.  The
121//! rope `"Hello\nworld"` has two lines: `"Hello\n"` and `"world"`.  And
122//! the rope `"Hello\nworld\n"` has three lines: `"Hello\n"`,
123//! `"world\n"`, and `""`.
124//!
125//! Ropey can be configured at build time via feature flags to recognize
126//! different line breaks.  Ropey always recognizes:
127//!
128//! - `U+000A`          &mdash; LF (Line Feed)
129//! - `U+000D` `U+000A` &mdash; CRLF (Carriage Return + Line Feed)
130//!
131//! With the `cr_lines` feature, the following are also recognized:
132//!
133//! - `U+000D`          &mdash; CR (Carriage Return)
134//!
135//! With the `unicode_lines` feature, in addition to all of the
136//! above, the following are also recognized (bringing Ropey into
137//! conformance with
138//! [Unicode Annex #14](https://www.unicode.org/reports/tr14/#BK)):
139//!
140//! - `U+000B`          &mdash; VT (Vertical Tab)
141//! - `U+000C`          &mdash; FF (Form Feed)
142//! - `U+0085`          &mdash; NEL (Next Line)
143//! - `U+2028`          &mdash; Line Separator
144//! - `U+2029`          &mdash; Paragraph Separator
145//!
146//! (Note: `unicode_lines` is enabled by default, and always implies
147//! `cr_lines`.)
148//!
149//! CRLF pairs are always treated as a single line break, and are never split
150//! across chunks.  Note, however, that slicing can still split them.
151//!
152//!
153//! # A Note About SIMD Acceleration
154//!
155//! Ropey has a `simd` feature flag (enabled by default) that enables
156//! explicit SIMD on supported platforms to improve performance.
157//!
158//! There is a bit of a footgun here: if you disable default features to
159//! configure line break behavior (as per the section above) then SIMD
160//! will also get disabled, and performance will suffer.  So be careful
161//! to explicitly re-enable the `simd` feature flag (if desired) when
162//! doing that.
163
164#![allow(clippy::collapsible_if)]
165#![allow(clippy::inline_always)]
166#![allow(clippy::needless_return)]
167#![allow(clippy::redundant_field_names)]
168#![allow(clippy::type_complexity)]
169
170extern crate smallvec;
171extern crate str_indices;
172
173mod crlf;
174mod rope;
175mod rope_builder;
176mod slice;
177mod tree;
178
179pub mod iter;
180pub mod str_utils;
181
182use std::ops::Bound;
183
184pub use crate::rope::Rope;
185pub use crate::rope_builder::RopeBuilder;
186pub use crate::slice::RopeSlice;
187
188/// NOT PART OF THE PUBLIC API (hidden from docs for a reason!)
189/// These are only exposed for tests that live in the `tests` directory.
190#[doc(hidden)]
191pub use crate::tree::{MAX_BYTES, MAX_CHILDREN, MIN_BYTES, MIN_CHILDREN};
192
193//==============================================================
194// Error reporting types.
195
196/// Ropey's result type.
197pub type Result<T> = std::result::Result<T, Error>;
198
/// Ropey's error type.
///
/// Every variant carries only plain indices and lengths, so the type is
/// cheap to copy and can be compared for equality (handy when asserting
/// on the exact failure in tests or when matching on a specific error).
#[derive(Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum Error {
    /// Indicates that the passed byte index was out of bounds.
    ///
    /// Contains the index attempted and the actual length of the
    /// `Rope`/`RopeSlice` in bytes, in that order.
    ByteIndexOutOfBounds(usize, usize),

    /// Indicates that the passed char index was out of bounds.
    ///
    /// Contains the index attempted and the actual length of the
    /// `Rope`/`RopeSlice` in chars, in that order.
    CharIndexOutOfBounds(usize, usize),

    /// Indicates that the passed line index was out of bounds.
    ///
    /// Contains the index attempted and the actual length of the
    /// `Rope`/`RopeSlice` in lines, in that order.
    LineIndexOutOfBounds(usize, usize),

    /// Indicates that the passed utf16 code-unit index was out of
    /// bounds.
    ///
    /// Contains the index attempted and the actual length of the
    /// `Rope`/`RopeSlice` in utf16 code units, in that order.
    Utf16IndexOutOfBounds(usize, usize),

    /// Indicates that the passed byte index was not a char boundary.
    ///
    /// Contains the passed byte index.
    ByteIndexNotCharBoundary(usize),

    /// Indicates that the passed byte range didn't line up with char
    /// boundaries.
    ///
    /// Contains the `[start, end)` byte indices of the range, in that
    /// order.  A `None` start or end means the range was unbounded on
    /// that side.
    ByteRangeNotCharBoundary(
        Option<usize>, // Start.
        Option<usize>, // End.
    ),

    /// Indicates that a reversed byte-index range (end < start) was
    /// encountered.
    ///
    /// Contains the `[start, end)` byte indices of the range, in that
    /// order.
    ByteRangeInvalid(
        usize, // Start.
        usize, // End.
    ),

    /// Indicates that a reversed char-index range (end < start) was
    /// encountered.
    ///
    /// Contains the `[start, end)` char indices of the range, in that
    /// order.
    CharRangeInvalid(
        usize, // Start.
        usize, // End.
    ),

    /// Indicates that the passed byte-index range was partially or fully
    /// out of bounds.
    ///
    /// Contains the `[start, end)` byte indices of the range and the
    /// actual length of the `Rope`/`RopeSlice` in bytes, in that order.
    /// A `None` start or end means the range was unbounded on that side.
    ByteRangeOutOfBounds(
        Option<usize>, // Start.
        Option<usize>, // End.
        usize,         // Rope byte length.
    ),

    /// Indicates that the passed char-index range was partially or fully
    /// out of bounds.
    ///
    /// Contains the `[start, end)` char indices of the range and the
    /// actual length of the `Rope`/`RopeSlice` in chars, in that order.
    /// A `None` start or end means the range was unbounded on that side.
    CharRangeOutOfBounds(
        Option<usize>, // Start.
        Option<usize>, // End.
        usize,         // Rope char length.
    ),
}
286
287impl std::error::Error for Error {
288    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
289        None
290    }
291
292    // Deprecated in std.
293    fn description(&self) -> &str {
294        ""
295    }
296
297    // Deprecated in std.
298    fn cause(&self) -> Option<&dyn std::error::Error> {
299        None
300    }
301}
302
303impl std::fmt::Debug for Error {
304    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
305        match *self {
306            Error::ByteIndexOutOfBounds(index, len) => {
307                write!(
308                    f,
309                    "Byte index out of bounds: byte index {}, Rope/RopeSlice byte length {}",
310                    index, len
311                )
312            }
313            Error::CharIndexOutOfBounds(index, len) => {
314                write!(
315                    f,
316                    "Char index out of bounds: char index {}, Rope/RopeSlice char length {}",
317                    index, len
318                )
319            }
320            Error::LineIndexOutOfBounds(index, len) => {
321                write!(
322                    f,
323                    "Line index out of bounds: line index {}, Rope/RopeSlice line count {}",
324                    index, len
325                )
326            }
327            Error::Utf16IndexOutOfBounds(index, len) => {
328                write!(f, "Utf16 code-unit index out of bounds: utf16 index {}, Rope/RopeSlice utf16 length {}", index, len)
329            }
330            Error::ByteIndexNotCharBoundary(index) => {
331                write!(
332                    f,
333                    "Byte index is not a valid char boundary: byte index {}",
334                    index
335                )
336            }
337            Error::ByteRangeNotCharBoundary(start_idx_opt, end_idx_opt) => {
338                write!(f, "Byte range does not align with char boundaries: range ")?;
339                write_range(f, start_idx_opt, end_idx_opt)
340            }
341            Error::ByteRangeInvalid(start_idx, end_idx) => {
342                write!(
343                    f,
344                    "Invalid byte range {}..{}: start must be <= end",
345                    start_idx, end_idx
346                )
347            }
348            Error::CharRangeInvalid(start_idx, end_idx) => {
349                write!(
350                    f,
351                    "Invalid char range {}..{}: start must be <= end",
352                    start_idx, end_idx
353                )
354            }
355            Error::ByteRangeOutOfBounds(start_idx_opt, end_idx_opt, len) => {
356                write!(f, "Byte range out of bounds: byte range ")?;
357                write_range(f, start_idx_opt, end_idx_opt)?;
358                write!(f, ", Rope/RopeSlice byte length {}", len)
359            }
360            Error::CharRangeOutOfBounds(start_idx_opt, end_idx_opt, len) => {
361                write!(f, "Char range out of bounds: char range ")?;
362                write_range(f, start_idx_opt, end_idx_opt)?;
363                write!(f, ", Rope/RopeSlice char length {}", len)
364            }
365        }
366    }
367}
368
369impl std::fmt::Display for Error {
370    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
371        // Just re-use the debug impl.
372        std::fmt::Debug::fmt(self, f)
373    }
374}
375
/// Writes a range in Rust range syntax (`a..b`, `a..`, `..b`, or `..`).
///
/// A `None` bound is simply omitted from its side of the `..`.
fn write_range(
    f: &mut std::fmt::Formatter<'_>,
    start_idx: Option<usize>,
    end_idx: Option<usize>,
) -> std::fmt::Result {
    // Lower bound, if any.
    if let Some(lo) = start_idx {
        write!(f, "{}", lo)?;
    }

    // The separator is always present.
    write!(f, "..")?;

    // Upper bound, if any.
    if let Some(hi) = end_idx {
        write!(f, "{}", hi)?;
    }

    Ok(())
}
399
400//==============================================================
401// Range handling utilities.
402
/// Converts the start bound of a range into an inclusive start index.
///
/// Returns `None` for an unbounded start.  An excluded bound `n` maps to
/// `n + 1`; the addition saturates so that `Excluded(usize::MAX)` yields
/// `usize::MAX` instead of panicking (debug) or wrapping around to `0`
/// (release), which would silently point at the beginning of the text.
#[inline(always)]
pub(crate) fn start_bound_to_num(b: Bound<&usize>) -> Option<usize> {
    match b {
        Bound::Included(n) => Some(*n),
        Bound::Excluded(n) => Some(n.saturating_add(1)),
        Bound::Unbounded => None,
    }
}
411
/// Converts the end bound of a range into an exclusive end index.
///
/// Returns `None` for an unbounded end.  An included bound `n` maps to
/// `n + 1`; the addition saturates so that `Included(usize::MAX)` (e.g.
/// from `..=usize::MAX`) yields `usize::MAX` instead of panicking (debug)
/// or wrapping around to `0` (release), which would silently describe an
/// empty range at the beginning of the text.
#[inline(always)]
pub(crate) fn end_bound_to_num(b: Bound<&usize>) -> Option<usize> {
    match b {
        Bound::Included(n) => Some(n.saturating_add(1)),
        Bound::Excluded(n) => Some(*n),
        Bound::Unbounded => None,
    }
}
419}