str_indices/
lib.rs

1#![no_std]
2
3//! Count and convert between different indexing schemes on utf8 string
4//! slices.
5//!
6//! This crate is organized into modules by indexing scheme.  Each module
7//! contains functions for counting relevant metrics for that scheme as
8//! well as functions for converting to/from byte indices.
9//!
10//! None of the functions in this crate panic: all inputs have a defined
11//! output.
12
13mod byte_chunk;
14pub mod chars;
15pub mod lines;
16pub mod lines_crlf;
17pub mod lines_lf;
18pub mod utf16;
19
/// Computes the distance, in bytes, from the start of `bytes` to the next
/// address that satisfies the alignment of `T`.
///
/// Put another way: this is how many leading bytes of `bytes` must be
/// skipped before landing on a `T`-aligned address.
///
/// The result is 0 when `bytes` already starts aligned, and is capped at
/// `bytes.len()` when the next aligned address lies past the end of `bytes`.
#[inline(always)]
fn alignment_diff<T>(bytes: &[u8]) -> usize {
    // Alignments are powers of two, so `align - 1` masks the low address
    // bits that must be zero for an aligned address.
    let low_bits_mask = core::mem::align_of::<T>() - 1;
    let addr = bytes.as_ptr() as usize;

    // `-addr mod align` is exactly the number of bytes needed to round
    // `addr` up to the next multiple of the alignment (0 if already there).
    let bytes_to_aligned = addr.wrapping_neg() & low_bits_mask;

    bytes_to_aligned.min(bytes.len())
}
34
/// Utility shared by some of the lines modules.
///
/// Reports whether `byte_idx` is anywhere other than between the CR and LF
/// of a CRLF pair in `text`.  It returns `false` only when the byte just
/// before `byte_idx` is CR (0x0D) and the byte at `byte_idx` is LF (0x0A).
/// Index 0 and any index at or past the end of `text` always yield `true`.
#[inline(always)]
fn is_not_crlf_middle(byte_idx: usize, text: &[u8]) -> bool {
    const CR: u8 = 0x0D;
    const LF: u8 = 0x0A;

    // At either edge of the text there is no byte on one side, so the index
    // can't be splitting a pair.  This also keeps the indexing below in
    // bounds.
    if byte_idx == 0 || byte_idx >= text.len() {
        return true;
    }

    let splits_crlf = text[byte_idx - 1] == CR && text[byte_idx] == LF;
    !splits_crlf
}
43
44//======================================================================
45
#[cfg(test)]
mod tests {
    use super::*;

    // Sample mixing ASCII with multi-byte Japanese text: 100 chars over
    // 4 lines, with the lines starting at char offsets 0, 32, 59, and 88.
    const TEXT_LINES: &str = "Hello there!  How're you doing?\nIt's \
                              a fine day, isn't it?\nAren't you glad \
                              we're alive?\nこんにちは、みんなさん!";

    /// Converts a char index to a line index by way of the byte index.
    fn char_to_line_idx(text: &str, idx: usize) -> usize {
        let byte_idx = chars::to_byte_idx(text, idx);
        lines::from_byte_idx(text, byte_idx)
    }

    /// Converts a line index to a char index by way of the byte index.
    fn line_to_char_idx(text: &str, idx: usize) -> usize {
        let byte_idx = lines::to_byte_idx(text, idx);
        chars::from_byte_idx(text, byte_idx)
    }

    #[test]
    fn char_to_line_idx_01() {
        let text = "Hello せ\nか\nい!";

        // (char index, expected line index)
        let cases = [(0, 0), (7, 0), (8, 1), (9, 1), (10, 2)];
        for &(char_idx, line_idx) in cases.iter() {
            assert_eq!(line_idx, char_to_line_idx(text, char_idx));
        }
    }

    #[test]
    fn char_to_line_idx_02() {
        // (char index span, expected line index).  The final span lies
        // past the end of the text and is expected to report the last line.
        let spans = [
            (0..32, 0),
            (32..59, 1),
            (59..88, 2),
            (88..100, 3),
            (100..110, 3),
        ];

        for (span, line_idx) in spans.iter() {
            for char_idx in span.clone() {
                assert_eq!(*line_idx, char_to_line_idx(TEXT_LINES, char_idx));
            }
        }
    }

    #[test]
    fn line_to_char_idx_01() {
        let text = "Hello せ\nか\nい!";

        // (line index, expected char index of the line start)
        let cases = [(0, 0), (1, 8), (2, 10)];
        for &(line_idx, char_idx) in cases.iter() {
            assert_eq!(char_idx, line_to_char_idx(text, line_idx));
        }
    }

    #[test]
    fn line_to_char_idx_02() {
        // (line index, expected char index).  Lines 4 and up are past the
        // end and are expected to report the total char count.
        let cases = [
            (0, 0),
            (1, 32),
            (2, 59),
            (3, 88),
            (4, 100),
            (5, 100),
            (6, 100),
        ];

        for &(line_idx, char_idx) in cases.iter() {
            assert_eq!(char_idx, line_to_char_idx(TEXT_LINES, line_idx));
        }
    }

    #[test]
    fn line_char_round_trip() {
        let text = "\nHere\nare\nsome\nwords\n";

        // Each pair is (a line-start char index, a line index); both are
        // expected to survive a round trip through the other scheme.
        let cases = [(6, 2), (0, 0), (21, 5)];
        for &(char_idx, line_idx) in cases.iter() {
            let via_line = char_to_line_idx(text, char_idx);
            assert_eq!(char_idx, line_to_char_idx(text, via_line));

            let via_char = line_to_char_idx(text, line_idx);
            assert_eq!(line_idx, char_to_line_idx(text, via_char));
        }
    }
}