1#![no_std]
23//! Count and convert between different indexing schemes on utf8 string
4//! slices.
5//!
6//! This crate is organized into modules by indexing scheme. Each module
7//! contains functions for counting relevant metrics for that scheme as
8//! well as functions for converting to/from byte indices.
9//!
10//! None of the functions in this crate panic: all inputs have a defined
11//! output.
1213mod byte_chunk;
14pub mod chars;
15pub mod lines;
16pub mod lines_crlf;
17pub mod lines_lf;
18pub mod utf16;
/// Computes how many leading bytes of `bytes` must be skipped to land on
/// an address that is suitably aligned for `T`.
///
/// The result is 0 when `bytes` already begins on a `T`-aligned address.
/// When the next aligned address lies at or beyond the end of the slice,
/// the result is clamped to `bytes.len()`.
#[inline(always)]
fn alignment_diff<T>(bytes: &[u8]) -> usize {
    // Alignments are powers of two, so `align - 1` is a mask selecting the
    // low address bits that are all zero on an aligned address.
    let mask = core::mem::align_of::<T>() - 1;
    let addr = bytes.as_ptr() as usize;

    // Position of the byte *preceding* the slice within its alignment
    // window. Looking one byte back makes an already-aligned start map to
    // `mask`, which in turn yields a distance of zero below.
    let prev_offset = (addr - 1) & mask;
    let distance = mask - prev_offset;

    core::cmp::min(distance, bytes.len())
}
/// Reports whether `byte_idx` is anywhere *other than* between the two
/// bytes of a CRLF pair in `text`.
///
/// Returns `false` only when the byte just before `byte_idx` is a carriage
/// return (0x0D) and the byte at `byte_idx` is a line feed (0x0A). Index 0
/// and any index at or past the end of `text` can never sit inside a pair,
/// so they yield `true`.
///
/// Utility function used in some of the lines modules.
#[inline(always)]
fn is_not_crlf_middle(byte_idx: usize, text: &[u8]) -> bool {
    // With no byte on one side there is no pair to split. This guard also
    // keeps both index operations below in bounds.
    if byte_idx == 0 || byte_idx >= text.len() {
        return true;
    }

    let splits_crlf = text[byte_idx - 1] == b'\r' && text[byte_idx] == b'\n';
    !splits_crlf
}
4344//======================================================================
#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-line fixture mixing ASCII text with 3-byte UTF-8 characters.
    ///
    /// 124 bytes, 100 chars, 4 lines. The lines start at char indices
    /// 0, 32, 59 and 88.
    ///
    /// The tests below index into this text by char, so its exact content
    /// matters: there are two spaces after "Hello there!" and the final
    /// character is a fullwidth exclamation mark (U+FF01). Both are spelled
    /// with escapes so that whitespace collapsing or Unicode normalization
    /// by an editor or tool cannot silently change the counts.
    const TEXT_LINES: &str = "Hello there! \x20How're you doing?\nIt's \
                              a fine day, isn't it?\nAren't you glad \
                              we're alive?\nこんにちは、みんなさん\u{FF01}";

    /// Converts a char index to a line index by going through the
    /// corresponding byte index.
    fn char_to_line_idx(text: &str, idx: usize) -> usize {
        lines::from_byte_idx(text, chars::to_byte_idx(text, idx))
    }

    /// Converts a line index to a char index by going through the
    /// corresponding byte index.
    fn line_to_char_idx(text: &str, idx: usize) -> usize {
        chars::from_byte_idx(text, lines::to_byte_idx(text, idx))
    }

    #[test]
    fn char_to_line_idx_01() {
        let text = "Hello せ\nか\nい!";
        assert_eq!(0, char_to_line_idx(text, 0));
        // Char 7 is the first line break, which still belongs to line 0.
        assert_eq!(0, char_to_line_idx(text, 7));
        assert_eq!(1, char_to_line_idx(text, 8));
        assert_eq!(1, char_to_line_idx(text, 9));
        assert_eq!(2, char_to_line_idx(text, 10));
    }

    #[test]
    fn char_to_line_idx_02() {
        // Line 0
        for i in 0..32 {
            assert_eq!(0, char_to_line_idx(TEXT_LINES, i));
        }

        // Line 1
        for i in 32..59 {
            assert_eq!(1, char_to_line_idx(TEXT_LINES, i));
        }

        // Line 2
        for i in 59..88 {
            assert_eq!(2, char_to_line_idx(TEXT_LINES, i));
        }

        // Line 3
        for i in 88..100 {
            assert_eq!(3, char_to_line_idx(TEXT_LINES, i));
        }

        // Past the end
        for i in 100..110 {
            assert_eq!(3, char_to_line_idx(TEXT_LINES, i));
        }
    }

    #[test]
    fn line_to_char_idx_01() {
        let text = "Hello せ\nか\nい!";
        assert_eq!(0, line_to_char_idx(text, 0));
        assert_eq!(8, line_to_char_idx(text, 1));
        assert_eq!(10, line_to_char_idx(text, 2));
    }

    #[test]
    fn line_to_char_idx_02() {
        assert_eq!(0, line_to_char_idx(TEXT_LINES, 0));
        assert_eq!(32, line_to_char_idx(TEXT_LINES, 1));
        assert_eq!(59, line_to_char_idx(TEXT_LINES, 2));
        assert_eq!(88, line_to_char_idx(TEXT_LINES, 3));

        // Past end
        assert_eq!(100, line_to_char_idx(TEXT_LINES, 4));
        assert_eq!(100, line_to_char_idx(TEXT_LINES, 5));
        assert_eq!(100, line_to_char_idx(TEXT_LINES, 6));
    }

    #[test]
    fn line_char_round_trip() {
        // The text both starts and ends with a line break, so it has an
        // empty first line and an empty last line.
        let text = "\nHere\nare\nsome\nwords\n";

        // Each char index used here is the first char of its line, so the
        // char -> line -> char trip returns the same index.
        assert_eq!(6, line_to_char_idx(text, char_to_line_idx(text, 6)));
        assert_eq!(2, char_to_line_idx(text, line_to_char_idx(text, 2)));

        assert_eq!(0, line_to_char_idx(text, char_to_line_idx(text, 0)));
        assert_eq!(0, char_to_line_idx(text, line_to_char_idx(text, 0)));

        // Char 21 is one past the final line break, i.e. the start of the
        // empty last line (line 5).
        assert_eq!(21, line_to_char_idx(text, char_to_line_idx(text, 21)));
        assert_eq!(5, char_to_line_idx(text, line_to_char_idx(text, 5)));
    }
}