1use crate::byte_chunk::{ByteChunk, Chunk};
10
11#[inline]
15pub fn count_breaks(text: &str) -> usize {
16 count_breaks_impl::<Chunk>(text.as_bytes())
17}
18
19#[inline]
30pub fn from_byte_idx(text: &str, byte_idx: usize) -> usize {
31 let i = byte_idx.min(text.len());
32 count_breaks_impl::<Chunk>(&text.as_bytes()[..i])
33}
34
35#[inline]
45pub fn to_byte_idx(text: &str, line_idx: usize) -> usize {
46 to_byte_idx_impl::<Chunk>(text.as_bytes(), line_idx)
47}
48
49#[inline(always)]
52fn to_byte_idx_impl<T: ByteChunk>(text: &[u8], line_idx: usize) -> usize {
53 let mut byte_count = 0;
54 let mut lf_count = 0;
55
56 let (start, middle, _) = unsafe { text.align_to::<T>() };
61
62 for byte in start.iter() {
64 if lf_count == line_idx {
65 return byte_count;
66 }
67 if *byte == 0x0A {
68 lf_count += 1;
69 }
70 byte_count += 1;
71 }
72
73 let mut chunk_count = 0;
75 for chunks in middle.chunks_exact(4) {
76 let val1 = chunks[0].cmp_eq_byte(0x0A);
77 let val2 = chunks[1].cmp_eq_byte(0x0A);
78 let val3 = chunks[2].cmp_eq_byte(0x0A);
79 let val4 = chunks[3].cmp_eq_byte(0x0A);
80 let new_lf_count = lf_count + val1.add(val2).add(val3.add(val4)).sum_bytes();
81 if new_lf_count >= line_idx {
82 break;
83 }
84 lf_count = new_lf_count;
85 byte_count += T::SIZE * 4;
86 chunk_count += 4;
87 }
88
89 for chunk in middle[chunk_count..].iter() {
91 let new_lf_count = lf_count + chunk.cmp_eq_byte(0x0A).sum_bytes();
92 if new_lf_count >= line_idx {
93 break;
94 }
95 lf_count = new_lf_count;
96 byte_count += T::SIZE;
97 }
98
99 for byte in &text[byte_count..] {
101 if lf_count == line_idx {
102 break;
103 }
104 lf_count += (*byte == 0x0A) as usize;
105 byte_count += 1;
106 }
107
108 byte_count
109}
110
111#[inline(always)]
116fn count_breaks_impl<T: ByteChunk>(text: &[u8]) -> usize {
117 if text.len() < T::SIZE {
118 text.iter().map(|byte| (*byte == 0x0A) as usize).sum()
121 } else {
122 let (start, middle, end) = unsafe { text.align_to::<T>() };
124
125 let mut count = 0;
126
127 count += start
129 .iter()
130 .map(|byte| (*byte == 0x0A) as usize)
131 .sum::<usize>();
132
133 for chunks in middle.chunks_exact(4) {
135 let val1 = chunks[0].cmp_eq_byte(0x0A);
136 let val2 = chunks[1].cmp_eq_byte(0x0A);
137 let val3 = chunks[2].cmp_eq_byte(0x0A);
138 let val4 = chunks[3].cmp_eq_byte(0x0A);
139 count += val1.add(val2).add(val3.add(val4)).sum_bytes();
140 }
141
142 let mut acc = T::zero();
144 for chunk in middle.chunks_exact(4).remainder() {
145 acc = acc.add(chunk.cmp_eq_byte(0x0A));
146 }
147 count += acc.sum_bytes();
148
149 count
151 + end
152 .iter()
153 .map(|byte| (*byte == 0x0A) as usize)
154 .sum::<usize>()
155 }
156}
157
158#[cfg(test)]
161mod tests {
162 use super::*;
163
164 const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
166 a fine day, isn't it?\nAren't you glad \
167 we're alive?\nこんにちは、みんなさん!";
168
169 #[test]
170 fn count_breaks_01() {
171 let text = "\nHello\u{000D}\nせ\u{000B}か\u{000C}い\u{0085}. \
172 There\nis something.\u{2029}";
173 assert_eq!(45, text.len());
174 assert_eq!(3, count_breaks(text));
175 }
176
177 #[test]
178 fn from_byte_idx_01() {
179 let text = "Here\nare\nsome\nwords";
180 assert_eq!(0, from_byte_idx(text, 0));
181 assert_eq!(0, from_byte_idx(text, 4));
182 assert_eq!(1, from_byte_idx(text, 5));
183 assert_eq!(1, from_byte_idx(text, 8));
184 assert_eq!(2, from_byte_idx(text, 9));
185 assert_eq!(2, from_byte_idx(text, 13));
186 assert_eq!(3, from_byte_idx(text, 14));
187 assert_eq!(3, from_byte_idx(text, 19));
188 }
189
190 #[test]
191 fn from_byte_idx_02() {
192 let text = "\nHere\nare\nsome\nwords\n";
193 assert_eq!(0, from_byte_idx(text, 0));
194 assert_eq!(1, from_byte_idx(text, 1));
195 assert_eq!(1, from_byte_idx(text, 5));
196 assert_eq!(2, from_byte_idx(text, 6));
197 assert_eq!(2, from_byte_idx(text, 9));
198 assert_eq!(3, from_byte_idx(text, 10));
199 assert_eq!(3, from_byte_idx(text, 14));
200 assert_eq!(4, from_byte_idx(text, 15));
201 assert_eq!(4, from_byte_idx(text, 20));
202 assert_eq!(5, from_byte_idx(text, 21));
203 }
204
205 #[test]
206 fn from_byte_idx_03() {
207 let text = "Here\r\nare\r\nsome\r\nwords";
208 assert_eq!(0, from_byte_idx(text, 0));
209 assert_eq!(0, from_byte_idx(text, 4));
210 assert_eq!(0, from_byte_idx(text, 5));
211 assert_eq!(1, from_byte_idx(text, 6));
212 assert_eq!(1, from_byte_idx(text, 9));
213 assert_eq!(1, from_byte_idx(text, 10));
214 assert_eq!(2, from_byte_idx(text, 11));
215 assert_eq!(2, from_byte_idx(text, 15));
216 assert_eq!(2, from_byte_idx(text, 16));
217 assert_eq!(3, from_byte_idx(text, 17));
218 }
219
220 #[test]
221 fn from_byte_idx_04() {
222 for i in 0..32 {
224 assert_eq!(0, from_byte_idx(TEXT_LINES, i));
225 }
226
227 for i in 32..59 {
229 assert_eq!(1, from_byte_idx(TEXT_LINES, i));
230 }
231
232 for i in 59..88 {
234 assert_eq!(2, from_byte_idx(TEXT_LINES, i));
235 }
236
237 for i in 88..125 {
239 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
240 }
241
242 for i in 125..130 {
244 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
245 }
246 }
247
248 #[test]
249 fn to_byte_idx_01() {
250 let text = "Here\r\nare\r\nsome\r\nwords";
251 assert_eq!(0, to_byte_idx(text, 0));
252 assert_eq!(6, to_byte_idx(text, 1));
253 assert_eq!(11, to_byte_idx(text, 2));
254 assert_eq!(17, to_byte_idx(text, 3));
255 }
256
257 #[test]
258 fn to_byte_idx_02() {
259 let text = "\nHere\nare\nsome\nwords\n";
260 assert_eq!(0, to_byte_idx(text, 0));
261 assert_eq!(1, to_byte_idx(text, 1));
262 assert_eq!(6, to_byte_idx(text, 2));
263 assert_eq!(10, to_byte_idx(text, 3));
264 assert_eq!(15, to_byte_idx(text, 4));
265 assert_eq!(21, to_byte_idx(text, 5));
266 }
267
268 #[test]
269 fn to_byte_idx_03() {
270 assert_eq!(0, to_byte_idx(TEXT_LINES, 0));
271 assert_eq!(32, to_byte_idx(TEXT_LINES, 1));
272 assert_eq!(59, to_byte_idx(TEXT_LINES, 2));
273 assert_eq!(88, to_byte_idx(TEXT_LINES, 3));
274
275 assert_eq!(124, to_byte_idx(TEXT_LINES, 4));
277 assert_eq!(124, to_byte_idx(TEXT_LINES, 5));
278 assert_eq!(124, to_byte_idx(TEXT_LINES, 6));
279 }
280
281 #[test]
282 fn line_byte_round_trip() {
283 let text = "\nHere\nare\nsome\nwords\n";
284 assert_eq!(6, to_byte_idx(text, from_byte_idx(text, 6)));
285 assert_eq!(2, from_byte_idx(text, to_byte_idx(text, 2)));
286
287 assert_eq!(0, to_byte_idx(text, from_byte_idx(text, 0)));
288 assert_eq!(0, from_byte_idx(text, to_byte_idx(text, 0)));
289
290 assert_eq!(21, to_byte_idx(text, from_byte_idx(text, 21)));
291 assert_eq!(5, from_byte_idx(text, to_byte_idx(text, 5)));
292 }
293}