1use crate::alignment_diff;
16use crate::byte_chunk::{ByteChunk, Chunk};
17
18#[inline]
22pub fn count_breaks(text: &str) -> usize {
23 count_breaks_impl::<Chunk>(text.as_bytes())
24}
25
26#[inline]
37pub fn from_byte_idx(text: &str, byte_idx: usize) -> usize {
38 let mut i = byte_idx.min(text.len());
39 while !text.is_char_boundary(i) {
40 i -= 1;
41 }
42 let nl_count = count_breaks_impl::<Chunk>(&text.as_bytes()[..i]);
43 if crate::is_not_crlf_middle(i, text.as_bytes()) {
44 nl_count
45 } else {
46 nl_count - 1
47 }
48}
49
50#[inline]
60pub fn to_byte_idx(text: &str, line_idx: usize) -> usize {
61 to_byte_idx_impl::<Chunk>(text, line_idx)
62}
63
64#[inline(always)]
67fn to_byte_idx_impl<T: ByteChunk>(text: &str, line_idx: usize) -> usize {
68 let mut bytes = text.as_bytes();
69 let mut line_break_count = 0;
70
71 let aligned_idx = alignment_diff::<T>(bytes);
73 if aligned_idx > 0 {
74 let result = count_breaks_up_to(bytes, aligned_idx, line_idx);
75 line_break_count += result.0;
76 bytes = &bytes[result.1..];
77 }
78
79 if alignment_diff::<T>(bytes) == 0 {
81 while bytes.len() >= T::SIZE {
82 let tmp = unsafe { count_breaks_in_chunk_from_ptr::<T>(bytes) }.sum_bytes();
84 if tmp + line_break_count >= line_idx {
85 break;
86 }
87 line_break_count += tmp;
88
89 bytes = &bytes[T::SIZE..];
90 }
91 }
92
93 let result = count_breaks_up_to(bytes, bytes.len(), line_idx - line_break_count);
95 bytes = &bytes[result.1..];
96
97 let mut byte_idx = text.len() - bytes.len();
99 while !text.is_char_boundary(byte_idx) {
100 byte_idx += 1;
101 }
102 byte_idx
103}
104
105#[inline(always)]
117fn count_breaks_impl<T: ByteChunk>(text: &[u8]) -> usize {
118 let mut bytes = text;
119 let mut count = 0;
120
121 let aligned_idx = alignment_diff::<T>(bytes);
123 if aligned_idx > 0 {
124 let result = count_breaks_up_to(bytes, aligned_idx, bytes.len());
125 count += result.0;
126 bytes = &bytes[result.1..];
127 }
128
129 let mut i = 0;
131 let mut acc = T::zero();
132 while bytes.len() >= T::SIZE {
133 acc = acc.add(unsafe { count_breaks_in_chunk_from_ptr::<T>(bytes) });
135 i += 1;
136 if i == T::MAX_ACC {
137 i = 0;
138 count += acc.sum_bytes();
139 acc = T::zero();
140 }
141 bytes = &bytes[T::SIZE..];
142 }
143 count += acc.sum_bytes();
144
145 count += count_breaks_up_to(bytes, bytes.len(), bytes.len()).0;
147
148 count
149}
150
151#[inline(always)]
156#[allow(clippy::if_same_then_else)]
157fn count_breaks_up_to(bytes: &[u8], max_bytes: usize, max_breaks: usize) -> (usize, usize) {
158 let mut ptr = 0;
159 let mut count = 0;
160 while ptr < max_bytes && count < max_breaks {
161 let byte = bytes[ptr];
162
163 if (0x0A..=0x0D).contains(&byte) {
165 count += 1;
166
167 if byte == 0x0D && (ptr + 1) < bytes.len() && bytes[ptr + 1] == 0x0A {
171 count -= 1;
172 }
173 }
174 else if byte == 0xC2 && (ptr + 1) < bytes.len() && bytes[ptr + 1] == 0x85 {
176 count += 1;
177 }
178 else if byte == 0xE2
180 && (ptr + 2) < bytes.len()
181 && bytes[ptr + 1] == 0x80
182 && (bytes[ptr + 2] >> 1) == 0x54
183 {
184 count += 1;
185 }
186
187 ptr += 1;
188 }
189
190 (count, ptr)
191}
192
193#[inline(always)]
199unsafe fn count_breaks_in_chunk_from_ptr<T: ByteChunk>(bytes: &[u8]) -> T {
200 let c = {
201 debug_assert_eq!(bytes.align_to::<T>().0.len(), 0);
203 debug_assert!(bytes.len() >= T::SIZE);
204 *(bytes.as_ptr() as *const T)
207 };
208 let end_i = T::SIZE;
209
210 let mut acc = T::zero();
211
212 let nl_1_flags = c.cmp_eq_byte(0xC2);
214 let sp_1_flags = c.cmp_eq_byte(0xE2);
215 let all_flags = c.bytes_between_127(0x09, 0x0E);
216 let cr_flags = c.cmp_eq_byte(0x0D);
217
218 if !nl_1_flags.is_zero() {
220 let nl_2_flags = c.cmp_eq_byte(0x85).shift_back_lex(1);
221 let flags = nl_1_flags.bitand(nl_2_flags);
222 acc = acc.add(flags);
223
224 if bytes.len() > end_i && bytes[end_i - 1] == 0xC2 && bytes[end_i] == 0x85 {
226 acc = acc.inc_nth_from_end_lex_byte(0);
227 }
228 }
229
230 if !sp_1_flags.is_zero() {
233 let sp_2_flags = c.cmp_eq_byte(0x80).shift_back_lex(1).bitand(sp_1_flags);
234 if !sp_2_flags.is_zero() {
235 let sp_3_flags = c
236 .shr(1)
237 .bitand(T::splat(!0x80))
238 .cmp_eq_byte(0x54)
239 .shift_back_lex(2);
240 let sp_flags = sp_2_flags.bitand(sp_3_flags);
241 acc = acc.add(sp_flags);
242 }
243
244 if bytes.len() > end_i
246 && bytes[end_i - 2] == 0xE2
247 && bytes[end_i - 1] == 0x80
248 && (bytes[end_i] >> 1) == 0x54
249 {
250 acc = acc.inc_nth_from_end_lex_byte(1);
251 } else if bytes.len() > (end_i + 1)
252 && bytes[end_i - 1] == 0xE2
253 && bytes[end_i] == 0x80
254 && (bytes[end_i + 1] >> 1) == 0x54
255 {
256 acc = acc.inc_nth_from_end_lex_byte(0);
257 }
258 }
259
260 acc = acc.add(all_flags);
266 if !cr_flags.is_zero() {
267 let lf_flags = c.cmp_eq_byte(0x0A);
269 let crlf_flags = cr_flags.bitand(lf_flags.shift_back_lex(1));
270 acc = acc.sub(crlf_flags);
271 if bytes.len() > end_i && bytes[end_i - 1] == 0x0D && bytes[end_i] == 0x0A {
272 acc = acc.dec_last_lex_byte();
273 }
274 }
275
276 acc
277}
278
279#[cfg(test)]
282mod tests {
283 use super::*;
284
285 const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
287 a fine day, isn't it?\nAren't you glad \
288 we're alive?\nこんにちは、みんなさん!";
289
290 #[test]
291 fn count_breaks_01() {
292 let text = "\u{000A}Hello\u{000D}\u{000A}\u{000D}せ\u{000B}か\u{000C}い\u{0085}. \
293 There\u{2028}is something.\u{2029}";
294 assert_eq!(48, text.len());
295 assert_eq!(8, count_breaks(text));
296 }
297
298 #[test]
299 fn from_byte_idx_01() {
300 let text = "Here\nare\nsome\nwords";
301 assert_eq!(0, from_byte_idx(text, 0));
302 assert_eq!(0, from_byte_idx(text, 4));
303 assert_eq!(1, from_byte_idx(text, 5));
304 assert_eq!(1, from_byte_idx(text, 8));
305 assert_eq!(2, from_byte_idx(text, 9));
306 assert_eq!(2, from_byte_idx(text, 13));
307 assert_eq!(3, from_byte_idx(text, 14));
308 assert_eq!(3, from_byte_idx(text, 19));
309 }
310
311 #[test]
312 fn from_byte_idx_02() {
313 let text = "\nHere\nare\nsome\nwords\n";
314 assert_eq!(0, from_byte_idx(text, 0));
315 assert_eq!(1, from_byte_idx(text, 1));
316 assert_eq!(1, from_byte_idx(text, 5));
317 assert_eq!(2, from_byte_idx(text, 6));
318 assert_eq!(2, from_byte_idx(text, 9));
319 assert_eq!(3, from_byte_idx(text, 10));
320 assert_eq!(3, from_byte_idx(text, 14));
321 assert_eq!(4, from_byte_idx(text, 15));
322 assert_eq!(4, from_byte_idx(text, 20));
323 assert_eq!(5, from_byte_idx(text, 21));
324 }
325
326 #[test]
327 fn from_byte_idx_03() {
328 let text = "Here\r\nare\r\nsome\r\nwords";
329 assert_eq!(0, from_byte_idx(text, 0));
330 assert_eq!(0, from_byte_idx(text, 4));
331 assert_eq!(0, from_byte_idx(text, 5));
332 assert_eq!(1, from_byte_idx(text, 6));
333 assert_eq!(1, from_byte_idx(text, 9));
334 assert_eq!(1, from_byte_idx(text, 10));
335 assert_eq!(2, from_byte_idx(text, 11));
336 assert_eq!(2, from_byte_idx(text, 15));
337 assert_eq!(2, from_byte_idx(text, 16));
338 assert_eq!(3, from_byte_idx(text, 17));
339 }
340
341 #[test]
342 fn from_byte_idx_04() {
343 for i in 0..32 {
345 assert_eq!(0, from_byte_idx(TEXT_LINES, i));
346 }
347
348 for i in 32..59 {
350 assert_eq!(1, from_byte_idx(TEXT_LINES, i));
351 }
352
353 for i in 59..88 {
355 assert_eq!(2, from_byte_idx(TEXT_LINES, i));
356 }
357
358 for i in 88..125 {
360 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
361 }
362
363 for i in 125..130 {
365 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
366 }
367 }
368
369 #[test]
370 fn to_byte_idx_01() {
371 let text = "Here\r\nare\r\nsome\r\nwords";
372 assert_eq!(0, to_byte_idx(text, 0));
373 assert_eq!(6, to_byte_idx(text, 1));
374 assert_eq!(11, to_byte_idx(text, 2));
375 assert_eq!(17, to_byte_idx(text, 3));
376 }
377
378 #[test]
379 fn to_byte_idx_02() {
380 let text = "\nHere\nare\nsome\nwords\n";
381 assert_eq!(0, to_byte_idx(text, 0));
382 assert_eq!(1, to_byte_idx(text, 1));
383 assert_eq!(6, to_byte_idx(text, 2));
384 assert_eq!(10, to_byte_idx(text, 3));
385 assert_eq!(15, to_byte_idx(text, 4));
386 assert_eq!(21, to_byte_idx(text, 5));
387 }
388
389 #[test]
390 fn to_byte_idx_03() {
391 assert_eq!(0, to_byte_idx(TEXT_LINES, 0));
392 assert_eq!(32, to_byte_idx(TEXT_LINES, 1));
393 assert_eq!(59, to_byte_idx(TEXT_LINES, 2));
394 assert_eq!(88, to_byte_idx(TEXT_LINES, 3));
395
396 assert_eq!(124, to_byte_idx(TEXT_LINES, 4));
398 assert_eq!(124, to_byte_idx(TEXT_LINES, 5));
399 assert_eq!(124, to_byte_idx(TEXT_LINES, 6));
400 }
401
402 #[test]
403 fn line_byte_round_trip() {
404 let text = "\nHere\nare\nsome\nwords\n";
405 assert_eq!(6, to_byte_idx(text, from_byte_idx(text, 6)));
406 assert_eq!(2, from_byte_idx(text, to_byte_idx(text, 2)));
407
408 assert_eq!(0, to_byte_idx(text, from_byte_idx(text, 0)));
409 assert_eq!(0, from_byte_idx(text, to_byte_idx(text, 0)));
410
411 assert_eq!(21, to_byte_idx(text, from_byte_idx(text, 21)));
412 assert_eq!(5, from_byte_idx(text, to_byte_idx(text, 5)));
413 }
414}