1use crate::byte_chunk::{ByteChunk, Chunk};
13
14#[inline]
18pub fn count_breaks(text: &str) -> usize {
19 count_breaks_impl::<Chunk>(text.as_bytes())
20}
21
22#[inline]
33pub fn from_byte_idx(text: &str, byte_idx: usize) -> usize {
34 let i = byte_idx.min(text.len());
35 let nl_count = count_breaks_impl::<Chunk>(&text.as_bytes()[..i]);
36 if crate::is_not_crlf_middle(i, text.as_bytes()) {
37 nl_count
38 } else {
39 nl_count - 1
40 }
41}
42
43#[inline]
53pub fn to_byte_idx(text: &str, line_idx: usize) -> usize {
54 to_byte_idx_impl::<Chunk>(text.as_bytes(), line_idx)
55}
56
57const LF: u8 = b'\n';
59const CR: u8 = b'\r';
60
61#[inline(always)]
62fn to_byte_idx_impl<T: ByteChunk>(text: &[u8], line_idx: usize) -> usize {
63 let (start, middle, _) = unsafe { text.align_to::<T>() };
68
69 let mut byte_count = 0;
70 let mut break_count = 0;
71
72 let mut last_was_cr = false;
74 for byte in start.iter().copied() {
75 let is_lf = byte == LF;
76 let is_cr = byte == CR;
77 if break_count == line_idx {
78 if last_was_cr && is_lf {
79 byte_count += 1;
80 }
81 return byte_count;
82 }
83 if is_cr || (is_lf && !last_was_cr) {
84 break_count += 1;
85 }
86 last_was_cr = is_cr;
87 byte_count += 1;
88 }
89
90 let mut chunk_count = 0;
92 let mut prev = T::splat(last_was_cr as u8);
93 for chunks in middle.chunks_exact(2) {
94 let lf_flags0 = chunks[0].cmp_eq_byte(LF);
95 let cr_flags0 = chunks[0].cmp_eq_byte(CR);
96 let crlf_flags0 = prev.shift_across(cr_flags0).bitand(lf_flags0);
97
98 let lf_flags1 = chunks[1].cmp_eq_byte(LF);
99 let cr_flags1 = chunks[1].cmp_eq_byte(CR);
100 let crlf_flags1 = cr_flags0.shift_across(cr_flags1).bitand(lf_flags1);
101 let new_break_count = break_count
102 + lf_flags0
103 .add(cr_flags0)
104 .add(lf_flags1)
105 .add(cr_flags1)
106 .sub(crlf_flags0)
107 .sub(crlf_flags1)
108 .sum_bytes();
109 if new_break_count >= line_idx {
110 break;
111 }
112 break_count = new_break_count;
113 byte_count += T::SIZE * 2;
114 chunk_count += 2;
115 prev = cr_flags1;
116 }
117
118 for chunk in middle[chunk_count..].iter() {
120 let lf_flags = chunk.cmp_eq_byte(LF);
121 let cr_flags = chunk.cmp_eq_byte(CR);
122 let crlf_flags = prev.shift_across(cr_flags).bitand(lf_flags);
123 let new_break_count = break_count + lf_flags.add(cr_flags).sub(crlf_flags).sum_bytes();
124 if new_break_count >= line_idx {
125 break;
126 }
127 break_count = new_break_count;
128 byte_count += T::SIZE;
129 prev = cr_flags;
130 }
131
132 last_was_cr = text.get(byte_count.saturating_sub(1)) == Some(&CR);
134 for byte in text[byte_count..].iter().copied() {
135 let is_lf = byte == LF;
136 let is_cr = byte == CR;
137 if break_count == line_idx {
138 if last_was_cr && is_lf {
139 byte_count += 1;
140 }
141 break;
142 }
143 if is_cr || (is_lf && !last_was_cr) {
144 break_count += 1;
145 }
146 last_was_cr = is_cr;
147 byte_count += 1;
148 }
149
150 byte_count
152}
153
154#[inline(always)]
160fn count_breaks_impl<T: ByteChunk>(text: &[u8]) -> usize {
161 let (start, middle, end) = unsafe { text.align_to::<T>() };
163
164 let mut count = 0;
165
166 let mut last_was_cr = false;
168 for byte in start.iter().copied() {
169 let is_lf = byte == LF;
170 let is_cr = byte == CR;
171 count += (is_cr | (is_lf & !last_was_cr)) as usize;
172 last_was_cr = is_cr;
173 }
174
175 let mut prev = T::splat(last_was_cr as u8);
177 for chunks in middle.chunks_exact(2) {
178 let lf_flags0 = chunks[0].cmp_eq_byte(LF);
179 let cr_flags0 = chunks[0].cmp_eq_byte(CR);
180 let crlf_flags0 = prev.shift_across(cr_flags0).bitand(lf_flags0);
181
182 let lf_flags1 = chunks[1].cmp_eq_byte(LF);
183 let cr_flags1 = chunks[1].cmp_eq_byte(CR);
184 let crlf_flags1 = cr_flags0.shift_across(cr_flags1).bitand(lf_flags1);
185 count += lf_flags0
186 .add(cr_flags0)
187 .sub(crlf_flags0)
188 .add(lf_flags1)
189 .add(cr_flags1)
190 .sub(crlf_flags1)
191 .sum_bytes();
192 prev = cr_flags1;
193 }
194
195 if let Some(chunk) = middle.chunks_exact(2).remainder().iter().next() {
196 let lf_flags = chunk.cmp_eq_byte(LF);
197 let cr_flags = chunk.cmp_eq_byte(CR);
198 let crlf_flags = prev.shift_across(cr_flags).bitand(lf_flags);
199 count += lf_flags.add(cr_flags).sub(crlf_flags).sum_bytes();
200 }
201
202 last_was_cr = text.get((text.len() - end.len()).saturating_sub(1)) == Some(&CR);
204 for byte in end.iter().copied() {
205 let is_lf = byte == LF;
206 let is_cr = byte == CR;
207 count += (is_cr | (is_lf & !last_was_cr)) as usize;
208 last_was_cr = is_cr;
209 }
210
211 count
212}
213
214#[cfg(test)]
217mod tests {
218 use super::*;
219
220 const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
222 a fine day, isn't it?\nAren't you glad \
223 we're alive?\nこんにちは、みんなさん!";
224
225 #[test]
226 fn count_breaks_01() {
227 let text = "\u{000A}Hello\u{000D}\u{000A}せ\u{000B}か\u{000C}い\u{0085}. \
228 There\u{000A}is something.\u{2029}";
229 assert_eq!(45, text.len());
230 assert_eq!(3, count_breaks(text));
231 }
232
233 #[test]
234 fn from_byte_idx_01() {
235 let text = "Here\nare\nsome\nwords";
236 assert_eq!(0, from_byte_idx(text, 0));
237 assert_eq!(0, from_byte_idx(text, 4));
238 assert_eq!(1, from_byte_idx(text, 5));
239 assert_eq!(1, from_byte_idx(text, 8));
240 assert_eq!(2, from_byte_idx(text, 9));
241 assert_eq!(2, from_byte_idx(text, 13));
242 assert_eq!(3, from_byte_idx(text, 14));
243 assert_eq!(3, from_byte_idx(text, 19));
244 }
245
246 #[test]
247 fn from_byte_idx_02() {
248 let text = "\nHere\nare\nsome\nwords\n";
249 assert_eq!(0, from_byte_idx(text, 0));
250 assert_eq!(1, from_byte_idx(text, 1));
251 assert_eq!(1, from_byte_idx(text, 5));
252 assert_eq!(2, from_byte_idx(text, 6));
253 assert_eq!(2, from_byte_idx(text, 9));
254 assert_eq!(3, from_byte_idx(text, 10));
255 assert_eq!(3, from_byte_idx(text, 14));
256 assert_eq!(4, from_byte_idx(text, 15));
257 assert_eq!(4, from_byte_idx(text, 20));
258 assert_eq!(5, from_byte_idx(text, 21));
259 }
260
261 #[test]
262 fn from_byte_idx_03() {
263 let text = "Here\r\nare\r\nsome\r\nwords";
264 assert_eq!(0, from_byte_idx(text, 0));
265 assert_eq!(0, from_byte_idx(text, 4));
266 assert_eq!(0, from_byte_idx(text, 5));
267 assert_eq!(1, from_byte_idx(text, 6));
268 assert_eq!(1, from_byte_idx(text, 9));
269 assert_eq!(1, from_byte_idx(text, 10));
270 assert_eq!(2, from_byte_idx(text, 11));
271 assert_eq!(2, from_byte_idx(text, 15));
272 assert_eq!(2, from_byte_idx(text, 16));
273 assert_eq!(3, from_byte_idx(text, 17));
274 }
275
276 #[test]
277 fn from_byte_idx_04() {
278 for i in 0..32 {
280 assert_eq!(0, from_byte_idx(TEXT_LINES, i));
281 }
282
283 for i in 32..59 {
285 assert_eq!(1, from_byte_idx(TEXT_LINES, i));
286 }
287
288 for i in 59..88 {
290 assert_eq!(2, from_byte_idx(TEXT_LINES, i));
291 }
292
293 for i in 88..125 {
295 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
296 }
297
298 for i in 125..130 {
300 assert_eq!(3, from_byte_idx(TEXT_LINES, i));
301 }
302 }
303
304 #[test]
305 fn to_byte_idx_01() {
306 let text = "Here\r\nare\r\nsome\r\nwords";
307 assert_eq!(0, to_byte_idx(text, 0));
308 assert_eq!(6, to_byte_idx(text, 1));
309 assert_eq!(11, to_byte_idx(text, 2));
310 assert_eq!(17, to_byte_idx(text, 3));
311 }
312
313 #[test]
314 fn to_byte_idx_02() {
315 let text = "\nHere\nare\nsome\nwords\n";
316 assert_eq!(0, to_byte_idx(text, 0));
317 assert_eq!(1, to_byte_idx(text, 1));
318 assert_eq!(6, to_byte_idx(text, 2));
319 assert_eq!(10, to_byte_idx(text, 3));
320 assert_eq!(15, to_byte_idx(text, 4));
321 assert_eq!(21, to_byte_idx(text, 5));
322 }
323
324 #[test]
325 fn to_byte_idx_03() {
326 assert_eq!(0, to_byte_idx(TEXT_LINES, 0));
327 assert_eq!(32, to_byte_idx(TEXT_LINES, 1));
328 assert_eq!(59, to_byte_idx(TEXT_LINES, 2));
329 assert_eq!(88, to_byte_idx(TEXT_LINES, 3));
330
331 assert_eq!(124, to_byte_idx(TEXT_LINES, 4));
333 assert_eq!(124, to_byte_idx(TEXT_LINES, 5));
334 assert_eq!(124, to_byte_idx(TEXT_LINES, 6));
335 }
336
337 #[test]
338 fn line_byte_round_trip() {
339 let text = "\nHere\nare\nsome\nwords\n";
340 assert_eq!(6, to_byte_idx(text, from_byte_idx(text, 6)));
341 assert_eq!(2, from_byte_idx(text, to_byte_idx(text, 2)));
342
343 assert_eq!(0, to_byte_idx(text, from_byte_idx(text, 0)));
344 assert_eq!(0, from_byte_idx(text, to_byte_idx(text, 0)));
345
346 assert_eq!(21, to_byte_idx(text, from_byte_idx(text, 21)));
347 assert_eq!(5, from_byte_idx(text, to_byte_idx(text, 5)));
348 }
349}