1pub(crate) use str_indices::chars::count as count_chars;
8pub use str_indices::chars::from_byte_idx as byte_to_char_idx;
9pub use str_indices::chars::to_byte_idx as char_to_byte_idx;
10pub(crate) use str_indices::utf16::count_surrogates as count_utf16_surrogates;
11
12#[cfg(feature = "unicode_lines")]
14use str_indices::lines;
15#[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
16use str_indices::lines_crlf as lines;
17#[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
18use str_indices::lines_lf as lines;
19
20pub(crate) use self::lines::count_breaks as count_line_breaks;
21pub use self::lines::from_byte_idx as byte_to_line_idx;
22pub use self::lines::to_byte_idx as line_to_byte_idx;
23
24#[inline]
32pub fn char_to_line_idx(text: &str, char_idx: usize) -> usize {
33 lines::from_byte_idx(text, str_indices::chars::to_byte_idx(text, char_idx))
34}
35
36#[inline]
44pub fn line_to_char_idx(text: &str, line_idx: usize) -> usize {
45 str_indices::chars::from_byte_idx(text, lines::to_byte_idx(text, line_idx))
46}
47
48pub(crate) fn byte_to_utf16_surrogate_idx(text: &str, byte_idx: usize) -> usize {
51 let mut i = byte_idx;
52 while !text.is_char_boundary(i) {
53 i -= 1;
54 }
55 str_indices::utf16::count_surrogates(&text[..i])
56}
57
58pub(crate) fn utf16_code_unit_to_char_idx(text: &str, utf16_idx: usize) -> usize {
59 str_indices::chars::from_byte_idx(text, str_indices::utf16::to_byte_idx(text, utf16_idx))
60}
61
62pub(crate) fn last_line_start_byte_idx(text: &str) -> usize {
67 let mut itr = text.bytes().enumerate().rev();
68
69 while let Some((idx, byte)) = itr.next() {
70 match byte {
71 0x0A => {
72 return idx + 1;
73 }
74 0x0D => {
75 #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
76 return idx + 1;
77 }
78 0x0B | 0x0C => {
79 #[cfg(feature = "unicode_lines")]
80 return idx + 1;
81 }
82 0x85 =>
83 {
84 #[cfg(feature = "unicode_lines")]
85 if let Some((_, 0xC2)) = itr.next() {
86 return idx + 1;
87 }
88 }
89 0xA8 | 0xA9 =>
90 {
91 #[cfg(feature = "unicode_lines")]
92 if let Some((_, 0x80)) = itr.next() {
93 if let Some((_, 0xE2)) = itr.next() {
94 return idx + 1;
95 }
96 }
97 }
98 _ => {}
99 }
100 }
101
102 return 0;
103}
104
/// Returns `text` with one trailing line break removed, if it ends with one.
///
/// Which characters count as a line break depends on the enabled features:
/// - always: LF (`U+000A`);
/// - with `cr_lines` or `unicode_lines`: CR (`U+000D`), and a CRLF pair is
///   removed as a single break;
/// - with `unicode_lines`: VT (`U+000B`), FF (`U+000C`), NEL (`U+0085`),
///   LS (`U+2028`) and PS (`U+2029`).
///
/// CR handling is keyed on either feature so that this function agrees with
/// the other line-break helpers in this module, which treat CR as a break
/// whenever `unicode_lines` is enabled.
///
/// Text that does not end with a recognized break is returned unchanged, and
/// an empty input yields an empty string.
#[inline]
pub(crate) fn trim_line_break(text: &str) -> &str {
    // Final char of the text together with the byte offset where it begins.
    let (start, last) = match text.char_indices().next_back() {
        Some(found) => found,
        None => return "",
    };

    match last {
        '\u{000A}' => {
            // In CR-aware configurations "\r\n" is a single line break, so
            // the CR directly before the LF is dropped as well.
            if cfg!(any(feature = "cr_lines", feature = "unicode_lines"))
                && text[..start].ends_with('\u{000D}')
            {
                &text[..start - 1]
            } else {
                &text[..start]
            }
        }
        '\u{000D}' if cfg!(any(feature = "cr_lines", feature = "unicode_lines")) => {
            &text[..start]
        }
        '\u{000B}' | '\u{000C}' | '\u{0085}' | '\u{2028}' | '\u{2029}'
            if cfg!(feature = "unicode_lines") =>
        {
            &text[..start]
        }
        _ => text,
    }
}
147
/// Reports whether the final char of `text` is a line break.
///
/// Which characters count as a line break depends on the enabled features:
/// - always: LF (`U+000A`);
/// - with `cr_lines` or `unicode_lines`: CR (`U+000D`);
/// - with `unicode_lines`: VT (`U+000B`), FF (`U+000C`), NEL (`U+0085`),
///   LS (`U+2028`) and PS (`U+2029`).
///
/// An empty string never ends with a line break.
#[inline]
pub(crate) fn ends_with_line_break(text: &str) -> bool {
    let last = match text.chars().next_back() {
        Some(ch) => ch,
        None => return false,
    };

    match last {
        '\u{000A}' => true,
        '\u{000D}' => cfg!(any(feature = "cr_lines", feature = "unicode_lines")),
        '\u{000B}' | '\u{000C}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => {
            cfg!(feature = "unicode_lines")
        }
        _ => false,
    }
}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample text (48 bytes, 37 chars) that mixes LF, CR, CRLF, VT, FF, NEL,
    /// LS and PS with ASCII and multi-byte characters.
    const MIXED_BREAKS: &str = "\u{000A}Hello\u{000D}\u{000A}\u{000D}せ\u{000B}か\u{000C}い\u{0085}. \
                                There\u{2028}is something.\u{2029}";

    /// Walks `text` backwards one line at a time: each step trims the trailing
    /// line break, cuts the text at the start of its last line, and checks the
    /// number of bytes that remain against the next entry of `expected_lens`.
    fn assert_backward_line_walk(mut text: &str, expected_lens: &[usize]) {
        for &expected in expected_lens {
            text = &text[..last_line_start_byte_idx(trim_line_break(text))];
            assert_eq!(expected, text.len());
        }
    }

    #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
    #[test]
    fn last_line_start_byte_idx_lf_01() {
        let cases: &[(usize, &str)] = &[
            (0, ""),
            (0, "Hi"),
            (3, "Hi\u{000A}there."),
            (0, "Hi\u{000B}there."),
            (0, "Hi\u{000C}there."),
            (0, "Hi\u{000D}there."),
            (0, "Hi\u{0085}there."),
            (0, "Hi\u{2028}there."),
            (0, "Hi\u{2029}there."),
        ];

        for &(expected, input) in cases {
            assert_eq!(expected, last_line_start_byte_idx(input));
        }
    }

    #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
    #[test]
    fn last_line_start_byte_idx_lf_02() {
        assert_eq!(48, MIXED_BREAKS.len());
        assert_backward_line_walk(MIXED_BREAKS, &[8, 1, 0]);
    }

    #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
    #[test]
    fn last_line_start_byte_idx_crlf_01() {
        let cases: &[(usize, &str)] = &[
            (0, ""),
            (0, "Hi"),
            (3, "Hi\u{000A}there."),
            (0, "Hi\u{000B}there."),
            (0, "Hi\u{000C}there."),
            (3, "Hi\u{000D}there."),
            (0, "Hi\u{0085}there."),
            (0, "Hi\u{2028}there."),
            (0, "Hi\u{2029}there."),
        ];

        for &(expected, input) in cases {
            assert_eq!(expected, last_line_start_byte_idx(input));
        }
    }

    #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
    #[test]
    fn last_line_start_byte_idx_crlf_02() {
        assert_eq!(48, MIXED_BREAKS.len());
        assert_backward_line_walk(MIXED_BREAKS, &[9, 8, 1, 0]);
    }

    #[cfg(feature = "unicode_lines")]
    #[test]
    fn last_line_start_byte_idx_unicode_01() {
        let cases: &[(usize, &str)] = &[
            (0, ""),
            (0, "Hi"),
            (3, "Hi\u{000A}there."),
            (3, "Hi\u{000B}there."),
            (3, "Hi\u{000C}there."),
            (3, "Hi\u{000D}there."),
            (4, "Hi\u{0085}there."),
            (5, "Hi\u{2028}there."),
            (5, "Hi\u{2029}there."),
        ];

        for &(expected, input) in cases {
            assert_eq!(expected, last_line_start_byte_idx(input));
        }
    }

    #[cfg(feature = "unicode_lines")]
    #[test]
    fn last_line_start_byte_idx_unicode_02() {
        assert_eq!(48, MIXED_BREAKS.len());
        assert_backward_line_walk(MIXED_BREAKS, &[32, 22, 17, 13, 9, 8, 1, 0]);
    }

    #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
    #[test]
    fn trim_line_break_lf_01() {
        let cases: &[(&str, &str)] = &[
            ("", ""),
            ("Hi", "Hi"),
            ("Hi", "Hi\u{000A}"),
            ("Hi\u{000B}", "Hi\u{000B}"),
            ("Hi\u{000C}", "Hi\u{000C}"),
            ("Hi\u{000D}", "Hi\u{000D}"),
            ("Hi\u{0085}", "Hi\u{0085}"),
            ("Hi\u{2028}", "Hi\u{2028}"),
            ("Hi\u{2029}", "Hi\u{2029}"),
            ("\r", "\r\n"),
            ("Hi\r", "Hi\r\n"),
        ];

        for &(expected, input) in cases {
            assert_eq!(expected, trim_line_break(input));
        }
    }

    #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
    #[test]
    fn trim_line_break_crlf_01() {
        let cases: &[(&str, &str)] = &[
            ("", ""),
            ("Hi", "Hi"),
            ("Hi", "Hi\u{000A}"),
            ("Hi\u{000B}", "Hi\u{000B}"),
            ("Hi\u{000C}", "Hi\u{000C}"),
            ("Hi", "Hi\u{000D}"),
            ("Hi\u{0085}", "Hi\u{0085}"),
            ("Hi\u{2028}", "Hi\u{2028}"),
            ("Hi\u{2029}", "Hi\u{2029}"),
            ("", "\r\n"),
            ("Hi", "Hi\r\n"),
        ];

        for &(expected, input) in cases {
            assert_eq!(expected, trim_line_break(input));
        }
    }

    #[cfg(feature = "unicode_lines")]
    #[test]
    fn trim_line_break_unicode_01() {
        let cases: &[(&str, &str)] = &[
            ("", ""),
            ("Hi", "Hi"),
            ("Hi", "Hi\u{000A}"),
            ("Hi", "Hi\u{000B}"),
            ("Hi", "Hi\u{000C}"),
            ("Hi", "Hi\u{000D}"),
            ("Hi", "Hi\u{0085}"),
            ("Hi", "Hi\u{2028}"),
            ("Hi", "Hi\u{2029}"),
            ("", "\r\n"),
            ("Hi", "Hi\r\n"),
        ];

        for &(expected, input) in cases {
            assert_eq!(expected, trim_line_break(input));
        }
    }

    #[test]
    fn ends_with_line_break_01() {
        assert!(ends_with_line_break("\n"));

        #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
        assert!(ends_with_line_break("\r"));

        #[cfg(feature = "unicode_lines")]
        {
            let breaks: &[&str] = &[
                "\u{000A}",
                "\u{000B}",
                "\u{000C}",
                "\u{000D}",
                "\u{0085}",
                "\u{2028}",
                "\u{2029}",
            ];

            for &input in breaks {
                assert!(ends_with_line_break(input));
            }
        }
    }

    #[test]
    fn ends_with_line_break_02() {
        assert!(ends_with_line_break("Hi there!\n"));

        #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
        assert!(ends_with_line_break("Hi there!\r"));

        #[cfg(feature = "unicode_lines")]
        {
            let terminated: &[&str] = &[
                "Hi there!\u{000A}",
                "Hi there!\u{000B}",
                "Hi there!\u{000C}",
                "Hi there!\u{000D}",
                "Hi there!\u{0085}",
                "Hi there!\u{2028}",
                "Hi there!\u{2029}",
            ];

            for &input in terminated {
                assert!(ends_with_line_break(input));
            }
        }
    }

    #[test]
    fn ends_with_line_break_03() {
        let unterminated: &[&str] = &["", "a", "Hi there!"];

        for &input in unterminated {
            assert!(!ends_with_line_break(input));
        }
    }

    #[test]
    fn ends_with_line_break_04() {
        // A break that is followed by another char is not a trailing break,
        // regardless of which line-break features are enabled.
        let break_then_char: &[&str] = &[
            "\na",
            "\ra",
            "\u{000A}a",
            "\u{000B}a",
            "\u{000C}a",
            "\u{000D}a",
            "\u{0085}a",
            "\u{2028}a",
            "\u{2029}a",
        ];

        for &input in break_then_char {
            assert!(!ends_with_line_break(input));
        }
    }

    #[test]
    fn char_to_line_idx_01() {
        // Each entry is (expected line index, char index).
        #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
        let cases: &[(usize, usize)] = &[(0, 0), (1, 1), (2, 8), (2, 38)];

        #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
        let cases: &[(usize, usize)] = &[(0, 0), (1, 1), (2, 8), (3, 9), (3, 38)];

        #[cfg(feature = "unicode_lines")]
        let cases: &[(usize, usize)] = &[
            (0, 0),
            (1, 1),
            (2, 8),
            (3, 9),
            (4, 11),
            (5, 13),
            (6, 15),
            (7, 23),
            (8, 37),
            (8, 38),
        ];

        for &(expected_line, char_idx) in cases {
            assert_eq!(expected_line, char_to_line_idx(MIXED_BREAKS, char_idx));
        }
    }

    #[test]
    fn line_to_char_idx_01() {
        // Each entry is (expected char index, line index).
        #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
        let cases: &[(usize, usize)] = &[(0, 0), (1, 1), (8, 2), (37, 3)];

        #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
        let cases: &[(usize, usize)] = &[(0, 0), (1, 1), (8, 2), (9, 3), (37, 4)];

        #[cfg(feature = "unicode_lines")]
        let cases: &[(usize, usize)] = &[
            (0, 0),
            (1, 1),
            (8, 2),
            (9, 3),
            (11, 4),
            (13, 5),
            (15, 6),
            (23, 7),
            (37, 8),
            (37, 9),
        ];

        for &(expected_char, line_idx) in cases {
            assert_eq!(expected_char, line_to_char_idx(MIXED_BREAKS, line_idx));
        }
    }
}