/// Reports whether `byte_idx` is a position at which `text` may be split.
///
/// Index `0` and `text.len()` always count as breaks. Any other index is a
/// break only when both of the following hold:
///
/// * the byte at `byte_idx` does not have the bit pattern `10xxxxxx`
///   (the UTF-8 continuation-byte pattern), and
/// * the split would not fall between a `0x0D` (CR) byte and a directly
///   following `0x0A` (LF) byte.
///
/// `byte_idx` must not exceed `text.len()`; this is checked with a
/// `debug_assert!`.
#[inline]
pub fn is_break(byte_idx: usize, text: &[u8]) -> bool {
    debug_assert!(byte_idx <= text.len());

    // Both ends of the text are unconditionally valid split points.
    if byte_idx == 0 || byte_idx == text.len() {
        return true;
    }

    let here = text[byte_idx];
    let before = text[byte_idx - 1];

    let is_continuation = here >> 6 == 0b10;
    // Non-short-circuit `&` keeps the CRLF test as a single combined comparison.
    let inside_crlf = (before == 0x0D) & (here == 0x0A);

    !is_continuation && !inside_crlf
}
15
/// Reports whether the boundary between two adjacent byte chunks is a break.
///
/// The seam is the position after the last byte of `left` and before the
/// first byte of `right`. It is a break unless `right` starts with a byte of
/// the form `10xxxxxx` (the UTF-8 continuation-byte pattern), or `left` ends
/// with `0x0D` (CR) while `right` starts with `0x0A` (LF).
///
/// Both slices must be non-empty; this is checked with a `debug_assert!`.
#[inline]
pub fn seam_is_break(left: &[u8], right: &[u8]) -> bool {
    debug_assert!(!left.is_empty() && !right.is_empty());

    let head = right[0];
    if head >> 6 == 0b10 {
        // `right` begins in the middle of a multi-byte sequence.
        return false;
    }

    let tail = left[left.len() - 1];
    // Only a CR immediately followed by an LF forbids the split.
    !((tail == 0x0D) & (head == 0x0A))
}
25
26#[inline]
32pub fn prev_break(byte_idx: usize, text: &[u8]) -> usize {
33 debug_assert!(byte_idx <= text.len());
35
36 if byte_idx == 0 {
37 0
38 } else {
39 let mut boundary_idx = byte_idx - 1;
40 while !is_break(boundary_idx, text) {
41 boundary_idx -= 1;
42 }
43 boundary_idx
44 }
45}
46
47#[inline]
53pub fn next_break(byte_idx: usize, text: &[u8]) -> usize {
54 debug_assert!(byte_idx <= text.len());
56
57 if byte_idx == text.len() {
58 text.len()
59 } else {
60 let mut boundary_idx = byte_idx + 1;
61 while !is_break(boundary_idx, text) {
62 boundary_idx += 1;
63 }
64 boundary_idx
65 }
66}
67
68#[inline]
75pub fn nearest_internal_break(byte_idx: usize, text: &[u8]) -> usize {
76 debug_assert!(byte_idx <= text.len());
78
79 let left = if is_break(byte_idx, text) && byte_idx != text.len() {
81 byte_idx
82 } else {
83 prev_break(byte_idx, text)
84 };
85 let right = next_break(byte_idx, text);
86
87 if left == 0 || (right != text.len() && (byte_idx - left) >= (right - byte_idx)) {
90 return right;
91 } else {
92 return left;
93 }
94}
95
96#[inline]
97pub fn find_good_split(byte_idx: usize, text: &[u8], bias_left: bool) -> usize {
98 debug_assert!(byte_idx <= text.len());
100
101 if is_break(byte_idx, text) {
102 byte_idx
103 } else {
104 let prev = prev_break(byte_idx, text);
105 let next = next_break(byte_idx, text);
106 if bias_left {
107 if prev > 0 {
108 prev
109 } else {
110 next
111 }
112 } else {
113 #[allow(clippy::collapsible_if)] if next < text.len() {
115 next
116 } else {
117 prev
118 }
119 }
120 }
121}
122
// Unit tests for the break-finding helpers: CRLF handling, multi-byte UTF-8
// sequences, and the directional search / split-selection functions.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn crlf_segmenter_01() {
        let text = b"Hello world!\r\nHow's it going?";

        assert!(is_break(0, b""));
        assert!(is_break(0, text));
        assert!(is_break(12, text));
        assert!(!is_break(13, text));
        assert!(is_break(14, text));
        assert!(is_break(19, text));
    }

    #[test]
    fn crlf_segmenter_02() {
        let l = b"Hello world!\r";
        let r = b"\nHow's it going?";

        assert!(!seam_is_break(l, r));
        assert!(!seam_is_break(l, b"\n"));
        assert!(!seam_is_break(b"\r", r));
        assert!(!seam_is_break(b"\r", b"\n"));
        assert!(seam_is_break(r, l));
        assert!(seam_is_break(b"\n", b"\r"));
    }

    #[test]
    fn nearest_internal_break_01() {
        let text = b"Hello world!";
        assert_eq!(1, nearest_internal_break(0, text));
        assert_eq!(6, nearest_internal_break(6, text));
        assert_eq!(11, nearest_internal_break(12, text));
    }

    #[test]
    fn nearest_internal_break_02() {
        let text = b"Hello\r\n world!";
        assert_eq!(5, nearest_internal_break(5, text));
        assert_eq!(7, nearest_internal_break(6, text));
        assert_eq!(7, nearest_internal_break(7, text));
    }

    #[test]
    fn nearest_internal_break_03() {
        let text = b"\r\nHello world!\r\n";
        assert_eq!(2, nearest_internal_break(0, text));
        assert_eq!(2, nearest_internal_break(1, text));
        assert_eq!(2, nearest_internal_break(2, text));
        assert_eq!(14, nearest_internal_break(14, text));
        assert_eq!(14, nearest_internal_break(15, text));
        assert_eq!(14, nearest_internal_break(16, text));
    }

    #[test]
    fn nearest_internal_break_04() {
        let text = b"\r\n";
        assert_eq!(2, nearest_internal_break(0, text));
        assert_eq!(2, nearest_internal_break(1, text));
        assert_eq!(2, nearest_internal_break(2, text));
    }

    #[test]
    fn nearest_internal_break_05() {
        // 'a' (1 byte), U+20AC (3 bytes), 'b' (1 byte): breaks at 0, 1, 4, 5.
        let text = "a\u{20ac}b".as_bytes();
        assert_eq!(1, nearest_internal_break(2, text));
        assert_eq!(4, nearest_internal_break(3, text));
    }

    #[test]
    fn is_break_01() {
        let text = b"\n\r\n\r\n\r\n\r\n\r\n\r";

        assert!(is_break(0, text));
        assert!(is_break(12, text));
        assert!(is_break(3, text));
        assert!(!is_break(6, text));
    }

    #[test]
    fn is_break_02() {
        // 'a' (1 byte), U+00E9 (2 bytes), U+20AC (3 bytes).
        let text = "a\u{e9}\u{20ac}".as_bytes();
        assert_eq!(6, text.len());

        assert!(is_break(0, text));
        assert!(is_break(1, text));
        assert!(!is_break(2, text));
        assert!(is_break(3, text));
        assert!(!is_break(4, text));
        assert!(!is_break(5, text));
        assert!(is_break(6, text));
    }

    #[test]
    fn seam_is_break_01() {
        let text1 = b"\r\n\r\n\r\n";
        let text2 = b"\r\n\r\n";

        assert!(seam_is_break(text1, text2));
    }

    #[test]
    fn seam_is_break_02() {
        let text1 = b"\r\n\r\n\r";
        let text2 = b"\n\r\n\r\n";

        assert!(!seam_is_break(text1, text2));
    }

    #[test]
    fn seam_is_break_03() {
        // Splitting a two-byte sequence down the middle is not a break.
        let text = "\u{e9}".as_bytes();
        let (lead, cont) = text.split_at(1);

        assert!(!seam_is_break(lead, cont));
        assert!(seam_is_break(b"a", text));
    }

    #[test]
    fn prev_break_01() {
        let text = b"Hello\r\n world!";
        assert_eq!(0, prev_break(0, text));
        assert_eq!(0, prev_break(1, text));
        assert_eq!(4, prev_break(5, text));
        assert_eq!(5, prev_break(6, text));
        assert_eq!(5, prev_break(7, text));
        assert_eq!(7, prev_break(8, text));
        assert_eq!(13, prev_break(14, text));
    }

    #[test]
    fn prev_break_02() {
        let text = "a\u{20ac}".as_bytes();
        assert_eq!(0, prev_break(1, text));
        assert_eq!(1, prev_break(2, text));
        assert_eq!(1, prev_break(3, text));
        assert_eq!(1, prev_break(4, text));

        assert_eq!(0, prev_break(0, b""));
    }

    #[test]
    fn next_break_01() {
        let text = b"Hello\r\n world!";
        assert_eq!(1, next_break(0, text));
        assert_eq!(5, next_break(4, text));
        assert_eq!(7, next_break(5, text));
        assert_eq!(7, next_break(6, text));
        assert_eq!(14, next_break(13, text));
        assert_eq!(14, next_break(14, text));
    }

    #[test]
    fn next_break_02() {
        let text = "a\u{20ac}".as_bytes();
        assert_eq!(1, next_break(0, text));
        assert_eq!(4, next_break(1, text));
        assert_eq!(4, next_break(2, text));
        assert_eq!(4, next_break(4, text));

        assert_eq!(0, next_break(0, b""));
    }

    #[test]
    fn find_good_split_01() {
        let text = b"Hello\r\n world!";
        assert_eq!(5, find_good_split(5, text, true));
        assert_eq!(5, find_good_split(5, text, false));
        assert_eq!(5, find_good_split(6, text, true));
        assert_eq!(7, find_good_split(6, text, false));
    }

    #[test]
    fn find_good_split_02() {
        // With no interior break, each bias falls back to the opposite end.
        let text = b"\r\n";
        assert_eq!(2, find_good_split(1, text, true));
        assert_eq!(0, find_good_split(1, text, false));

        let text = b"a\r\n";
        assert_eq!(1, find_good_split(2, text, true));
        assert_eq!(1, find_good_split(2, text, false));

        let text = b"\r\nb";
        assert_eq!(2, find_good_split(1, text, true));
        assert_eq!(2, find_good_split(1, text, false));
    }

    #[test]
    fn find_good_split_03() {
        let text = "a\u{20ac}b".as_bytes();
        assert_eq!(1, find_good_split(2, text, true));
        assert_eq!(4, find_good_split(2, text, false));
        assert_eq!(1, find_good_split(3, text, true));
        assert_eq!(4, find_good_split(3, text, false));
    }
}