1use crate::byte_chunk::{ByteChunk, Chunk};
4
5#[inline]
9pub fn count(text: &str) -> usize {
10 count_impl::<Chunk>(text.as_bytes())
11}
12
13#[inline]
22pub fn from_byte_idx(text: &str, byte_idx: usize) -> usize {
23 let bytes = text.as_bytes();
24
25 let mut i = byte_idx;
28 while Some(true) == bytes.get(i).map(is_trailing_byte) {
29 i -= 1;
30 }
31
32 count_impl::<Chunk>(&bytes[0..i.min(bytes.len())])
33}
34
35#[inline]
41pub fn to_byte_idx(text: &str, char_idx: usize) -> usize {
42 to_byte_idx_impl::<Chunk>(text.as_bytes(), char_idx)
43}
44
45#[inline(always)]
48fn to_byte_idx_impl<T: ByteChunk>(text: &[u8], char_idx: usize) -> usize {
49 if text.len() <= T::SIZE {
50 let mut char_count = 0;
53 for (i, byte) in text.iter().enumerate() {
54 char_count += is_leading_byte(byte) as usize;
55 if char_count > char_idx {
56 return i;
57 }
58 }
59 return text.len();
60 }
61 let (start, middle, _) = unsafe { text.align_to::<T>() };
66
67 let mut byte_count = 0;
68 let mut char_count = 0;
69
70 for byte in start.iter() {
72 char_count += is_leading_byte(byte) as usize;
73 if char_count > char_idx {
74 return byte_count;
75 }
76 byte_count += 1;
77 }
78
79 let fast_path_chunks = middle.len().min((char_idx - char_count) / T::SIZE);
82 let bytes = T::SIZE * 4;
83 for chunks in middle[..fast_path_chunks].chunks_exact(4) {
84 let val1 = count_trailing_chunk(chunks[0]);
85 let val2 = count_trailing_chunk(chunks[1]);
86 let val3 = count_trailing_chunk(chunks[2]);
87 let val4 = count_trailing_chunk(chunks[3]);
88 char_count += bytes - val1.add(val2).add(val3.add(val4)).sum_bytes();
89 byte_count += bytes;
90 }
91
92 for chunk in middle[(fast_path_chunks - fast_path_chunks % 4)..].iter() {
94 let new_char_count = char_count + T::SIZE - count_trailing_chunk(*chunk).sum_bytes();
95 if new_char_count >= char_idx {
96 break;
97 }
98 char_count = new_char_count;
99 byte_count += T::SIZE;
100 }
101
102 let end = &text[byte_count..];
104 for byte in end.iter() {
105 char_count += is_leading_byte(byte) as usize;
106 if char_count > char_idx {
107 break;
108 }
109 byte_count += 1;
110 }
111
112 byte_count
113}
114
115#[inline(always)]
116pub(crate) fn count_impl<T: ByteChunk>(text: &[u8]) -> usize {
117 if text.len() < T::SIZE {
118 return text.iter().map(|x| is_leading_byte(x) as usize).sum();
121 }
122 let (start, middle, end) = unsafe { text.align_to::<T>() };
124
125 let mut inv_count = 0;
126
127 inv_count += start.iter().filter(|x| is_trailing_byte(x)).count();
129
130 for chunks in middle.chunks_exact(4) {
132 let val1 = count_trailing_chunk(chunks[0]);
133 let val2 = count_trailing_chunk(chunks[1]);
134 let val3 = count_trailing_chunk(chunks[2]);
135 let val4 = count_trailing_chunk(chunks[3]);
136 inv_count += val1.add(val2).add(val3.add(val4)).sum_bytes();
137 }
138 let mut acc = T::zero();
139 for chunk in middle.chunks_exact(4).remainder() {
140 acc = acc.add(count_trailing_chunk(*chunk));
141 }
142 inv_count += acc.sum_bytes();
143
144 inv_count += end.iter().filter(|x| is_trailing_byte(x)).count();
146
147 text.len() - inv_count
148}
149
150#[inline(always)]
151fn is_leading_byte(byte: &u8) -> bool {
152 (byte & 0xC0) != 0x80
153}
154
155#[inline(always)]
156fn is_trailing_byte(byte: &u8) -> bool {
157 (byte & 0xC0) == 0x80
158}
159
160#[inline(always)]
161fn count_trailing_chunk<T: ByteChunk>(val: T) -> T {
162 val.bitand(T::splat(0xc0)).cmp_eq_byte(0x80)
163}
164
165#[cfg(test)]
168mod tests {
169 use super::*;
170
171 const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
173 a fine day, isn't it?\nAren't you glad \
174 we're alive?\nこんにちは、みんなさん!";
175
176 #[test]
177 fn count_01() {
178 let text = "Hello せかい! Hello せかい! Hello せかい! Hello せかい! Hello せかい!";
179
180 assert_eq!(54, count(text));
181 }
182
183 #[test]
184 fn count_02() {
185 assert_eq!(100, count(TEXT_LINES));
186 }
187
188 #[test]
189 fn from_byte_idx_01() {
190 let text = "Hello せかい!";
191 assert_eq!(0, from_byte_idx(text, 0));
192 assert_eq!(1, from_byte_idx(text, 1));
193 assert_eq!(6, from_byte_idx(text, 6));
194 assert_eq!(6, from_byte_idx(text, 7));
195 assert_eq!(6, from_byte_idx(text, 8));
196 assert_eq!(7, from_byte_idx(text, 9));
197 assert_eq!(7, from_byte_idx(text, 10));
198 assert_eq!(7, from_byte_idx(text, 11));
199 assert_eq!(8, from_byte_idx(text, 12));
200 assert_eq!(8, from_byte_idx(text, 13));
201 assert_eq!(8, from_byte_idx(text, 14));
202 assert_eq!(9, from_byte_idx(text, 15));
203 assert_eq!(10, from_byte_idx(text, 16));
204 assert_eq!(10, from_byte_idx(text, 17));
205 assert_eq!(10, from_byte_idx(text, 18));
206 assert_eq!(10, from_byte_idx(text, 19));
207 }
208
209 #[test]
210 fn from_byte_idx_02() {
211 let text = "";
212 assert_eq!(0, from_byte_idx(text, 0));
213 assert_eq!(0, from_byte_idx(text, 1));
214
215 let text = "h";
216 assert_eq!(0, from_byte_idx(text, 0));
217 assert_eq!(1, from_byte_idx(text, 1));
218 assert_eq!(1, from_byte_idx(text, 2));
219
220 let text = "hi";
221 assert_eq!(0, from_byte_idx(text, 0));
222 assert_eq!(1, from_byte_idx(text, 1));
223 assert_eq!(2, from_byte_idx(text, 2));
224 assert_eq!(2, from_byte_idx(text, 3));
225 }
226
227 #[test]
228 fn from_byte_idx_03() {
229 let text = "せかい";
230 assert_eq!(0, from_byte_idx(text, 0));
231 assert_eq!(0, from_byte_idx(text, 1));
232 assert_eq!(0, from_byte_idx(text, 2));
233 assert_eq!(1, from_byte_idx(text, 3));
234 assert_eq!(1, from_byte_idx(text, 4));
235 assert_eq!(1, from_byte_idx(text, 5));
236 assert_eq!(2, from_byte_idx(text, 6));
237 assert_eq!(2, from_byte_idx(text, 7));
238 assert_eq!(2, from_byte_idx(text, 8));
239 assert_eq!(3, from_byte_idx(text, 9));
240 assert_eq!(3, from_byte_idx(text, 10));
241 assert_eq!(3, from_byte_idx(text, 11));
242 assert_eq!(3, from_byte_idx(text, 12));
243 }
244
245 #[test]
246 fn from_byte_idx_04() {
247 for i in 0..88 {
249 assert_eq!(i, from_byte_idx(TEXT_LINES, i));
250 }
251
252 for i in 88..125 {
254 assert_eq!(88 + ((i - 88) / 3), from_byte_idx(TEXT_LINES, i));
255 }
256
257 for i in 125..130 {
259 assert_eq!(100, from_byte_idx(TEXT_LINES, i));
260 }
261 }
262
263 #[test]
264 fn to_byte_idx_01() {
265 let text = "Hello せかい!";
266 assert_eq!(0, to_byte_idx(text, 0));
267 assert_eq!(1, to_byte_idx(text, 1));
268 assert_eq!(2, to_byte_idx(text, 2));
269 assert_eq!(5, to_byte_idx(text, 5));
270 assert_eq!(6, to_byte_idx(text, 6));
271 assert_eq!(12, to_byte_idx(text, 8));
272 assert_eq!(15, to_byte_idx(text, 9));
273 assert_eq!(16, to_byte_idx(text, 10));
274 }
275
276 #[test]
277 fn to_byte_idx_02() {
278 let text = "せかい";
279 assert_eq!(0, to_byte_idx(text, 0));
280 assert_eq!(3, to_byte_idx(text, 1));
281 assert_eq!(6, to_byte_idx(text, 2));
282 assert_eq!(9, to_byte_idx(text, 3));
283 }
284
285 #[test]
286 fn to_byte_idx_03() {
287 let text = "Hello world!";
288 assert_eq!(0, to_byte_idx(text, 0));
289 assert_eq!(1, to_byte_idx(text, 1));
290 assert_eq!(8, to_byte_idx(text, 8));
291 assert_eq!(11, to_byte_idx(text, 11));
292 assert_eq!(12, to_byte_idx(text, 12));
293 }
294
295 #[test]
296 fn to_byte_idx_04() {
297 let text = "Hello world! Hello せかい! Hello world! Hello せかい! \
298 Hello world! Hello せかい! Hello world! Hello せかい! \
299 Hello world! Hello せかい! Hello world! Hello せかい! \
300 Hello world! Hello せかい! Hello world! Hello せかい!";
301 assert_eq!(0, to_byte_idx(text, 0));
302 assert_eq!(30, to_byte_idx(text, 24));
303 assert_eq!(60, to_byte_idx(text, 48));
304 assert_eq!(90, to_byte_idx(text, 72));
305 assert_eq!(115, to_byte_idx(text, 93));
306 assert_eq!(120, to_byte_idx(text, 96));
307 assert_eq!(150, to_byte_idx(text, 120));
308 assert_eq!(180, to_byte_idx(text, 144));
309 assert_eq!(210, to_byte_idx(text, 168));
310 assert_eq!(239, to_byte_idx(text, 191));
311 }
312
313 #[test]
314 fn to_byte_idx_05() {
315 for i in 0..88 {
317 assert_eq!(i, to_byte_idx(TEXT_LINES, i));
318 }
319
320 for i in 88..100 {
322 assert_eq!(88 + ((i - 88) * 3), to_byte_idx(TEXT_LINES, i));
323 }
324
325 for i in 100..110 {
327 assert_eq!(124, to_byte_idx(TEXT_LINES, i));
328 }
329 }
330}