1use super::{Regex, Region, SearchOptions};
2use std::iter::FusedIterator;
3
4impl Regex {
5 pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
9 let mut region = Region::new();
10 self.search_with_options(
11 text,
12 0,
13 text.len(),
14 SearchOptions::SEARCH_OPTION_NONE,
15 Some(&mut region),
16 )
17 .map(|pos| Captures {
18 text,
19 region,
20 offset: pos,
21 })
22 }
23
24 pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
47 FindMatches {
48 regex: self,
49 region: Region::new(),
50 text,
51 last_end: 0,
52 last_match_end: None,
53 }
54 }
55
56 pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> FindCaptures<'r, 't> {
81 FindCaptures {
82 regex: self,
83 text,
84 last_end: 0,
85 last_match_end: None,
86 }
87 }
88
89 pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> {
109 RegexSplits {
110 finder: self.find_iter(text),
111 last: 0,
112 }
113 }
114
115 pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: usize) -> RegexSplitsN<'r, 't> {
138 RegexSplitsN {
139 splits: self.split(text),
140 n: limit,
141 }
142 }
143
144 pub fn scan_with_region<F>(
147 &self,
148 to_search: &str,
149 region: &mut Region,
150 options: SearchOptions,
151 mut callback: F,
152 ) -> i32
153 where
154 F: Fn(i32, i32, &Region) -> bool,
155 {
156 use onig_sys::{onig_scan, OnigRegion};
157 use std::os::raw::{c_int, c_void};
158
159 let start = to_search.as_ptr();
161 let end = to_search[to_search.len()..].as_ptr();
162
163 unsafe extern "C" fn scan_cb<F>(
164 i: c_int,
165 j: c_int,
166 r: *mut OnigRegion,
167 ud: *mut c_void,
168 ) -> c_int
169 where
170 F: Fn(i32, i32, &Region) -> bool,
171 {
172 let region = Region::clone_from_raw(r);
173 let callback = &*(ud as *mut F);
174 if callback(i, j, ®ion) {
175 0
176 } else {
177 -1
178 }
179 }
180
181 unsafe {
182 onig_scan(
183 self.raw,
184 start,
185 end,
186 (&mut region.raw) as *mut ::onig_sys::OnigRegion,
187 options.bits(),
188 Some(scan_cb::<F>),
189 &mut callback as *mut F as *mut c_void,
190 )
191 }
192 }
193
194 pub fn scan<'t, CB>(&self, to_search: &'t str, callback: CB)
199 where
200 CB: Fn(i32, Captures<'t>) -> bool,
201 {
202 let mut region = Region::new();
203 self.scan_with_region(
204 to_search,
205 &mut region,
206 SearchOptions::SEARCH_OPTION_NONE,
207 |n, s, region| {
208 let captures = Captures {
209 text: to_search,
210 region: region.clone(),
211 offset: s as usize,
212 };
213 callback(n, captures)
214 },
215 );
216 }
217}
218
219#[derive(Debug)]
227pub struct Captures<'t> {
228 text: &'t str,
229 region: Region,
230 offset: usize,
231}
232
233impl<'t> Captures<'t> {
234 pub fn pos(&self, pos: usize) -> Option<(usize, usize)> {
239 self.region.pos(pos)
240 }
241
242 pub fn at(&self, pos: usize) -> Option<&'t str> {
245 self.pos(pos).map(|(beg, end)| &self.text[beg..end])
246 }
247
248 pub fn len(&self) -> usize {
250 self.region.len()
251 }
252
253 pub fn is_empty(&self) -> bool {
255 self.len() == 0
256 }
257
258 pub fn iter(&'t self) -> SubCaptures<'t> {
261 SubCaptures { idx: 0, caps: self }
262 }
263
264 pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
268 SubCapturesPos { idx: 0, caps: self }
269 }
270
271 pub fn offset(&self) -> usize {
273 self.offset
274 }
275}
276
277pub struct SubCaptures<'t> {
282 idx: usize,
283 caps: &'t Captures<'t>,
284}
285
286impl<'t> Iterator for SubCaptures<'t> {
287 type Item = Option<&'t str>;
288
289 fn next(&mut self) -> Option<Option<&'t str>> {
290 if self.idx < self.caps.len() {
291 self.idx += 1;
292 Some(self.caps.at(self.idx - 1))
293 } else {
294 None
295 }
296 }
297
298 fn size_hint(&self) -> (usize, Option<usize>) {
299 let size = self.caps.len();
300 (size, Some(size))
301 }
302
303 fn count(self) -> usize {
304 self.caps.len()
305 }
306}
307
308impl<'t> FusedIterator for SubCaptures<'t> {}
309
310impl<'t> ExactSizeIterator for SubCaptures<'t> {}
311
312pub struct SubCapturesPos<'t> {
318 idx: usize,
319 caps: &'t Captures<'t>,
320}
321
322impl<'t> Iterator for SubCapturesPos<'t> {
323 type Item = Option<(usize, usize)>;
324
325 fn next(&mut self) -> Option<Option<(usize, usize)>> {
326 if self.idx < self.caps.len() {
327 self.idx += 1;
328 Some(self.caps.pos(self.idx - 1))
329 } else {
330 None
331 }
332 }
333
334 fn size_hint(&self) -> (usize, Option<usize>) {
335 let size = self.caps.len();
336 (size, Some(size))
337 }
338
339 fn count(self) -> usize {
340 self.caps.len()
341 }
342}
343
344impl<'t> FusedIterator for SubCapturesPos<'t> {}
345
346impl<'t> ExactSizeIterator for SubCapturesPos<'t> {}
347
348pub struct FindMatches<'r, 't> {
357 regex: &'r Regex,
358 region: Region,
359 text: &'t str,
360 last_end: usize,
361 last_match_end: Option<usize>,
362}
363
364impl<'r, 't> Iterator for FindMatches<'r, 't> {
365 type Item = (usize, usize);
366
367 fn next(&mut self) -> Option<(usize, usize)> {
368 if self.last_end > self.text.len() {
369 return None;
370 }
371 self.region.clear();
372 self.regex.search_with_options(
373 self.text,
374 self.last_end,
375 self.text.len(),
376 SearchOptions::SEARCH_OPTION_NONE,
377 Some(&mut self.region),
378 )?;
379 let (s, e) = self.region.pos(0).unwrap();
380
381 if e == s && self.last_match_end.map_or(false, |l| l == e) {
384 self.last_end += self.text[self.last_end..]
385 .chars()
386 .next()
387 .map(|c| c.len_utf8())
388 .unwrap_or(1);
389 return self.next();
390 } else {
391 self.last_end = e;
392 self.last_match_end = Some(e);
393 }
394
395 Some((s, e))
396 }
397}
398
399impl<'r, 't> FusedIterator for FindMatches<'r, 't> {}
400
401pub struct FindCaptures<'r, 't> {
409 regex: &'r Regex,
410 text: &'t str,
411 last_end: usize,
412 last_match_end: Option<usize>,
413}
414
415impl<'r, 't> Iterator for FindCaptures<'r, 't> {
416 type Item = Captures<'t>;
417
418 fn next(&mut self) -> Option<Captures<'t>> {
419 if self.last_end > self.text.len() {
420 return None;
421 }
422
423 let mut region = Region::new();
424 let r = self.regex.search_with_options(
425 self.text,
426 self.last_end,
427 self.text.len(),
428 SearchOptions::SEARCH_OPTION_NONE,
429 Some(&mut region),
430 )?;
431 let (s, e) = region.pos(0).unwrap();
432
433 if e == s && self.last_match_end.map_or(false, |l| l == e) {
436 self.last_end += self.text[self.last_end..]
437 .chars()
438 .next()
439 .map(|c| c.len_utf8())
440 .unwrap_or(1);
441 return self.next();
442 } else {
443 self.last_end = e;
444 self.last_match_end = Some(e);
445 }
446 Some(Captures {
447 text: self.text,
448 region,
449 offset: r,
450 })
451 }
452}
453
454impl<'r, 't> FusedIterator for FindCaptures<'r, 't> {}
455
456pub struct RegexSplits<'r, 't> {
461 finder: FindMatches<'r, 't>,
462 last: usize,
463}
464
465impl<'r, 't> Iterator for RegexSplits<'r, 't> {
466 type Item = &'t str;
467
468 fn next(&mut self) -> Option<&'t str> {
469 let text = self.finder.text;
470 match self.finder.next() {
471 None => {
472 if self.last >= text.len() {
473 None
474 } else {
475 let s = &text[self.last..];
476 self.last = text.len();
477 Some(s)
478 }
479 }
480 Some((s, e)) => {
481 let matched = &text[self.last..s];
482 self.last = e;
483 Some(matched)
484 }
485 }
486 }
487}
488
489impl<'r, 't> FusedIterator for RegexSplits<'r, 't> {}
490
491pub struct RegexSplitsN<'r, 't> {
498 splits: RegexSplits<'r, 't>,
499 n: usize,
500}
501
502impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
503 type Item = &'t str;
504
505 fn next(&mut self) -> Option<&'t str> {
506 if self.n == 0 {
507 return None;
508 }
509 self.n -= 1;
510 if self.n == 0 {
511 let text = self.splits.finder.text;
512 Some(&text[self.splits.last..])
513 } else {
514 self.splits.next()
515 }
516 }
517
518 fn size_hint(&self) -> (usize, Option<usize>) {
519 (0, Some(self.n))
520 }
521}
522
523impl<'r, 't> FusedIterator for RegexSplitsN<'r, 't> {}
524
#[cfg(test)]
mod tests {
    use super::super::*;

    /// Positions and text of every group, including a group that did not
    /// take part in the match.
    #[test]
    fn test_regex_captures() {
        let re = Regex::new("e(l+)|(r+)").unwrap();
        let caps = re.captures("hello").unwrap();

        assert_eq!(caps.len(), 3);
        assert!(!caps.is_empty());

        assert_eq!(caps.pos(0), Some((1, 4)));
        assert_eq!(caps.pos(1), Some((2, 4)));
        assert_eq!(caps.pos(2), None);

        assert_eq!(caps.at(0), Some("ell"));
        assert_eq!(caps.at(1), Some("ll"));
        assert_eq!(caps.at(2), None);
    }

    /// `Captures::iter` yields the text of every group in order.
    #[test]
    fn test_regex_subcaptures() {
        let re = Regex::new("e(l+)").unwrap();
        let caps = re.captures("hello").unwrap();
        let groups: Vec<_> = caps.iter().collect();
        assert_eq!(groups.len(), 2);
        assert_eq!(groups[0], Some("ell"));
        assert_eq!(groups[1], Some("ll"));
    }

    /// `Captures::iter_pos` yields the position of every group in order.
    #[test]
    fn test_regex_subcapturespos() {
        let re = Regex::new("e(l+)").unwrap();
        let caps = re.captures("hello").unwrap();
        let spans: Vec<_> = caps.iter_pos().collect();
        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0], Some((1, 4)));
        assert_eq!(spans[1], Some((2, 4)));
    }

    /// Plain, non-empty matches.
    #[test]
    fn test_find_iter() {
        let re = Regex::new(r"\d+").unwrap();
        let found: Vec<_> = re.find_iter("a12b2").collect();
        assert_eq!(found, vec![(1, 3), (4, 5)]);
    }

    /// A single empty match ahead of the non-empty ones.
    #[test]
    fn test_find_iter_one_zero_length() {
        let re = Regex::new(r"\d*").unwrap();
        let found: Vec<_> = re.find_iter("a1b2").collect();
        assert_eq!(found, vec![(0, 0), (1, 2), (3, 4)]);
    }

    /// Several consecutive empty matches between non-empty ones.
    #[test]
    fn test_find_iter_many_zero_length() {
        let re = Regex::new(r"\d*").unwrap();
        let found: Vec<_> = re.find_iter("a1bbb2").collect();
        assert_eq!(found, vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)]);
    }

    /// An empty match that follows a non-empty one at a later position.
    #[test]
    fn test_find_iter_empty_after_match() {
        let re = Regex::new(r"b|(?=,)").unwrap();
        let found: Vec<_> = re.find_iter("ba,").collect();
        assert_eq!(found, vec![(0, 1), (2, 2)]);
    }

    /// Empty matches must advance past the location they were found at.
    #[test]
    fn test_zero_length_matches_jumps_past_match_location() {
        let re = Regex::new(r"\b").unwrap();
        let found: Vec<_> = re.find_iter("test string").collect();
        assert_eq!(found, [(0, 0), (4, 4), (5, 5), (11, 11)]);
    }

    /// `captures_iter` reports the same spans as `find_iter` would.
    #[test]
    fn test_captures_iter() {
        let re = Regex::new(r"\d+").unwrap();
        let all: Vec<_> = re.captures_iter("a12b2").collect();
        assert_eq!(all[0].pos(0).unwrap(), (1, 3));
        assert_eq!(all[1].pos(0).unwrap(), (4, 5));
    }

    /// The offset of a match is kept by `captures` and by `captures_iter`.
    #[test]
    fn test_captures_stores_match_offset() {
        let re = Regex::new(r"\d+\.(\d+)").unwrap();

        let first = re.captures("100 - 3.1415 / 2.0").unwrap();
        assert_eq!(6, first.offset());

        let offsets: Vec<_> = re
            .captures_iter("1 - 3234.3 * 123.2 - 100")
            .map(|caps| caps.offset())
            .collect();
        assert_eq!(vec![4, 13], offsets);
    }
}