1use regex_automata::{dense, DenseDFA, SparseDFA, StateID, DFA};
30use std::{fmt, io, marker::PhantomData, str::FromStr};
31
32pub use regex_automata::Error;
33
#[derive(Debug, Clone)]
/// A compiled pattern: a thin wrapper around a DFA that can be matched
/// against any number of inputs.
///
/// `S` is the automaton's state identifier type and `A` is the automaton
/// type. With the defaults this is a `DenseDFA` over an owned `Vec<S>` using
/// `usize` state identifiers.
pub struct Pattern<S = usize, A = DenseDFA<Vec<S>, S>>
where
    S: StateID,
    A: DFA<ID = S>,
{
    /// The automaton that matchers created from this pattern borrow.
    automaton: A,
}
46
#[derive(Debug, Clone)]
/// The state of one match attempt: an automaton plus the state it is
/// currently in.
///
/// The matching methods in this file take the matcher by value, so one
/// matcher is consumed per input. With the default parameters the automaton
/// is a `DenseDFA` over a borrowed `&'a [S]`.
pub struct Matcher<'a, S = usize, A = DenseDFA<&'a [S], S>>
where
    S: StateID,
    A: DFA<ID = S>,
{
    /// The automaton being driven.
    automaton: A,
    /// The automaton state reached after the input consumed so far.
    state: S,
    /// Keeps the `'a` parameter in use even when `A` does not mention it.
    _lt: PhantomData<&'a ()>,
}
60
61impl Pattern {
64 pub fn new(pattern: &str) -> Result<Self, Error> {
90 let automaton = DenseDFA::new(pattern)?;
91 Ok(Pattern { automaton })
92 }
93
94 pub fn new_anchored(pattern: &str) -> Result<Self, Error> {
124 let automaton = dense::Builder::new().anchored(true).build(pattern)?;
125 Ok(Pattern { automaton })
126 }
127}
128
129impl FromStr for Pattern {
130 type Err = Error;
131 fn from_str(s: &str) -> Result<Self, Self::Err> {
132 Self::new(s)
133 }
134}
135
136impl<S, A> Pattern<S, A>
137where
138 S: StateID,
139 A: DFA<ID = S>,
140 Self: for<'a> ToMatcher<'a, S>,
141{
142 #[inline]
144 pub fn matches(&self, s: &impl AsRef<str>) -> bool {
145 self.matcher().matches(s)
146 }
147
148 #[inline]
172 pub fn debug_matches(&self, d: &impl fmt::Debug) -> bool {
173 self.matcher().debug_matches(d)
174 }
175
176 #[inline]
208 pub fn display_matches(&self, d: &impl fmt::Display) -> bool {
209 self.matcher().display_matches(d)
210 }
211
212 #[inline]
216 pub fn read_matches(&self, io: impl io::Read) -> io::Result<bool> {
217 self.matcher().read_matches(io)
218 }
219}
220
221impl<'a, S, A> Matcher<'a, S, A>
224where
225 S: StateID,
226 A: DFA<ID = S>,
227{
228 fn new(automaton: A) -> Self {
229 let state = automaton.start_state();
230 Self {
231 automaton,
232 state,
233 _lt: PhantomData,
234 }
235 }
236
237 #[inline]
238 fn advance(&mut self, input: u8) {
239 self.state = unsafe {
240 self.automaton.next_state_unchecked(self.state, input)
244 };
245 }
246
247 #[inline]
250 pub fn is_matched(&self) -> bool {
251 self.automaton.is_match_state(self.state)
252 }
253
254 pub fn matches(mut self, s: &impl AsRef<str>) -> bool {
257 for &byte in s.as_ref().as_bytes() {
258 self.advance(byte);
259 if self.automaton.is_dead_state(self.state) {
260 return false;
261 }
262 }
263 self.is_matched()
264 }
265
266 pub fn debug_matches(mut self, d: &impl fmt::Debug) -> bool {
269 use std::fmt::Write;
270 write!(&mut self, "{:?}", d).expect("matcher write impl should not fail");
271 self.is_matched()
272 }
273
274 pub fn display_matches(mut self, d: &impl fmt::Display) -> bool {
277 use std::fmt::Write;
278 write!(&mut self, "{}", d).expect("matcher write impl should not fail");
279 self.is_matched()
280 }
281
282 pub fn read_matches(mut self, io: impl io::Read + Sized) -> io::Result<bool> {
286 for r in io.bytes() {
287 self.advance(r?);
288 if self.automaton.is_dead_state(self.state) {
289 return Ok(false);
290 }
291 }
292 Ok(self.is_matched())
293 }
294}
295
296impl<'a, S, A> fmt::Write for Matcher<'a, S, A>
297where
298 S: StateID,
299 A: DFA<ID = S>,
300{
301 fn write_str(&mut self, s: &str) -> fmt::Result {
302 for &byte in s.as_bytes() {
303 self.advance(byte);
304 if self.automaton.is_dead_state(self.state) {
305 break;
306 }
307 }
308 Ok(())
309 }
310}
311
312impl<'a, S, A> io::Write for Matcher<'a, S, A>
313where
314 S: StateID,
315 A: DFA<ID = S>,
316{
317 fn write(&mut self, bytes: &[u8]) -> Result<usize, io::Error> {
318 let mut i = 0;
319 for &byte in bytes {
320 self.advance(byte);
321 i += 1;
322 if self.automaton.is_dead_state(self.state) {
323 break;
324 }
325 }
326 Ok(i)
327 }
328
329 fn flush(&mut self) -> Result<(), io::Error> {
330 Ok(())
331 }
332}
333
/// Conversion from a borrowed pattern into a `Matcher`.
///
/// The `crate::sealed::Sealed` bound refers to a trait inside a private
/// module, so code outside this crate cannot implement `ToMatcher`.
pub trait ToMatcher<'a, S>
where
    Self: crate::sealed::Sealed,
    S: StateID + 'a,
{
    /// The automaton type driven by the returned matcher.
    type Automaton: DFA<ID = S>;
    /// Creates a matcher that borrows `self` for `'a`.
    fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton>;
}
342
// Opts the `Vec`-backed dense pattern into the sealed `ToMatcher` trait.
impl<S> crate::sealed::Sealed for Pattern<S, DenseDFA<Vec<S>, S>> where S: StateID {}
344
345impl<'a, S> ToMatcher<'a, S> for Pattern<S, DenseDFA<Vec<S>, S>>
346where
347 S: StateID + 'a,
348{
349 type Automaton = DenseDFA<&'a [S], S>;
350 fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
351 Matcher::new(self.automaton.as_ref())
352 }
353}
354
355impl<'a, S> ToMatcher<'a, S> for Pattern<S, SparseDFA<Vec<u8>, S>>
356where
357 S: StateID + 'a,
358{
359 type Automaton = SparseDFA<&'a [u8], S>;
360 fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> {
361 Matcher::new(self.automaton.as_ref())
362 }
363}
364
// Opts the `Vec<u8>`-backed sparse pattern into the sealed `ToMatcher` trait.
impl<S> crate::sealed::Sealed for Pattern<S, SparseDFA<Vec<u8>, S>> where S: StateID {}
366
// Private module holding the marker trait used to seal `ToMatcher`: the
// trait is `pub`, but the module is not, so other crates cannot name it.
mod sealed {
    /// Marker trait; it has no items and is implemented in this file only
    /// for the supported `Pattern` types.
    pub trait Sealed {}
}
370
#[cfg(test)]
mod test {
    use super::*;

    /// Borrowed string whose `Debug` and `Display` output is the raw text
    /// (no quotes or escapes), so both formatting paths feed the matcher the
    /// same bytes.
    struct Str<'a>(&'a str);

    /// `io::Read` adapter over the bytes of a `Str`.
    struct ReadStr<'a>(io::Cursor<&'a [u8]>);

    impl<'a> fmt::Display for Str<'a> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            f.write_str(self.0)
        }
    }

    impl<'a> fmt::Debug for Str<'a> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            // Deliberately identical to `Display`.
            fmt::Display::fmt(self, f)
        }
    }

    impl<'a> io::Read for ReadStr<'a> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            io::Read::read(&mut self.0, buf)
        }
    }

    impl<'a> Str<'a> {
        fn new(s: &'a str) -> Self {
            Str(s)
        }

        fn to_reader(self) -> ReadStr<'a> {
            let bytes: &'a [u8] = self.0.as_bytes();
            ReadStr(io::Cursor::new(bytes))
        }
    }

    impl Str<'static> {
        fn hello_world() -> Self {
            Str("hello world")
        }
    }

    /// Matches `input` through the `Debug` formatting path.
    fn via_debug(pat: &Pattern, input: Str) -> bool {
        pat.debug_matches(&input)
    }

    /// Matches `input` through the `Display` formatting path.
    fn via_display(pat: &Pattern, input: Str) -> bool {
        pat.display_matches(&input)
    }

    /// Matches `input` through the `io::Read` path.
    fn via_reader(pat: &Pattern, input: Str) -> bool {
        pat.read_matches(input.to_reader())
            .expect("no io error occurs")
    }

    fn test_debug_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
        for &regex in ["hello world", "hel+o w[orl]{3}d"].iter() {
            let pat = new_pattern(regex).unwrap();
            assert!(pat.debug_matches(&Str::hello_world()));
        }

        let pat = new_pattern("goodbye world").unwrap();
        assert!(!pat.debug_matches(&Str::hello_world()));
    }

    fn test_display_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
        for &regex in ["hello world", "hel+o w[orl]{3}d"].iter() {
            let pat = new_pattern(regex).unwrap();
            assert!(pat.display_matches(&Str::hello_world()));
        }

        let pat = new_pattern("goodbye world").unwrap();
        assert!(!pat.display_matches(&Str::hello_world()));
    }

    fn test_reader_matches(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
        for &regex in ["hello world", "hel+o w[orl]{3}d"].iter() {
            let pat = new_pattern(regex).unwrap();
            let matched = pat
                .read_matches(Str::hello_world().to_reader())
                .expect("no io error should occur");
            assert!(matched);
        }

        let pat = new_pattern("goodbye world").unwrap();
        let matched = pat
            .read_matches(Str::hello_world().to_reader())
            .expect("no io error should occur");
        assert!(!matched);
    }

    fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result<Pattern, Error>) {
        let pat = new_pattern("a+b").unwrap();

        for &accepted in ["ab", "aaaab", "aaaaaaaaaab"].iter() {
            assert!(pat.debug_matches(&Str::new(accepted)));
        }
        for &rejected in ["b", "abb", "aaaaabb"].iter() {
            assert!(!pat.debug_matches(&Str::new(rejected)));
        }
    }

    mod anchored {
        use super::*;

        #[test]
        fn debug_matches() {
            test_debug_matches(Pattern::new_anchored)
        }

        #[test]
        fn display_matches() {
            test_display_matches(Pattern::new_anchored)
        }

        #[test]
        fn reader_matches() {
            test_reader_matches(Pattern::new_anchored)
        }

        #[test]
        fn debug_rep_patterns() {
            test_debug_rep_patterns(Pattern::new_anchored)
        }

        /// An anchored pattern must reject inputs with a non-matching prefix.
        fn test_is_anchored(f: impl Fn(&Pattern, Str) -> bool) {
            let pat = Pattern::new_anchored("a+b").unwrap();

            for &accepted in ["ab", "aaaab", "aaaaaaaaaab"].iter() {
                assert!(f(&pat, Str::new(accepted)));
            }
            for &rejected in ["bab", "ffab", "qqqqqqqaaaaab"].iter() {
                assert!(!f(&pat, Str::new(rejected)));
            }
        }

        #[test]
        fn debug_is_anchored() {
            test_is_anchored(via_debug)
        }

        #[test]
        fn display_is_anchored() {
            test_is_anchored(via_display);
        }

        #[test]
        fn reader_is_anchored() {
            test_is_anchored(via_reader);
        }

        /// An anchored pattern with an explicit `.*?` prefix must accept
        /// arbitrary leading input.
        fn test_explicitly_unanchored(f: impl Fn(&Pattern, Str) -> bool) {
            let pat = Pattern::new_anchored(".*?a+b").unwrap();

            let accepted_inputs = [
                "ab",
                "aaaab",
                "aaaaaaaaaab",
                "bab",
                "ffab",
                "qqqqqqqaaaaab",
            ];
            for &accepted in accepted_inputs.iter() {
                assert!(f(&pat, Str::new(accepted)));
            }
        }

        #[test]
        fn debug_explicitly_unanchored() {
            test_explicitly_unanchored(via_debug)
        }

        #[test]
        fn display_explicitly_unanchored() {
            test_explicitly_unanchored(via_display);
        }

        #[test]
        fn reader_explicitly_unanchored() {
            test_explicitly_unanchored(via_reader);
        }
    }

    mod unanchored {
        use super::*;

        #[test]
        fn debug_matches() {
            test_debug_matches(Pattern::new)
        }

        #[test]
        fn display_matches() {
            test_display_matches(Pattern::new)
        }

        #[test]
        fn reader_matches() {
            test_reader_matches(Pattern::new)
        }

        #[test]
        fn debug_rep_patterns() {
            test_debug_rep_patterns(Pattern::new)
        }

        /// A pattern built with `Pattern::new` must accept arbitrary leading
        /// input.
        fn test_is_unanchored(f: impl Fn(&Pattern, Str) -> bool) {
            let pat = Pattern::new("a+b").unwrap();

            let accepted_inputs = [
                "ab",
                "aaaab",
                "aaaaaaaaaab",
                "bab",
                "ffab",
                "qqqfqqqqaaaaab",
            ];
            for &accepted in accepted_inputs.iter() {
                assert!(f(&pat, Str::new(accepted)));
            }
        }

        #[test]
        fn debug_is_unanchored() {
            test_is_unanchored(via_debug)
        }

        #[test]
        fn display_is_unanchored() {
            test_is_unanchored(via_display);
        }

        #[test]
        fn reader_is_unanchored() {
            test_is_unanchored(via_reader);
        }
    }
}