pest_meta/optimizer/
mod.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 DragoÈ™ Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Different optimizations for pest's ASTs.
11
12use crate::ast::*;
13use std::collections::HashMap;
14
15#[cfg(test)]
16macro_rules! box_tree {
17    ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18      $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19    );
20    ($expr:expr) => ($expr);
21}
22
23mod concatenator;
24mod factorizer;
25mod lister;
26mod restorer;
27mod rotater;
28mod skipper;
29mod unroller;
30
31/// Takes pest's ASTs and optimizes them
32pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33    let map = to_hash_map(&rules);
34    let optimized: Vec<OptimizedRule> = rules
35        .into_iter()
36        .map(rotater::rotate)
37        .map(|rule| skipper::skip(rule, &map))
38        .map(unroller::unroll)
39        .map(concatenator::concatenate)
40        .map(factorizer::factor)
41        .map(lister::list)
42        .map(rule_to_optimized_rule)
43        .collect();
44
45    let optimized_map = to_optimized_hash_map(&optimized);
46    optimized
47        .into_iter()
48        .map(|rule| restorer::restore_on_err(rule, &optimized_map))
49        .collect()
50}
51
52fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
53    fn to_optimized(expr: Expr) -> OptimizedExpr {
54        match expr {
55            Expr::Str(string) => OptimizedExpr::Str(string),
56            Expr::Insens(string) => OptimizedExpr::Insens(string),
57            Expr::Range(start, end) => OptimizedExpr::Range(start, end),
58            Expr::Ident(ident) => OptimizedExpr::Ident(ident),
59            Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
60            Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
61            Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
62            Expr::Seq(lhs, rhs) => {
63                OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
64            }
65            Expr::Choice(lhs, rhs) => {
66                OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
67            }
68            Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
69            Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
70            Expr::Skip(strings) => OptimizedExpr::Skip(strings),
71            Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
72            #[cfg(feature = "grammar-extras")]
73            Expr::PushLiteral(string) => OptimizedExpr::PushLiteral(string),
74            #[cfg(feature = "grammar-extras")]
75            Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
76            #[cfg(feature = "grammar-extras")]
77            Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))),
78            #[cfg(not(feature = "grammar-extras"))]
79            Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"),
80            Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
81                unreachable!("No valid transformation to OptimizedRule")
82            }
83        }
84    }
85
86    OptimizedRule {
87        name: rule.name,
88        ty: rule.ty,
89        expr: to_optimized(rule.expr),
90    }
91}
92
93macro_rules! to_hash_map {
94    ($func_name:ident, $rule:ty, $expr:ty) => {
95        fn $func_name(rules: &[$rule]) -> HashMap<String, $expr> {
96            rules
97                .iter()
98                .map(|r| (r.name.clone(), r.expr.clone()))
99                .collect()
100        }
101    };
102}
103to_hash_map!(to_hash_map, Rule, Expr);
104to_hash_map!(to_optimized_hash_map, OptimizedRule, OptimizedExpr);
105
106/// The optimized version of the pest AST's `Rule`.
107#[derive(Clone, Debug, Eq, PartialEq)]
108pub struct OptimizedRule {
109    /// The name of the rule.
110    pub name: String,
111    /// The type of the rule.
112    pub ty: RuleType,
113    /// The optimized expression of the rule.
114    pub expr: OptimizedExpr,
115}
116
117/// The optimized version of the pest AST's `Expr`.
118///
119/// # Warning: Semantic Versioning
120/// There may be non-breaking changes to the meta-grammar
121/// between minor versions. Those non-breaking changes, however,
122/// may translate into semver-breaking changes due to the additional variants
123/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
124/// future (e.g. by increasing MSRV and non_exhaustive annotations).
125#[derive(Clone, Debug, Eq, PartialEq)]
126pub enum OptimizedExpr {
127    /// Matches an exact string, e.g. `"a"`
128    Str(String),
129    /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
130    Insens(String),
131    /// Matches one character in the range, e.g. `'a'..'z'`
132    Range(String, String),
133    /// Matches the rule with the given name, e.g. `a`
134    Ident(String),
135    /// Matches a custom part of the stack, e.g. `PEEK[..]`
136    PeekSlice(i32, Option<i32>),
137    /// Positive lookahead; matches expression without making progress, e.g. `&e`
138    PosPred(Box<OptimizedExpr>),
139    /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
140    NegPred(Box<OptimizedExpr>),
141    /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
142    Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
143    /// Matches either of two expressions, e.g. `e1 | e2`
144    Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
145    /// Optionally matches an expression, e.g. `e?`
146    Opt(Box<OptimizedExpr>),
147    /// Matches an expression zero or more times, e.g. `e*`
148    Rep(Box<OptimizedExpr>),
149    /// Matches an expression one or more times, e.g. `e+`
150    #[cfg(feature = "grammar-extras")]
151    RepOnce(Box<OptimizedExpr>),
152    /// Continues to match expressions until one of the strings in the `Vec` is found
153    Skip(Vec<String>),
154    /// Matches an expression and pushes it to the stack, e.g. `push(e)`
155    Push(Box<OptimizedExpr>),
156    /// Pushes a literal string to the stack, e.g. `push_literal("a")`
157    #[cfg(feature = "grammar-extras")]
158    PushLiteral(String),
159    /// Matches an expression and assigns a label to it, e.g. #label = exp
160    #[cfg(feature = "grammar-extras")]
161    NodeTag(Box<OptimizedExpr>, String),
162    /// Restores an expression's checkpoint
163    RestoreOnErr(Box<OptimizedExpr>),
164}
165
166impl OptimizedExpr {
167    /// Returns a top-down iterator over the `OptimizedExpr`.
168    pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
169        OptimizedExprTopDownIterator::new(self)
170    }
171
172    /// Applies `f` to the `OptimizedExpr` top-down.
173    pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
174    where
175        F: FnMut(OptimizedExpr) -> OptimizedExpr,
176    {
177        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
178        where
179            F: FnMut(OptimizedExpr) -> OptimizedExpr,
180        {
181            let expr = f(expr);
182
183            match expr {
184                OptimizedExpr::PosPred(expr) => {
185                    let mapped = Box::new(map_internal(*expr, f));
186                    OptimizedExpr::PosPred(mapped)
187                }
188                OptimizedExpr::NegPred(expr) => {
189                    let mapped = Box::new(map_internal(*expr, f));
190                    OptimizedExpr::NegPred(mapped)
191                }
192                OptimizedExpr::Seq(lhs, rhs) => {
193                    let mapped_lhs = Box::new(map_internal(*lhs, f));
194                    let mapped_rhs = Box::new(map_internal(*rhs, f));
195                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
196                }
197                OptimizedExpr::Choice(lhs, rhs) => {
198                    let mapped_lhs = Box::new(map_internal(*lhs, f));
199                    let mapped_rhs = Box::new(map_internal(*rhs, f));
200                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
201                }
202                OptimizedExpr::Rep(expr) => {
203                    let mapped = Box::new(map_internal(*expr, f));
204                    OptimizedExpr::Rep(mapped)
205                }
206                OptimizedExpr::Opt(expr) => {
207                    let mapped = Box::new(map_internal(*expr, f));
208                    OptimizedExpr::Opt(mapped)
209                }
210                OptimizedExpr::Push(expr) => {
211                    let mapped = Box::new(map_internal(*expr, f));
212                    OptimizedExpr::Push(mapped)
213                }
214                expr => expr,
215            }
216        }
217
218        map_internal(self, &mut f)
219    }
220
221    /// Applies `f` to the `OptimizedExpr` bottom-up.
222    pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
223    where
224        F: FnMut(OptimizedExpr) -> OptimizedExpr,
225    {
226        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
227        where
228            F: FnMut(OptimizedExpr) -> OptimizedExpr,
229        {
230            let mapped = match expr {
231                OptimizedExpr::PosPred(expr) => {
232                    let mapped = Box::new(map_internal(*expr, f));
233                    OptimizedExpr::PosPred(mapped)
234                }
235                OptimizedExpr::NegPred(expr) => {
236                    let mapped = Box::new(map_internal(*expr, f));
237                    OptimizedExpr::NegPred(mapped)
238                }
239                OptimizedExpr::Seq(lhs, rhs) => {
240                    let mapped_lhs = Box::new(map_internal(*lhs, f));
241                    let mapped_rhs = Box::new(map_internal(*rhs, f));
242                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
243                }
244                OptimizedExpr::Choice(lhs, rhs) => {
245                    let mapped_lhs = Box::new(map_internal(*lhs, f));
246                    let mapped_rhs = Box::new(map_internal(*rhs, f));
247                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
248                }
249                OptimizedExpr::Rep(expr) => {
250                    let mapped = Box::new(map_internal(*expr, f));
251                    OptimizedExpr::Rep(mapped)
252                }
253                OptimizedExpr::Opt(expr) => {
254                    let mapped = Box::new(map_internal(*expr, f));
255                    OptimizedExpr::Opt(mapped)
256                }
257                OptimizedExpr::Push(expr) => {
258                    let mapped = Box::new(map_internal(*expr, f));
259                    OptimizedExpr::Push(mapped)
260                }
261                expr => expr,
262            };
263
264            f(mapped)
265        }
266
267        map_internal(self, &mut f)
268    }
269}
270
271impl core::fmt::Display for OptimizedExpr {
272    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
273        match self {
274            OptimizedExpr::Str(s) => write!(f, "{:?}", s),
275            OptimizedExpr::Insens(s) => write!(f, "^{:?}", s),
276            OptimizedExpr::Range(start, end) => {
277                let start = start.chars().next().expect("Empty range start.");
278                let end = end.chars().next().expect("Empty range end.");
279                write!(f, "({:?}..{:?})", start, end)
280            }
281            OptimizedExpr::Ident(id) => write!(f, "{}", id),
282            OptimizedExpr::PeekSlice(start, end) => match end {
283                Some(end) => write!(f, "PEEK[{}..{}]", start, end),
284                None => write!(f, "PEEK[{}..]", start),
285            },
286            OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
287            OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
288            OptimizedExpr::Seq(lhs, rhs) => {
289                let mut nodes = Vec::new();
290                nodes.push(lhs);
291                let mut current = rhs;
292                while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() {
293                    nodes.push(lhs);
294                    current = rhs;
295                }
296                nodes.push(current);
297                let sequence = nodes
298                    .iter()
299                    .map(|node| format!("{}", node))
300                    .collect::<Vec<_>>()
301                    .join(" ~ ");
302                write!(f, "({})", sequence)
303            }
304            OptimizedExpr::Choice(lhs, rhs) => {
305                let mut nodes = Vec::new();
306                nodes.push(lhs);
307                let mut current = rhs;
308                while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() {
309                    nodes.push(lhs);
310                    current = rhs;
311                }
312                nodes.push(current);
313                let sequence = nodes
314                    .iter()
315                    .map(|node| format!("{}", node))
316                    .collect::<Vec<_>>()
317                    .join(" | ");
318                write!(f, "({})", sequence)
319            }
320            OptimizedExpr::Opt(expr) => write!(f, "{}?", expr),
321            OptimizedExpr::Rep(expr) => write!(f, "{}*", expr),
322            #[cfg(feature = "grammar-extras")]
323            OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr),
324            OptimizedExpr::Skip(strings) => {
325                let strings = strings
326                    .iter()
327                    .map(|s| format!("{:?}", s))
328                    .collect::<Vec<_>>()
329                    .join(" | ");
330                write!(f, "(!({}) ~ ANY)*", strings)
331            }
332            OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr),
333            #[cfg(feature = "grammar-extras")]
334            OptimizedExpr::PushLiteral(s) => write!(f, "PUSH_LITERAL({:?})", s),
335            #[cfg(feature = "grammar-extras")]
336            OptimizedExpr::NodeTag(expr, tag) => {
337                write!(f, "(#{} = {})", tag, expr)
338            }
339            OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f),
340        }
341    }
342}
343
344/// A top-down iterator over an `OptimizedExpr`.
345pub struct OptimizedExprTopDownIterator {
346    current: Option<OptimizedExpr>,
347    next: Option<OptimizedExpr>,
348    right_branches: Vec<OptimizedExpr>,
349}
350
351impl OptimizedExprTopDownIterator {
352    /// Creates a new top down iterator from an `OptimizedExpr`.
353    pub fn new(expr: &OptimizedExpr) -> Self {
354        let mut iter = OptimizedExprTopDownIterator {
355            current: None,
356            next: None,
357            right_branches: vec![],
358        };
359        iter.iterate_expr(expr.clone());
360        iter
361    }
362
363    fn iterate_expr(&mut self, expr: OptimizedExpr) {
364        self.current = Some(expr.clone());
365        match expr {
366            OptimizedExpr::Seq(lhs, rhs) => {
367                self.right_branches.push(*rhs);
368                self.next = Some(*lhs);
369            }
370            OptimizedExpr::Choice(lhs, rhs) => {
371                self.right_branches.push(*rhs);
372                self.next = Some(*lhs);
373            }
374            OptimizedExpr::PosPred(expr)
375            | OptimizedExpr::NegPred(expr)
376            | OptimizedExpr::Rep(expr)
377            | OptimizedExpr::Opt(expr)
378            | OptimizedExpr::Push(expr) => {
379                self.next = Some(*expr);
380            }
381            _ => {
382                self.next = None;
383            }
384        }
385    }
386}
387
388impl Iterator for OptimizedExprTopDownIterator {
389    type Item = OptimizedExpr;
390
391    fn next(&mut self) -> Option<Self::Item> {
392        let result = self.current.take();
393
394        if let Some(expr) = self.next.take() {
395            self.iterate_expr(expr);
396        } else if let Some(expr) = self.right_branches.pop() {
397            self.iterate_expr(expr);
398        }
399
400        result
401    }
402}
403
404#[cfg(test)]
405mod tests {
406    use super::*;
407
408    #[test]
409    fn rotate() {
410        let rules = {
411            use crate::ast::Expr::*;
412            vec![Rule {
413                name: "rule".to_owned(),
414                ty: RuleType::Normal,
415                expr: box_tree!(Choice(
416                    Choice(
417                        Choice(Str(String::from("a")), Str(String::from("b"))),
418                        Str(String::from("c"))
419                    ),
420                    Str(String::from("d"))
421                )),
422            }]
423        };
424        let rotated = {
425            use crate::optimizer::OptimizedExpr::*;
426            vec![OptimizedRule {
427                name: "rule".to_owned(),
428                ty: RuleType::Normal,
429                expr: box_tree!(Choice(
430                    Str(String::from("a")),
431                    Choice(
432                        Str(String::from("b")),
433                        Choice(Str(String::from("c")), Str(String::from("d")))
434                    )
435                )),
436            }]
437        };
438
439        assert_eq!(optimize(rules), rotated);
440    }
441
442    #[test]
443    fn skip() {
444        let rules = {
445            use crate::ast::Expr::*;
446            vec![Rule {
447                name: "rule".to_owned(),
448                ty: RuleType::Atomic,
449                expr: box_tree!(Rep(Seq(
450                    NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
451                    Ident(String::from("ANY"))
452                ))),
453            }]
454        };
455        let skipped = vec![OptimizedRule {
456            name: "rule".to_owned(),
457            ty: RuleType::Atomic,
458            expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
459        }];
460
461        assert_eq!(optimize(rules), skipped);
462    }
463
464    #[test]
465    fn concat_strings() {
466        let rules = {
467            use crate::ast::Expr::*;
468            vec![Rule {
469                name: "rule".to_owned(),
470                ty: RuleType::Atomic,
471                expr: box_tree!(Seq(
472                    Seq(Str(String::from("a")), Str(String::from("b"))),
473                    Seq(Str(String::from("c")), Str(String::from("d")))
474                )),
475            }]
476        };
477        let concatenated = vec![OptimizedRule {
478            name: "rule".to_owned(),
479            ty: RuleType::Atomic,
480            expr: OptimizedExpr::Str(String::from("abcd")),
481        }];
482
483        assert_eq!(optimize(rules), concatenated);
484    }
485
486    #[test]
487    fn unroll_loop_exact() {
488        let rules = vec![Rule {
489            name: "rule".to_owned(),
490            ty: RuleType::Atomic,
491            expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
492        }];
493        let unrolled = {
494            use crate::optimizer::OptimizedExpr::*;
495            vec![OptimizedRule {
496                name: "rule".to_owned(),
497                ty: RuleType::Atomic,
498                expr: box_tree!(Seq(
499                    Ident(String::from("a")),
500                    Seq(Ident(String::from("a")), Ident(String::from("a")))
501                )),
502            }]
503        };
504
505        assert_eq!(optimize(rules), unrolled);
506    }
507
508    #[test]
509    fn unroll_loop_max() {
510        let rules = vec![Rule {
511            name: "rule".to_owned(),
512            ty: RuleType::Atomic,
513            expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
514        }];
515        let unrolled = {
516            use crate::optimizer::OptimizedExpr::*;
517            vec![OptimizedRule {
518                name: "rule".to_owned(),
519                ty: RuleType::Atomic,
520                expr: box_tree!(Seq(
521                    Opt(Str(String::from("a"))),
522                    Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
523                )),
524            }]
525        };
526
527        assert_eq!(optimize(rules), unrolled);
528    }
529
530    #[test]
531    fn unroll_loop_min() {
532        let rules = vec![Rule {
533            name: "rule".to_owned(),
534            ty: RuleType::Atomic,
535            expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
536        }];
537        let unrolled = {
538            use crate::optimizer::OptimizedExpr::*;
539            vec![OptimizedRule {
540                name: "rule".to_owned(),
541                ty: RuleType::Atomic,
542                expr: box_tree!(Seq(
543                    Str(String::from("a")),
544                    Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
545                )),
546            }]
547        };
548
549        assert_eq!(optimize(rules), unrolled);
550    }
551
552    #[test]
553    fn unroll_loop_min_max() {
554        let rules = vec![Rule {
555            name: "rule".to_owned(),
556            ty: RuleType::Atomic,
557            expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
558        }];
559        let unrolled = {
560            use crate::optimizer::OptimizedExpr::*;
561            vec![OptimizedRule {
562                name: "rule".to_owned(),
563                ty: RuleType::Atomic,
564                /* TODO possible room for improvement here:
565                 * if the sequences were rolled out in the opposite
566                 * order, we could further optimize the strings
567                 * in cases like this.
568                Str(String::from(("aa")),
569                Opt(Str(String::from("a")))
570                */
571                expr: box_tree!(Seq(
572                    Str(String::from("a")),
573                    Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
574                )),
575            }]
576        };
577
578        assert_eq!(optimize(rules), unrolled);
579    }
580
581    #[test]
582    fn concat_insensitive_strings() {
583        let rules = {
584            use crate::ast::Expr::*;
585            vec![Rule {
586                name: "rule".to_owned(),
587                ty: RuleType::Atomic,
588                expr: box_tree!(Seq(
589                    Seq(Insens(String::from("a")), Insens(String::from("b"))),
590                    Seq(Insens(String::from("c")), Insens(String::from("d")))
591                )),
592            }]
593        };
594        let concatenated = vec![OptimizedRule {
595            name: "rule".to_owned(),
596            ty: RuleType::Atomic,
597            expr: OptimizedExpr::Insens(String::from("abcd")),
598        }];
599
600        assert_eq!(optimize(rules), concatenated);
601    }
602
603    #[test]
604    fn long_common_sequence() {
605        let rules = {
606            use crate::ast::Expr::*;
607            vec![Rule {
608                name: "rule".to_owned(),
609                ty: RuleType::Silent,
610                expr: box_tree!(Choice(
611                    Seq(
612                        Ident(String::from("a")),
613                        Seq(Ident(String::from("b")), Ident(String::from("c")))
614                    ),
615                    Seq(
616                        Seq(Ident(String::from("a")), Ident(String::from("b"))),
617                        Ident(String::from("d"))
618                    )
619                )),
620            }]
621        };
622        let optimized = {
623            use crate::optimizer::OptimizedExpr::*;
624            vec![OptimizedRule {
625                name: "rule".to_owned(),
626                ty: RuleType::Silent,
627                expr: box_tree!(Seq(
628                    Ident(String::from("a")),
629                    Seq(
630                        Ident(String::from("b")),
631                        Choice(Ident(String::from("c")), Ident(String::from("d")))
632                    )
633                )),
634            }]
635        };
636
637        assert_eq!(optimize(rules), optimized);
638    }
639
640    #[test]
641    fn short_common_sequence() {
642        let rules = {
643            use crate::ast::Expr::*;
644            vec![Rule {
645                name: "rule".to_owned(),
646                ty: RuleType::Atomic,
647                expr: box_tree!(Choice(
648                    Seq(Ident(String::from("a")), Ident(String::from("b"))),
649                    Ident(String::from("a"))
650                )),
651            }]
652        };
653        let optimized = {
654            use crate::optimizer::OptimizedExpr::*;
655            vec![OptimizedRule {
656                name: "rule".to_owned(),
657                ty: RuleType::Atomic,
658                expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
659            }]
660        };
661
662        assert_eq!(optimize(rules), optimized);
663    }
664
665    #[test]
666    fn impossible_common_sequence() {
667        let rules = {
668            use crate::ast::Expr::*;
669            vec![Rule {
670                name: "rule".to_owned(),
671                ty: RuleType::Silent,
672                expr: box_tree!(Choice(
673                    Ident(String::from("a")),
674                    Seq(Ident(String::from("a")), Ident(String::from("b")))
675                )),
676            }]
677        };
678        let optimized = {
679            use crate::optimizer::OptimizedExpr::*;
680            vec![OptimizedRule {
681                name: "rule".to_owned(),
682                ty: RuleType::Silent,
683                expr: box_tree!(Ident(String::from("a"))),
684            }]
685        };
686
687        assert_eq!(optimize(rules), optimized);
688    }
689
690    #[test]
691    fn lister() {
692        let rules = {
693            use crate::ast::Expr::*;
694            vec![Rule {
695                name: "rule".to_owned(),
696                ty: RuleType::Silent,
697                expr: box_tree!(Seq(
698                    Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
699                    Ident(String::from("a"))
700                )),
701            }]
702        };
703        let optimized = {
704            use crate::optimizer::OptimizedExpr::*;
705            vec![OptimizedRule {
706                name: "rule".to_owned(),
707                ty: RuleType::Silent,
708                expr: box_tree!(Seq(
709                    Ident(String::from("a")),
710                    Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
711                )),
712            }]
713        };
714
715        assert_eq!(optimize(rules), optimized);
716    }
717
718    mod display {
719        use super::super::*;
720        /// In previous implementation of Display for OptimizedExpr
721        /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me),
722        /// Str("\n") will be displayed as
723        /// "
724        /// "
725        ///
726        /// It will not break the compilation in normal use.
727        ///
728        /// But when I use it in automatically generating documents,
729        /// it will quite confusing and we'll be unable to distinguish \n and \r.
730        ///
731        /// And `cargo expand` will emit codes that can't be compiled,
732        /// for it expand `#[doc("...")]` to `/// ...`,
733        /// and when the document comment breaks the line,
734        /// it will be expanded into wrong codes.
735        #[test]
736        fn control_character() {
737            assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\"");
738            assert_eq!(
739                OptimizedExpr::Insens("\n".to_owned()).to_string(),
740                "^\"\\n\"",
741            );
742            assert_eq!(
743                OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(),
744                "('\\n'..'\\r')",
745            );
746            assert_eq!(
747                OptimizedExpr::Skip(vec![
748                    "\n".to_owned(),
749                    "\r".to_owned(),
750                    "\n\r".to_owned(),
751                    "\0".to_owned(),
752                ])
753                .to_string(),
754                r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#,
755            );
756
757            assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\"");
758        }
759
760        #[test]
761        fn str() {
762            assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#);
763        }
764
765        #[test]
766        fn insens() {
767            assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#);
768        }
769
770        #[test]
771        fn range() {
772            assert_eq!(
773                OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(),
774                r#"('a'..'z')"#,
775            );
776        }
777
778        #[test]
779        fn ident() {
780            assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#);
781        }
782
783        #[test]
784        fn peek_slice() {
785            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
786            assert_eq!(
787                OptimizedExpr::PeekSlice(0, Some(-1)).to_string(),
788                "PEEK[0..-1]",
789            );
790            assert_eq!(
791                OptimizedExpr::PeekSlice(2, Some(3)).to_string(),
792                "PEEK[2..3]",
793            );
794            assert_eq!(
795                OptimizedExpr::PeekSlice(2, Some(-1)).to_string(),
796                "PEEK[2..-1]",
797            );
798            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
799        }
800
801        #[test]
802        fn pos_pred() {
803            assert_eq!(
804                OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new(
805                    OptimizedExpr::Ident("a".to_owned()),
806                ))))
807                .to_string(),
808                "&!a",
809            );
810            assert_eq!(
811                OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice(
812                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident(
813                        "a".to_owned(),
814                    )))),
815                    Box::new(OptimizedExpr::Str("a".to_owned())),
816                )))
817                .to_string(),
818                r#"&(a* | "a")"#,
819            );
820            assert_eq!(
821                OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new(
822                    OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))),
823                ))))
824                .to_string(),
825                "&!a",
826            );
827        }
828
829        #[test]
830        fn neg_pred() {
831            assert_eq!(
832                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
833                r#"!e"#,
834            );
835            assert_eq!(
836                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice(
837                    Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
838                        "a".to_owned(),
839                    )))),
840                    Box::new(OptimizedExpr::Str("a".to_owned())),
841                )))
842                .to_string(),
843                r#"!(PUSH(a) | "a")"#,
844            );
845        }
846
847        #[test]
848        fn seq() {
849            assert_eq!(
850                OptimizedExpr::Seq(
851                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
852                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
853                )
854                .to_string(),
855                r#"(e1 ~ e2)"#,
856            );
857            assert_eq!(
858                Expr::Seq(
859                    Box::new(Expr::Ident("e1".to_owned())),
860                    Box::new(Expr::Seq(
861                        Box::new(Expr::Ident("e2".to_owned())),
862                        Box::new(Expr::Ident("e3".to_owned())),
863                    )),
864                )
865                .to_string(),
866                "(e1 ~ e2 ~ e3)",
867            );
868            assert_eq!(
869                Expr::Seq(
870                    Box::new(Expr::Ident("e1".to_owned())),
871                    Box::new(Expr::Seq(
872                        Box::new(Expr::Ident("e2".to_owned())),
873                        Box::new(Expr::Seq(
874                            Box::new(Expr::Ident("e3".to_owned())),
875                            Box::new(Expr::Ident("e4".to_owned())),
876                        )),
877                    )),
878                )
879                .to_string(),
880                "(e1 ~ e2 ~ e3 ~ e4)",
881            );
882            assert_eq!(
883                Expr::Seq(
884                    Box::new(Expr::Ident("e1".to_owned())),
885                    Box::new(Expr::Choice(
886                        Box::new(Expr::Ident("e2".to_owned())),
887                        Box::new(Expr::Seq(
888                            Box::new(Expr::Ident("e3".to_owned())),
889                            Box::new(Expr::Ident("e4".to_owned())),
890                        )),
891                    )),
892                )
893                .to_string(),
894                "(e1 ~ (e2 | (e3 ~ e4)))",
895            );
896            assert_eq!(
897                Expr::Seq(
898                    Box::new(Expr::Ident("e1".to_owned())),
899                    Box::new(Expr::Seq(
900                        Box::new(Expr::Ident("e2".to_owned())),
901                        Box::new(Expr::Choice(
902                            Box::new(Expr::Ident("e3".to_owned())),
903                            Box::new(Expr::Ident("e4".to_owned())),
904                        )),
905                    )),
906                )
907                .to_string(),
908                "(e1 ~ e2 ~ (e3 | e4))",
909            );
910            assert_eq!(
911                OptimizedExpr::Seq(
912                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str(
913                        "a".to_owned(),
914                    )))),
915                    Box::new(OptimizedExpr::Seq(
916                        Box::new(OptimizedExpr::Ident("b".to_owned())),
917                        Box::new(OptimizedExpr::Insens("c".to_owned())),
918                    )),
919                )
920                .to_string(),
921                r#"("a"* ~ b ~ ^"c")"#,
922            );
923            assert_eq!(
924                OptimizedExpr::Seq(
925                    Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range(
926                        "a".to_owned(),
927                        "z".to_owned(),
928                    )))),
929                    Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt(
930                        Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())),
931                    )))),
932                )
933                .to_string(),
934                "(&('a'..'z') ~ !('A'..'Z')?)",
935            );
936        }
937
938        #[test]
939        fn choice() {
940            assert_eq!(
941                OptimizedExpr::Choice(
942                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
943                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
944                )
945                .to_string(),
946                r#"(e1 | e2)"#,
947            );
948            assert_eq!(
949                Expr::Choice(
950                    Box::new(Expr::Ident("e1".to_owned())),
951                    Box::new(Expr::Choice(
952                        Box::new(Expr::Ident("e2".to_owned())),
953                        Box::new(Expr::Ident("e3".to_owned())),
954                    )),
955                )
956                .to_string(),
957                "(e1 | e2 | e3)",
958            );
959            assert_eq!(
960                Expr::Choice(
961                    Box::new(Expr::Ident("e1".to_owned())),
962                    Box::new(Expr::Choice(
963                        Box::new(Expr::Ident("e2".to_owned())),
964                        Box::new(Expr::Choice(
965                            Box::new(Expr::Ident("e3".to_owned())),
966                            Box::new(Expr::Ident("e4".to_owned())),
967                        )),
968                    )),
969                )
970                .to_string(),
971                "(e1 | e2 | e3 | e4)",
972            );
973            assert_eq!(
974                Expr::Choice(
975                    Box::new(Expr::Ident("e1".to_owned())),
976                    Box::new(Expr::Seq(
977                        Box::new(Expr::Ident("e2".to_owned())),
978                        Box::new(Expr::Choice(
979                            Box::new(Expr::Ident("e3".to_owned())),
980                            Box::new(Expr::Ident("e4".to_owned())),
981                        )),
982                    )),
983                )
984                .to_string(),
985                "(e1 | (e2 ~ (e3 | e4)))",
986            );
987            assert_eq!(
988                OptimizedExpr::Choice(
989                    Box::new(OptimizedExpr::Str("a".to_owned())),
990                    Box::new(OptimizedExpr::Choice(
991                        Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
992                            "b".to_owned(),
993                        )))),
994                        Box::new(OptimizedExpr::Insens("c".to_owned())),
995                    )),
996                )
997                .to_string(),
998                r#"("a" | PUSH(b) | ^"c")"#,
999            );
1000        }
1001
1002        #[test]
1003        fn opt() {
1004            assert_eq!(
1005                OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1006                "e?",
1007            );
1008        }
1009
1010        #[test]
1011        fn rep() {
1012            assert_eq!(
1013                OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(),
1014                "x*",
1015            );
1016            assert_eq!(
1017                OptimizedExpr::Rep(Box::new(OptimizedExpr::Range(
1018                    "0".to_owned(),
1019                    "9".to_owned(),
1020                )))
1021                .to_string(),
1022                "('0'..'9')*",
1023            );
1024        }
1025
1026        #[test]
1027        #[cfg(feature = "grammar-extras")]
1028        fn rep_once() {
1029            assert_eq!(
1030                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1031                "e+",
1032            );
1033            assert_eq!(
1034                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range(
1035                    "0".to_owned(),
1036                    "9".to_owned(),
1037                )))
1038                .to_string(),
1039                "('0'..'9')+",
1040            );
1041        }
1042
1043        #[test]
1044        fn skip() {
1045            assert_eq!(
1046                OptimizedExpr::Skip(
1047                    ["a", "bc"]
1048                        .into_iter()
1049                        .map(|s| s.to_owned())
1050                        .collect::<Vec<_>>(),
1051                )
1052                .to_string(),
1053                r#"(!("a" | "bc") ~ ANY)*"#,
1054            );
1055        }
1056
1057        #[test]
1058        fn inline_skip() {
1059            use crate::ast::Expr::*;
1060            let rules = vec![
1061                Rule {
1062                    name: "inline".to_owned(),
1063                    ty: RuleType::Atomic,
1064                    expr: Str("a".to_owned()),
1065                },
1066                Rule {
1067                    name: "skip".to_owned(),
1068                    ty: RuleType::Atomic,
1069                    expr: box_tree!(Rep(Seq(
1070                        NegPred(Choice(
1071                            Ident(String::from("inline")),
1072                            Str(String::from("b"))
1073                        )),
1074                        Ident("ANY".to_owned())
1075                    ))),
1076                },
1077            ];
1078            let map = to_hash_map(&rules);
1079            let rule = skipper::skip(rules[1].clone(), &map);
1080            assert!(matches!(rule, Rule { expr: Skip(..), .. }));
1081            let choices = match rule.expr {
1082                Skip(choices) => choices,
1083                _ => unreachable!(),
1084            };
1085            assert_eq!(choices, vec!["a".to_owned(), "b".to_owned()]);
1086        }
1087
1088        #[test]
1089        fn push() {
1090            assert_eq!(
1091                OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1092                "PUSH(e)",
1093            );
1094        }
1095
1096        #[test]
1097        #[cfg(feature = "grammar-extras")]
1098        fn push_literal() {
1099            assert_eq!(
1100                OptimizedExpr::PushLiteral("a \" b".to_owned()).to_string(),
1101                r#"PUSH_LITERAL("a \" b")"#,
1102            );
1103        }
1104
1105        #[test]
1106        #[cfg(feature = "grammar-extras")]
1107        fn node_tag() {
1108            assert_eq!(
1109                OptimizedExpr::NodeTag(
1110                    Box::new(OptimizedExpr::Ident("expr".to_owned())),
1111                    "label".to_owned(),
1112                )
1113                .to_string(),
1114                r#"(#label = expr)"#,
1115            );
1116            assert_eq!(
1117                OptimizedExpr::NodeTag(
1118                    Box::new(OptimizedExpr::Ident("x".to_owned())),
1119                    "X".to_owned(),
1120                )
1121                .to_string(),
1122                r#"(#X = x)"#,
1123            );
1124            assert_eq!(
1125                OptimizedExpr::NodeTag(
1126                    Box::new(OptimizedExpr::Seq(
1127                        Box::new(OptimizedExpr::Ident("x".to_owned())),
1128                        Box::new(OptimizedExpr::Str("y".to_owned())),
1129                    )),
1130                    "X".to_owned(),
1131                )
1132                .to_string(),
1133                r#"(#X = (x ~ "y"))"#,
1134            );
1135        }
1136
1137        #[test]
1138        fn restore_on_err() {
1139            assert_eq!(
1140                OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned())))
1141                    .to_string(),
1142                "e",
1143            );
1144        }
1145    }
1146}