pest/
parser_state.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! The core functionality of parsing grammar.
11//! State of parser during the process of rules handling.
12
13use alloc::borrow::{Cow, ToOwned};
14use alloc::boxed::Box;
15use alloc::collections::BTreeSet;
16use alloc::rc::Rc;
17use alloc::string::String;
18use alloc::sync::Arc;
19use alloc::vec;
20use alloc::vec::Vec;
21use core::fmt::{Debug, Display, Formatter};
22use core::num::NonZeroUsize;
23use core::ops::Deref; // used in BorrowedOrRc.as_str
24use core::ops::Range;
25use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
26
27use crate::error::{Error, ErrorVariant};
28use crate::iterators::pairs::new;
29use crate::iterators::{pairs, QueueableToken};
30use crate::position::Position;
31use crate::span::Span;
32use crate::stack::Stack;
33use crate::RuleType;
34
35/// The current lookahead status of a [`ParserState`].
36///
37/// [`ParserState`]: struct.ParserState.html
38#[derive(Clone, Copy, Debug, Eq, PartialEq)]
39pub enum Lookahead {
40    /// The positive predicate, written as an ampersand &,
41    /// attempts to match its inner expression.
42    /// If the inner expression succeeds, parsing continues,
43    /// but at the same position as the predicate —
44    /// &foo ~ bar is thus a kind of "AND" statement:
45    /// "the input string must match foo AND bar".
46    /// If the inner expression fails,
47    /// the whole expression fails too.
48    Positive,
49    /// The negative predicate, written as an exclamation mark !,
50    /// attempts to match its inner expression.
51    /// If the inner expression fails, the predicate succeeds
52    /// and parsing continues at the same position as the predicate.
53    /// If the inner expression succeeds, the predicate fails —
54    /// !foo ~ bar is thus a kind of "NOT" statement:
55    /// "the input string must match bar but NOT foo".
56    Negative,
57    /// No lookahead (i.e. it will consume input).
58    None,
59}
60
61/// The current atomicity of a [`ParserState`].
62///
63/// [`ParserState`]: struct.ParserState.html
64#[derive(Clone, Copy, Debug, Eq, PartialEq)]
65pub enum Atomicity {
66    /// prevents implicit whitespace: inside an atomic rule,
67    /// the tilde ~ means "immediately followed by",
68    /// and repetition operators (asterisk * and plus sign +)
69    /// have no implicit separation. In addition, all other rules
70    /// called from an atomic rule are also treated as atomic.
71    /// (interior matching rules are silent)
72    Atomic,
73    /// The same as atomic, but inner tokens are produced as normal.
74    CompoundAtomic,
75    /// implicit whitespace is enabled
76    NonAtomic,
77}
78
79/// Type alias to simplify specifying the return value of chained closures.
80pub type ParseResult<S> = Result<S, S>;
81
82/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
83#[derive(Clone, Copy, Debug, Eq, PartialEq)]
84pub enum MatchDir {
85    /// from the bottom to the top of the stack
86    BottomToTop,
87    /// from the top to the bottom of the stack
88    TopToBottom,
89}
90
91static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
92
93/// Sets the maximum call limit for the parser state
94/// to prevent stack overflows or excessive execution times
95/// in some grammars.
96/// If set, the calls are tracked as a running total
97/// over all non-terminal rules that can nest closures
98/// (which are passed to transform the parser state).
99///
100/// # Arguments
101///
102/// * `limit` - The maximum number of calls. If None,
103///             the number of calls is unlimited.
104pub fn set_call_limit(limit: Option<NonZeroUsize>) {
105    CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
106}
107
108static ERROR_DETAIL: AtomicBool = AtomicBool::new(false);
109
110/// Sets whether information for more error details
111/// should be collected. This is useful for debugging
112/// parser errors (as it leads to more comprehensive
113/// error messages), but it has a higher performance cost.
114/// (hence, it's off by default)
115///
116/// # Arguments
117///
118/// * `enabled` - Whether to enable the collection for
119///               more error details.
120pub fn set_error_detail(enabled: bool) {
121    ERROR_DETAIL.store(enabled, Ordering::Relaxed);
122}
123
124#[derive(Debug)]
125struct CallLimitTracker {
126    current_call_limit: Option<(usize, usize)>,
127}
128
129impl Default for CallLimitTracker {
130    fn default() -> Self {
131        let limit = CALL_LIMIT.load(Ordering::Relaxed);
132        let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
133        Self { current_call_limit }
134    }
135}
136
137impl CallLimitTracker {
138    fn limit_reached(&self) -> bool {
139        self.current_call_limit
140            .map_or(false, |(current, limit)| current >= limit)
141    }
142
143    fn increment_depth(&mut self) {
144        if let Some((current, _)) = &mut self.current_call_limit {
145            *current += 1;
146        }
147    }
148}
149
150/// Number of call stacks that may result from a sequence of rules parsing.
151const CALL_STACK_INITIAL_CAPACITY: usize = 20;
152/// Max (un)expected number of tokens that we may see on the parsing error position.
153const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30;
154/// Max rule children number for which we'll extend calls stacks.
155///
156/// In case rule we're working with has too many children rules that failed in parsing,
157/// we don't want to store long stacks for all of them. If rule has more than this number
158/// of failed children, they all will be collapsed in a parent rule.
159const CALL_STACK_CHILDREN_THRESHOLD: usize = 4;
160
161/// Structure tracking errored parsing call (associated with specific `ParserState` function).
162#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
163pub enum ParseAttempt<R> {
164    /// Call of `rule` errored.
165    Rule(R),
166    /// Call of token element (e.g., `match_string` or `match_insensitive`) errored.
167    /// Works as indicator of that leaf node is not a rule. In order to get the token value we
168    /// can address `ParseAttempts` `(un)expected_tokens`.
169    Token,
170}
171
172impl<R> ParseAttempt<R> {
173    pub fn get_rule(&self) -> Option<&R> {
174        match self {
175            ParseAttempt::Rule(r) => Some(r),
176            ParseAttempt::Token => None,
177        }
178    }
179}
180
181/// Rules call stack.
182/// Contains sequence of rule calls that resulted in new parsing attempt.
183#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
184pub struct RulesCallStack<R> {
185    /// Deepest rule caused a parsing error (ParseAttempt::Token transformed into a rule).
186    pub deepest: ParseAttempt<R>,
187    /// Most top rule covering `deepest`.
188    pub parent: Option<R>,
189}
190
191impl<R> RulesCallStack<R> {
192    fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> {
193        RulesCallStack {
194            deepest,
195            parent: None,
196        }
197    }
198}
199
200#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
201pub enum ParsingToken {
202    Sensitive { token: String },
203    Insensitive { token: String },
204    Range { start: char, end: char },
205    BuiltInRule,
206}
207
208impl Display for ParsingToken {
209    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
210        match self {
211            ParsingToken::Sensitive { token } => write!(f, "{token}"),
212            ParsingToken::Insensitive { token } => write!(f, "{}", token.to_uppercase()),
213            ParsingToken::Range { start, end } => write!(f, "{start}..{end}"),
214            ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE"),
215        }
216    }
217}
218/// A helper that provides efficient string handling without unnecessary copying.
219/// We use `Arc<String>` instead of `Cow<'i, str>` to avoid copying strings when cloning the `Owned` variant, since
220/// `Arc::clone` only increments a reference count. [SpanOrLiteral] needs to be [Send] and [Sync]`, so we use [Arc]
221/// instead of [Rc].
222///
223/// (We need to clone this struct to detach it from the `&self` borrow in [SpanOrLiteral::as_borrowed_or_rc], so that
224/// we can then call `self.match_string` (a `mut self` method).
225#[derive(Debug, Clone)]
226enum BorrowedOrArc<'i> {
227    Borrowed(&'i str),
228    Owned(Arc<String>),
229}
230
231/// A holder for a literal string, for use in `push_literal`. This is typically a `&'static str`, but is an owned
232/// `Rc<String>` for the pest vm.
233impl<'i> BorrowedOrArc<'i> {
234    fn as_str<'a: 'i>(&'a self) -> &'a str {
235        match self {
236            BorrowedOrArc::Borrowed(s) => s,
237            BorrowedOrArc::Owned(s) => s.deref(),
238        }
239    }
240}
241
242impl From<Cow<'static, str>> for BorrowedOrArc<'_> {
243    fn from(value: Cow<'static, str>) -> Self {
244        match value {
245            Cow::Borrowed(s) => Self::Borrowed(s),
246            Cow::Owned(s) => Self::Owned(Arc::new(s)),
247        }
248    }
249}
250
251#[derive(Debug, Clone)]
252enum SpanOrLiteral<'i> {
253    Span(Span<'i>),
254    Literal(BorrowedOrArc<'i>),
255}
256
257impl<'i> SpanOrLiteral<'i> {
258    #[inline]
259    fn as_borrowed_or_rc(&self) -> BorrowedOrArc<'i> {
260        match self {
261            Self::Span(s) => BorrowedOrArc::Borrowed(s.as_str()),
262            Self::Literal(s) => s.clone(),
263        }
264    }
265}
266
267/// Structure that tracks all the parsing attempts made on the max position.
268/// We want to give an error hint about parsing rules that succeeded
269/// at the farthest input position.
270/// The intuition is such rules will be most likely the query user initially wanted to write.
271#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
272pub struct ParseAttempts<R> {
273    /// Indicates whether the parsing attempts are tracked.
274    enabled: bool,
275    /// Vec of rule calls sequences awaiting tokens at the same `max_position`.
276    /// If there are several stacks in vec, it means all those rule stacks are "equal"
277    /// because their attempts occurred on the same position.
278    pub call_stacks: Vec<RulesCallStack<R>>,
279    /// Tokens that could be putted at `max_position`
280    /// in order to get a valid grammar query.
281    expected_tokens: Vec<ParsingToken>,
282    /// Tokens that we've prohibited to be putted at `max_position`
283    /// in order to get a valid grammar query.
284    unexpected_tokens: Vec<ParsingToken>,
285    /// Max position at which we were expecting to see one of `expected_tokens`.
286    pub max_position: usize,
287}
288
289impl<R: RuleType> ParseAttempts<R> {
290    /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens`
291    /// initialized with capacity.
292    pub fn new() -> Self {
293        Self {
294            call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY),
295            expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
296            unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
297            max_position: 0,
298            enabled: ERROR_DETAIL.load(Ordering::Relaxed),
299        }
300    }
301
302    /// Get number of currently present call stacks.
303    fn call_stacks_number(&self) -> usize {
304        self.call_stacks.len()
305    }
306
307    pub fn expected_tokens(&self) -> Vec<ParsingToken> {
308        self.expected_tokens
309            .iter()
310            .cloned()
311            .collect::<BTreeSet<_>>()
312            .into_iter()
313            .collect()
314    }
315
316    pub fn unexpected_tokens(&self) -> Vec<ParsingToken> {
317        self.unexpected_tokens
318            .iter()
319            .cloned()
320            .collect::<BTreeSet<_>>()
321            .into_iter()
322            .collect()
323    }
324
325    /// Retrieve call stacks.
326    pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> {
327        self.call_stacks
328            .iter()
329            .cloned()
330            .collect::<BTreeSet<_>>()
331            .into_iter()
332            .collect()
333    }
334
335    /// In case we've tried to parse a rule, which start position is bigger than previous
336    /// `max_position` it means that we've advanced in our parsing and found better candidate.
337    ///
338    /// `start_index` is:
339    /// * Number of call stacks present in state at the moment current `rule` was called. The idea
340    ///   is that we'd like to update only those stacks that originated from the current `rule` and
341    ///   not from those that were called previously.
342    /// * 0 in case we've successfully parsed some token since the moment `rule` was called.
343    fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) {
344        let mut non_token_call_stacks = Vec::new();
345        let mut token_call_stack_met = false;
346        for call_stack in self.call_stacks.iter().skip(start_index) {
347            if matches!(call_stack.deepest, ParseAttempt::Token) {
348                token_call_stack_met = true;
349            } else {
350                non_token_call_stacks.push(call_stack.clone())
351            }
352        }
353        if token_call_stack_met && non_token_call_stacks.is_empty() {
354            // If `non_token_call_stacks` is not empty we wouldn't like to add a new standalone
355            // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a
356            // rule) as soon as it doesn't give us any useful additional info.
357            non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token));
358        }
359        self.call_stacks
360            .splice(start_index.., non_token_call_stacks);
361
362        let children_number_over_threshold =
363            self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD;
364        if children_number_over_threshold {
365            self.call_stacks.truncate(start_index);
366            self.call_stacks
367                .push(RulesCallStack::new(ParseAttempt::Rule(rule)));
368        } else {
369            for call_stack in self.call_stacks.iter_mut().skip(start_index) {
370                if matches!(call_stack.deepest, ParseAttempt::Token) {
371                    call_stack.deepest = ParseAttempt::Rule(rule);
372                } else {
373                    call_stack.parent = Some(rule);
374                }
375            }
376        }
377    }
378
379    /// If `expected` flag is set to false, it means we've successfully parsed token being in the
380    /// state of negative lookahead and want to track `token` in the `unexpected_tokens`. Otherwise,
381    /// we want to track it the `expected_tokens`. Let's call chosen vec a `target_vec`.
382    ///
383    /// In case `position` is:
384    /// * Equal to `max_position`, add `token` to `target_vec`,
385    /// * Bigger than `max_position`, set `token` as the only new element of `target_vec`.
386    #[allow(clippy::comparison_chain)]
387    fn try_add_new_token(
388        &mut self,
389        token: ParsingToken,
390        start_position: usize,
391        position: usize,
392        negative_lookahead: bool,
393    ) {
394        let target_vec_push_token = |attempts: &mut ParseAttempts<R>| {
395            let target_vec = if negative_lookahead {
396                &mut attempts.unexpected_tokens
397            } else {
398                &mut attempts.expected_tokens
399            };
400            target_vec.push(token);
401        };
402
403        if position > self.max_position {
404            if negative_lookahead && start_position > self.max_position {
405                // We encountered a sequence under negative lookahead.
406                // We would like to track only first failed token in this sequence (which
407                // `start_position` should be equal to `self.max_position`).
408                return;
409            }
410            target_vec_push_token(self);
411
412            if negative_lookahead {
413                // In case of successful parsing of token under negative lookahead the only
414                // thing we'd like to do is to track the token in the `unexpected_tokens`.
415                return;
416            }
417            self.max_position = position;
418            self.expected_tokens.clear();
419            self.unexpected_tokens.clear();
420            self.call_stacks.clear();
421            self.call_stacks
422                .push(RulesCallStack::new(ParseAttempt::Token));
423        } else if position == self.max_position {
424            target_vec_push_token(self);
425            self.call_stacks
426                .push(RulesCallStack::new(ParseAttempt::Token));
427        }
428    }
429
430    /// Reset state in case we've successfully parsed some token in
431    /// `match_string` or `match_insensetive`.
432    fn nullify_expected_tokens(&mut self, new_max_position: usize) {
433        self.call_stacks.clear();
434        self.expected_tokens.clear();
435        self.unexpected_tokens.clear();
436        self.max_position = new_max_position;
437    }
438}
439
440impl<R: RuleType> Default for ParseAttempts<R> {
441    fn default() -> Self {
442        Self::new()
443    }
444}
445
446/// The complete state of a [`Parser`].
447///
448/// [`Parser`]: trait.Parser.html
449#[derive(Debug)]
450pub struct ParserState<'i, R: RuleType> {
451    /// Current position from which we try to apply some parser function.
452    /// Initially is 0.
453    /// E.g., we are parsing `create user 'Bobby'` query, we parsed "create" via `match_insensitive`
454    /// and switched our `position` from 0 to the length of "create".
455    ///
456    /// E.g., see `match_string` -> `self.position.match_string(string)` which updates `self.pos`.
457    position: Position<'i>,
458    /// Queue representing rules partially (`QueueableToken::Start`) and
459    /// totally (`QueueableToken::End`) parsed. When entering rule we put it in the queue in a state
460    /// of `Start` and after all its sublogic (subrules or strings) are parsed, we change it to
461    /// `End` state.
462    queue: Vec<QueueableToken<'i, R>>,
463    /// Status set in case specific lookahead logic is used in grammar.
464    /// See `Lookahead` for more information.
465    lookahead: Lookahead,
466    /// Rules that we HAVE expected, tried to parse, but failed.
467    pos_attempts: Vec<R>,
468    /// Rules that we have NOT expected, tried to parse, but failed.
469    neg_attempts: Vec<R>,
470    /// Max position in the query from which we've tried to parse some rule but failed.
471    attempt_pos: usize,
472    /// Current atomicity status. For more information see `Atomicity`.
473    atomicity: Atomicity,
474    /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP
475    /// invocations).
476    stack: Stack<SpanOrLiteral<'i>>,
477    /// Used for setting max parser calls limit.
478    call_tracker: CallLimitTracker,
479    /// Together with tracking of `pos_attempts` and `attempt_pos`
480    /// as a pair of (list of rules that we've tried to parse but failed, max parsed position)
481    /// we track those rules (which we've tried to parse at the same max pos) at this helper struct.
482    ///
483    /// Note, that we may try to parse several rules on different positions. We want to track only
484    /// those rules, which attempt position is bigger, because we consider that it's nearer to the
485    /// query that user really wanted to pass.
486    ///
487    /// E.g. we have a query `create user "Bobby"` and two root rules:
488    /// * CreateUser  = { "create" ~ "user"  ~ Name }
489    /// * CreateTable = { "create" ~ "table" ~ Name }
490    /// * Name = { SOME_DEFINITION }
491    ///
492    /// While parsing the query we'll update tracker position to the start of "Bobby", because we'd
493    /// successfully parse "create" + "user" (and not "table").
494    parse_attempts: ParseAttempts<R>,
495}
496
497/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
498///
499/// # Examples
500///
501/// ```
502/// # use pest;
503/// let input = "";
504/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
505/// ```
506#[allow(clippy::perf)]
507pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
508where
509    F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
510{
511    let state = ParserState::new(input);
512
513    match f(state) {
514        Ok(state) => {
515            let len = state.queue.len();
516            Ok(new(Rc::new(state.queue), input, None, 0, len))
517        }
518        Err(mut state) => {
519            let variant = if state.reached_call_limit() {
520                ErrorVariant::CustomError {
521                    message: "call limit reached".to_owned(),
522                }
523            } else {
524                state.pos_attempts.sort();
525                state.pos_attempts.dedup();
526                state.neg_attempts.sort();
527                state.neg_attempts.dedup();
528                ErrorVariant::ParsingError {
529                    positives: state.pos_attempts.clone(),
530                    negatives: state.neg_attempts.clone(),
531                }
532            };
533
534            if state.parse_attempts.enabled {
535                Err(Error::new_from_pos_with_parsing_attempts(
536                    variant,
537                    Position::new_internal(input, state.attempt_pos),
538                    state.parse_attempts.clone(),
539                ))
540            } else {
541                Err(Error::new_from_pos(
542                    variant,
543                    Position::new_internal(input, state.attempt_pos),
544                ))
545            }
546        }
547    }
548}
549
550impl<'i, R: RuleType> ParserState<'i, R> {
551    /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
552    /// will be passed from closure to closure based on the needs of the specified `Parser`.
553    ///
554    /// # Examples
555    ///
556    /// ```
557    /// # use pest;
558    /// let input = "";
559    /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
560    /// ```
561    pub fn new(input: &'i str) -> Box<Self> {
562        Box::new(ParserState {
563            position: Position::from_start(input),
564            queue: vec![],
565            lookahead: Lookahead::None,
566            pos_attempts: vec![],
567            neg_attempts: vec![],
568            attempt_pos: 0,
569            atomicity: Atomicity::NonAtomic,
570            stack: Stack::new(),
571            call_tracker: Default::default(),
572            parse_attempts: ParseAttempts::new(),
573        })
574    }
575
576    /// Get all parse attempts after process of parsing is finished.
577    pub fn get_parse_attempts(&self) -> &ParseAttempts<R> {
578        &self.parse_attempts
579    }
580
581    /// Returns a reference to the current `Position` of the `ParserState`.
582    ///
583    /// # Examples
584    ///
585    /// ```
586    /// # use pest;
587    /// # #[allow(non_camel_case_types)]
588    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
589    /// enum Rule {
590    ///     ab
591    /// }
592    ///
593    /// let input = "ab";
594    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
595    /// let position = state.position();
596    /// assert_eq!(position.pos(), 0);
597    /// ```
598    pub fn position(&self) -> &Position<'i> {
599        &self.position
600    }
601
602    /// Returns the current atomicity of the `ParserState`.
603    ///
604    /// # Examples
605    ///
606    /// ```
607    /// # use pest;
608    /// # use pest::Atomicity;
609    /// # #[allow(non_camel_case_types)]
610    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
611    /// enum Rule {
612    ///     ab
613    /// }
614    ///
615    /// let input = "ab";
616    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
617    /// let atomicity = state.atomicity();
618    /// assert_eq!(atomicity, Atomicity::NonAtomic);
619    /// ```
620    pub fn atomicity(&self) -> Atomicity {
621        self.atomicity
622    }
623
624    #[inline]
625    fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
626        if self.call_tracker.limit_reached() {
627            return Err(self);
628        }
629        self.call_tracker.increment_depth();
630        Ok(self)
631    }
632
633    #[inline]
634    fn reached_call_limit(&self) -> bool {
635        self.call_tracker.limit_reached()
636    }
637
638    /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
639    /// meant to match the rule.
640    ///
641    /// # Examples
642    ///
643    /// ```
644    /// # use pest;
645    /// # #[allow(non_camel_case_types)]
646    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
647    /// enum Rule {
648    ///     a
649    /// }
650    ///
651    /// let input = "a";
652    /// let pairs: Vec<_> = pest::state(input, |state| {
653    ///     state.rule(Rule::a, |s| Ok(s))
654    /// }).unwrap().collect();
655    ///
656    /// assert_eq!(pairs.len(), 1);
657    /// ```
658    #[inline]
659    pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
660    where
661        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
662    {
663        self = self.inc_call_check_limit()?;
664        // Position from which this `rule` starts parsing.
665        let actual_pos = self.position.pos();
666        // Remember index of the `self.queue` element that will be associated with this `rule`.
667        let index = self.queue.len();
668
669        let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
670            (self.pos_attempts.len(), self.neg_attempts.len())
671        } else {
672            // Attempts have not been cleared yet since the attempt_pos is older.
673            (0, 0)
674        };
675
676        if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
677            // Pair's position will only be known after running the closure.
678            self.queue.push(QueueableToken::Start {
679                end_token_index: 0,
680                input_pos: actual_pos,
681            });
682        }
683
684        // Remember attempts number before `f` call.
685        // In `track` using this variable we can say, how many attempts were added
686        // during children rules traversal.
687        let attempts = self.attempts_at(actual_pos);
688        // Number of call stacks present in `self.parse_attempts` before `f` call.
689        // We need to remember this number only in case there wasn't found any farther attempt.
690        // E.g. we are handling rule, on start position of which may be tested two
691        // children rules. At the moment we'll return from `f` call below,
692        // there will be two more children rules in `self.parse_attempts` that we'll
693        // consider to be the children of current `rule`.
694        let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number();
695        // Max parsing attempt position at the moment of `rule` handling.
696        // It case it's raised during children rules handling, it means
697        // we've made a parsing progress.
698        let remember_max_position = self.parse_attempts.max_position;
699
700        let result = f(self);
701
702        let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| {
703            if new_state.parse_attempts.max_position > remember_max_position {
704                // It means that one of `match_string` or e.g. `match_insensetive` function calls
705                // have already erased `self.parse_attempts.call_stacks` and that previously
706                // remembered values are not valid anymore.
707                remember_call_stacks_number = 0;
708            }
709            if !matches!(new_state.atomicity, Atomicity::Atomic) {
710                new_state
711                    .parse_attempts
712                    .try_add_new_stack_rule(rule, remember_call_stacks_number);
713            }
714        };
715
716        match result {
717            Ok(mut new_state) => {
718                if new_state.lookahead == Lookahead::Negative {
719                    new_state.track(
720                        rule,
721                        actual_pos,
722                        pos_attempts_index,
723                        neg_attempts_index,
724                        attempts,
725                    );
726                }
727
728                if new_state.lookahead == Lookahead::None
729                    && new_state.atomicity != Atomicity::Atomic
730                {
731                    // Index of `QueueableToken::End` token added below
732                    // that corresponds to previously added `QueueableToken::Start` token.
733                    let new_index = new_state.queue.len();
734                    match new_state.queue[index] {
735                        QueueableToken::Start {
736                            ref mut end_token_index,
737                            ..
738                        } => *end_token_index = new_index,
739                        _ => unreachable!(),
740                    };
741
742                    let new_pos = new_state.position.pos();
743
744                    new_state.queue.push(QueueableToken::End {
745                        start_token_index: index,
746                        rule,
747                        tag: None,
748                        input_pos: new_pos,
749                    });
750                }
751
752                // Note, that we need to count positive parsing results too, because we can fail in
753                // optional rule call inside which may lie the farthest
754                // parsed token.
755                if new_state.parse_attempts.enabled {
756                    try_add_rule_to_stack(&mut new_state);
757                }
758                Ok(new_state)
759            }
760            Err(mut new_state) => {
761                if new_state.lookahead != Lookahead::Negative {
762                    new_state.track(
763                        rule,
764                        actual_pos,
765                        pos_attempts_index,
766                        neg_attempts_index,
767                        attempts,
768                    );
769                    if new_state.parse_attempts.enabled {
770                        try_add_rule_to_stack(&mut new_state);
771                    }
772                }
773
774                if new_state.lookahead == Lookahead::None
775                    && new_state.atomicity != Atomicity::Atomic
776                {
777                    new_state.queue.truncate(index);
778                }
779
780                Err(new_state)
781            }
782        }
783    }
784
785    /// Tag current node
786    ///
787    /// # Examples
788    ///
789    /// Try to recognize the one specified in a set of characters
790    ///
791    /// ```
792    /// use pest::{state, ParseResult, ParserState, iterators::Pair};
793    /// #[allow(non_camel_case_types)]
794    /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
795    /// enum Rule {
796    ///     character,
797    /// }
798    /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
799    ///     state.sequence(|state| {
800    ///         character(state)
801    ///             .and_then(|state| character(state))
802    ///             .and_then(|state| character(state))
803    ///             .and_then(|state| state.tag_node("c"))
804    ///             .and_then(|state| character(state))
805    ///     })
806    /// }
807    /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
808    ///     state.rule(Rule::character, |state| state.match_range('a'..'z'))
809    /// }
810    ///
811    /// let input = "abcd";
812    /// let pairs = state(input, mark_c).unwrap();
813    /// // find all node tag as `c`
814    /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
815    /// assert_eq!(find[0].as_str(), "c")
816    /// ```
817    #[inline]
818    pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> {
819        if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
820            *old = Some(tag)
821        }
822        Ok(self)
823    }
824
825    /// Get number of allowed rules attempts + prohibited rules attempts.
826    fn attempts_at(&self, pos: usize) -> usize {
827        if self.attempt_pos == pos {
828            self.pos_attempts.len() + self.neg_attempts.len()
829        } else {
830            0
831        }
832    }
833
834    fn track(
835        &mut self,
836        rule: R,
837        pos: usize,
838        pos_attempts_index: usize,
839        neg_attempts_index: usize,
840        prev_attempts: usize,
841    ) {
842        if self.atomicity == Atomicity::Atomic {
843            return;
844        }
845
846        // If nested rules made no progress, there is no use to report them; it's only useful to
847        // track the current rule, the exception being when only one attempt has been made during
848        // the children rules.
849        let curr_attempts = self.attempts_at(pos);
850        if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
851            return;
852        }
853
854        if pos == self.attempt_pos {
855            self.pos_attempts.truncate(pos_attempts_index);
856            self.neg_attempts.truncate(neg_attempts_index);
857        }
858
859        if pos > self.attempt_pos {
860            self.pos_attempts.clear();
861            self.neg_attempts.clear();
862            self.attempt_pos = pos;
863        }
864
865        let attempts = if self.lookahead != Lookahead::Negative {
866            &mut self.pos_attempts
867        } else {
868            &mut self.neg_attempts
869        };
870
871        if pos == self.attempt_pos {
872            attempts.push(rule);
873        }
874    }
875
876    /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
877    /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
878    /// `Box<ParserState>` otherwise.
879    ///
880    /// This method is useful to parse sequences that only match together which usually come in the
881    /// form of chained `Result`s with
882    /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
883    ///
884    ///
885    /// # Examples
886    ///
887    /// ```
888    /// # use pest;
889    /// # #[allow(non_camel_case_types)]
890    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
891    /// enum Rule {
892    ///     a
893    /// }
894    ///
895    /// let input = "a";
896    /// let pairs: Vec<_> = pest::state(input, |state| {
897    ///     state.sequence(|s| {
898    ///         s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
899    ///             s.match_string("b")
900    ///         })
901    ///     }).or_else(|s| {
902    ///         Ok(s)
903    ///     })
904    /// }).unwrap().collect();
905    ///
906    /// assert_eq!(pairs.len(), 0);
907    /// ```
908    #[inline]
909    pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
910    where
911        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
912    {
913        self = self.inc_call_check_limit()?;
914        let token_index = self.queue.len();
915        let initial_pos = self.position;
916
917        let result = f(self);
918
919        match result {
920            Ok(new_state) => Ok(new_state),
921            Err(mut new_state) => {
922                // Restore the initial position and truncate the token queue.
923                new_state.position = initial_pos;
924                new_state.queue.truncate(token_index);
925                Err(new_state)
926            }
927        }
928    }
929
930    /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
931    /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
932    ///
933    /// # Examples
934    ///
935    /// ```
936    /// # use pest;
937    /// # #[allow(non_camel_case_types)]
938    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
939    /// enum Rule {
940    ///     ab
941    /// }
942    ///
943    /// let input = "aab";
944    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
945    /// let mut result = state.repeat(|s| {
946    ///     s.match_string("a")
947    /// });
948    /// assert!(result.is_ok());
949    /// assert_eq!(result.unwrap().position().pos(), 2);
950    ///
951    /// state = pest::ParserState::new(input);
952    /// result = state.repeat(|s| {
953    ///     s.match_string("b")
954    /// });
955    /// assert!(result.is_ok());
956    /// assert_eq!(result.unwrap().position().pos(), 0);
957    /// ```
958    #[inline]
959    pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
960    where
961        F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
962    {
963        self = self.inc_call_check_limit()?;
964        let mut result = f(self);
965
966        loop {
967            match result {
968                Ok(state) => result = f(state),
969                Err(state) => return Ok(state),
970            };
971        }
972    }
973
974    /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
975    /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
976    ///
977    /// # Examples
978    ///
979    /// ```
980    /// # use pest;
981    /// # #[allow(non_camel_case_types)]
982    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
983    /// enum Rule {
984    ///     ab
985    /// }
986    ///
987    /// let input = "ab";
988    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
989    /// let result = state.optional(|s| {
990    ///     s.match_string("ab")
991    /// });
992    /// assert!(result.is_ok());
993    ///
994    /// state = pest::ParserState::new(input);
995    /// let result = state.optional(|s| {
996    ///     s.match_string("ac")
997    /// });
998    /// assert!(result.is_ok());
999    /// ```
1000    #[inline]
1001    pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1002    where
1003        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1004    {
1005        self = self.inc_call_check_limit()?;
1006        match f(self) {
1007            Ok(state) | Err(state) => Ok(state),
1008        }
1009    }
1010
1011    /// Generic function to handle result of char/string/range parsing
1012    /// in order to track (un)expected tokens.
1013    fn handle_token_parse_result(
1014        &mut self,
1015        start_position: usize,
1016        token: ParsingToken,
1017        parse_succeeded: bool,
1018    ) {
1019        // New position after tracked parsed element for case of `parse_succeded` is true.
1020        // Position of parsing failure otherwise.
1021        let current_pos = self.position.pos();
1022
1023        if parse_succeeded {
1024            if self.lookahead == Lookahead::Negative {
1025                self.parse_attempts
1026                    .try_add_new_token(token, start_position, current_pos, true);
1027            } else if current_pos > self.parse_attempts.max_position {
1028                self.parse_attempts.nullify_expected_tokens(current_pos);
1029            }
1030        } else if self.lookahead != Lookahead::Negative {
1031            self.parse_attempts
1032                .try_add_new_token(token, start_position, current_pos, false);
1033        }
1034    }
1035
1036    /// Attempts to match a single character based on a filter function. Returns `Ok` with the
1037    /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
1038    /// otherwise.
1039    ///
1040    /// # Examples
1041    ///
1042    /// ```
1043    /// # use pest;
1044    /// # #[allow(non_camel_case_types)]
1045    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1046    /// enum Rule {}
1047    ///
1048    /// let input = "ab";
1049    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1050    /// let result = state.match_char_by(|c| c.is_ascii());
1051    /// assert!(result.is_ok());
1052    /// assert_eq!(result.unwrap().position().pos(), 1);
1053    ///
1054    /// let input = "❤";
1055    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1056    /// let result = state.match_char_by(|c| c.is_ascii());
1057    /// assert!(result.is_err());
1058    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1059    /// ```
1060    #[inline]
1061    pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1062    where
1063        F: FnOnce(char) -> bool,
1064    {
1065        let start_position = self.position.pos();
1066        let succeeded = self.position.match_char_by(f);
1067        if self.parse_attempts.enabled {
1068            let token = ParsingToken::BuiltInRule;
1069            self.handle_token_parse_result(start_position, token, succeeded);
1070        }
1071        if succeeded {
1072            Ok(self)
1073        } else {
1074            Err(self)
1075        }
1076    }
1077
1078    /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
1079    /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
1080    ///
1081    /// # Examples
1082    ///
1083    /// ```
1084    /// # use pest;
1085    /// # #[allow(non_camel_case_types)]
1086    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1087    /// enum Rule {}
1088    ///
1089    /// let input = "ab";
1090    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1091    /// let mut result = state.match_string("ab");
1092    /// assert!(result.is_ok());
1093    /// assert_eq!(result.unwrap().position().pos(), 2);
1094    ///
1095    /// state = pest::ParserState::new(input);
1096    /// result = state.match_string("ac");
1097    /// assert!(result.is_err());
1098    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1099    /// ```
1100    #[inline]
1101    pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1102        let start_position = self.position.pos();
1103        let succeeded = self.position.match_string(string);
1104        if self.parse_attempts.enabled {
1105            let token = ParsingToken::Sensitive {
1106                token: String::from(string),
1107            };
1108            self.handle_token_parse_result(start_position, token, succeeded);
1109        }
1110        if succeeded {
1111            Ok(self)
1112        } else {
1113            Err(self)
1114        }
1115    }
1116
1117    /// Pushes the given literal to the stack, and always returns `Ok(Box<ParserState>)`.
1118    ///
1119    /// # Examples
1120    ///
1121    /// ```
1122    /// # use pest;
1123    /// # #[allow(non_camel_case_types)]
1124    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1125    /// enum Rule {}
1126    ///
1127    /// let input = "ab";
1128    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1129    ///
1130    /// let mut result = state.stack_push_literal("a");
1131    /// assert!(result.is_ok());
1132    /// assert_eq!(result.as_ref().unwrap().position().pos(), 0);
1133    ///
1134    /// let mut result = result.unwrap().stack_pop();
1135    /// assert!(result.is_ok());
1136    /// assert_eq!(result.unwrap().position().pos(), 1);
1137    /// ```
1138    #[inline]
1139    pub fn stack_push_literal(
1140        mut self: Box<Self>,
1141        string: impl Into<Cow<'static, str>>,
1142    ) -> ParseResult<Box<Self>> {
1143        // convert string into a Literal, and then into a BorrowedOrRc
1144        self.stack
1145            .push(SpanOrLiteral::Literal(string.into().into()));
1146        Ok(self)
1147    }
1148
1149    /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
1150    /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1151    ///
1152    /// # Examples
1153    ///
1154    /// ```
1155    /// # use pest;
1156    /// # #[allow(non_camel_case_types)]
1157    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1158    /// enum Rule {}
1159    ///
1160    /// let input = "ab";
1161    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1162    /// let mut result = state.match_insensitive("AB");
1163    /// assert!(result.is_ok());
1164    /// assert_eq!(result.unwrap().position().pos(), 2);
1165    ///
1166    /// state = pest::ParserState::new(input);
1167    /// result = state.match_insensitive("AC");
1168    /// assert!(result.is_err());
1169    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1170    /// ```
1171    #[inline]
1172    pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1173        let start_position: usize = self.position().pos();
1174        let succeeded = self.position.match_insensitive(string);
1175        if self.parse_attempts.enabled {
1176            let token = ParsingToken::Insensitive {
1177                token: String::from(string),
1178            };
1179            self.handle_token_parse_result(start_position, token, succeeded);
1180        }
1181        if succeeded {
1182            Ok(self)
1183        } else {
1184            Err(self)
1185        }
1186    }
1187
1188    /// Attempts to match a single character from the given range. Returns `Ok` with the updated
1189    /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1190    ///
1191    /// # Caution
1192    /// The provided `range` is interpreted as inclusive.
1193    ///
1194    /// # Examples
1195    ///
1196    /// ```
1197    /// # use pest;
1198    /// # #[allow(non_camel_case_types)]
1199    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1200    /// enum Rule {}
1201    ///
1202    /// let input = "ab";
1203    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1204    /// let mut result = state.match_range('a'..'z');
1205    /// assert!(result.is_ok());
1206    /// assert_eq!(result.unwrap().position().pos(), 1);
1207    ///
1208    /// state = pest::ParserState::new(input);
1209    /// result = state.match_range('A'..'Z');
1210    /// assert!(result.is_err());
1211    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1212    /// ```
1213    #[inline]
1214    pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
1215        let start_position = self.position().pos();
1216        let token = ParsingToken::Range {
1217            start: range.start,
1218            end: range.end,
1219        };
1220        let succeeded = self.position.match_range(range);
1221        if self.parse_attempts.enabled {
1222            self.handle_token_parse_result(start_position, token, succeeded);
1223        }
1224        if succeeded {
1225            Ok(self)
1226        } else {
1227            Err(self)
1228        }
1229    }
1230
1231    /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
1232    /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1233    ///
1234    /// # Examples
1235    ///
1236    /// ```
1237    /// # use pest;
1238    /// # #[allow(non_camel_case_types)]
1239    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1240    /// enum Rule {}
1241    ///
1242    /// let input = "ab";
1243    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1244    /// let mut result = state.skip(1);
1245    /// assert!(result.is_ok());
1246    /// assert_eq!(result.unwrap().position().pos(), 1);
1247    ///
1248    /// state = pest::ParserState::new(input);
1249    /// result = state.skip(3);
1250    /// assert!(result.is_err());
1251    /// assert_eq!(result.unwrap_err().position().pos(), 0);
1252    /// ```
1253    #[inline]
1254    pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
1255        if self.position.skip(n) {
1256            Ok(self)
1257        } else {
1258            Err(self)
1259        }
1260    }
1261
1262    /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
1263    /// updated `Box<ParserState>` whether or not one of the strings is found.
1264    ///
1265    /// # Examples
1266    ///
1267    /// ```
1268    /// # use pest;
1269    /// # #[allow(non_camel_case_types)]
1270    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1271    /// enum Rule {}
1272    ///
1273    /// let input = "abcd";
1274    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1275    /// let mut result = state.skip_until(&["c", "d"]);
1276    /// assert!(result.is_ok());
1277    /// assert_eq!(result.unwrap().position().pos(), 2);
1278    /// ```
1279    #[inline]
1280    pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
1281        self.position.skip_until(strings);
1282        Ok(self)
1283    }
1284
1285    /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
1286    /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
1287    ///
1288    /// # Examples
1289    ///
1290    /// ```
1291    /// # use pest;
1292    /// # #[allow(non_camel_case_types)]
1293    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1294    /// enum Rule {}
1295    ///
1296    /// let input = "ab";
1297    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1298    /// let mut result = state.start_of_input();
1299    /// assert!(result.is_ok());
1300    ///
1301    /// state = pest::ParserState::new(input);
1302    /// state = state.match_string("ab").unwrap();
1303    /// result = state.start_of_input();
1304    /// assert!(result.is_err());
1305    /// ```
1306    #[inline]
1307    pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1308        if self.position.at_start() {
1309            Ok(self)
1310        } else {
1311            Err(self)
1312        }
1313    }
1314
1315    /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
1316    /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
1317    ///
1318    /// # Examples
1319    ///
1320    /// ```
1321    /// # use pest;
1322    /// # #[allow(non_camel_case_types)]
1323    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1324    /// enum Rule {}
1325    ///
1326    /// let input = "ab";
1327    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1328    /// let mut result = state.end_of_input();
1329    /// assert!(result.is_err());
1330    ///
1331    /// state = pest::ParserState::new(input);
1332    /// state = state.match_string("ab").unwrap();
1333    /// result = state.end_of_input();
1334    /// assert!(result.is_ok());
1335    /// ```
1336    #[inline]
1337    pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1338        if self.position.at_end() {
1339            Ok(self)
1340        } else {
1341            Err(self)
1342        }
1343    }
1344
1345    /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
1346    /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
1347    /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
1348    /// together, negating the `Result`.
1349    ///
1350    /// # Examples
1351    ///
1352    /// ```
1353    /// # use pest;
1354    /// # #[allow(non_camel_case_types)]
1355    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1356    /// enum Rule {
1357    ///     a
1358    /// }
1359    ///
1360    /// let input = "a";
1361    /// let pairs: Vec<_> = pest::state(input, |state| {
1362    ///     state.lookahead(true, |state| {
1363    ///         state.rule(Rule::a, |s| Ok(s))
1364    ///     })
1365    /// }).unwrap().collect();
1366    ///
1367    /// assert_eq!(pairs.len(), 0);
1368    /// ```
1369    #[inline]
1370    pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
1371    where
1372        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1373    {
1374        self = self.inc_call_check_limit()?;
1375        let initial_lookahead = self.lookahead;
1376
1377        self.lookahead = if is_positive {
1378            match initial_lookahead {
1379                Lookahead::None | Lookahead::Positive => Lookahead::Positive,
1380                Lookahead::Negative => Lookahead::Negative,
1381            }
1382        } else {
1383            match initial_lookahead {
1384                Lookahead::None | Lookahead::Positive => Lookahead::Negative,
1385                Lookahead::Negative => Lookahead::Positive,
1386            }
1387        };
1388
1389        let initial_pos = self.position;
1390
1391        let result = f(self.checkpoint());
1392
1393        let result_state = match result {
1394            Ok(mut new_state) => {
1395                new_state.position = initial_pos;
1396                new_state.lookahead = initial_lookahead;
1397                Ok(new_state.restore())
1398            }
1399            Err(mut new_state) => {
1400                new_state.position = initial_pos;
1401                new_state.lookahead = initial_lookahead;
1402                Err(new_state.restore())
1403            }
1404        };
1405
1406        if is_positive {
1407            result_state
1408        } else {
1409            match result_state {
1410                Ok(state) => Err(state),
1411                Err(state) => Ok(state),
1412            }
1413        }
1414    }
1415
1416    /// Transformation which stops `Token`s from being generated according to `is_atomic`.
1417    /// Used as wrapper over `rule` (or even another `atomic`) call.
1418    ///
1419    /// # Examples
1420    ///
1421    /// ```
1422    /// # use pest::{self, Atomicity};
1423    /// # #[allow(non_camel_case_types)]
1424    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1425    /// enum Rule {
1426    ///     a
1427    /// }
1428    ///
1429    /// let input = "a";
1430    /// let pairs: Vec<_> = pest::state(input, |state| {
1431    ///     state.atomic(Atomicity::Atomic, |s| {
1432    ///         s.rule(Rule::a, |s| Ok(s))
1433    ///     })
1434    /// }).unwrap().collect();
1435    ///
1436    /// assert_eq!(pairs.len(), 0);
1437    /// ```
1438    #[inline]
1439    pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
1440    where
1441        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1442    {
1443        self = self.inc_call_check_limit()?;
1444        // In case child parsing call is another `atomic` it will have its own atomicity status.
1445        let initial_atomicity = self.atomicity;
1446        // In case child atomicity is the same as we've demanded, we shouldn't do nothing.
1447        // E.g. we have the following rules:
1448        // * RootRule = @{ InnerRule }
1449        // * InnerRule = @{ ... }
1450        let should_toggle = self.atomicity != atomicity;
1451
1452        // Note that we take atomicity of the top rule and not of the leaf (inner).
1453        if should_toggle {
1454            self.atomicity = atomicity;
1455        }
1456
1457        let result = f(self);
1458
1459        match result {
1460            Ok(mut new_state) => {
1461                if should_toggle {
1462                    new_state.atomicity = initial_atomicity;
1463                }
1464                Ok(new_state)
1465            }
1466            Err(mut new_state) => {
1467                if should_toggle {
1468                    new_state.atomicity = initial_atomicity;
1469                }
1470                Err(new_state)
1471            }
1472        }
1473    }
1474
1475    /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
1476    /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
1477    /// called successfully, or `Err(Box<ParserState>)` otherwise.
1478    ///
1479    /// # Examples
1480    ///
1481    /// ```
1482    /// # use pest;
1483    /// # #[allow(non_camel_case_types)]
1484    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1485    /// enum Rule {}
1486    ///
1487    /// let input = "ab";
1488    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1489    /// let mut result = state.stack_push(|state| state.match_string("a"));
1490    /// assert!(result.is_ok());
1491    /// assert_eq!(result.unwrap().position().pos(), 1);
1492    /// ```
1493    #[inline]
1494    pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1495    where
1496        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1497    {
1498        self = self.inc_call_check_limit()?;
1499        let start = self.position;
1500
1501        let result = f(self);
1502
1503        match result {
1504            Ok(mut state) => {
1505                let end = state.position;
1506                state.stack.push(SpanOrLiteral::Span(start.span(&end)));
1507                Ok(state)
1508            }
1509            Err(state) => Err(state),
1510        }
1511    }
1512
1513    /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1514    /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1515    ///
1516    /// # Examples
1517    ///
1518    /// ```
1519    /// # use pest;
1520    /// # #[allow(non_camel_case_types)]
1521    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1522    /// enum Rule {}
1523    ///
1524    /// let input = "aa";
1525    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1526    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1527    ///     |state| state.stack_peek()
1528    /// );
1529    /// assert!(result.is_ok());
1530    /// assert_eq!(result.unwrap().position().pos(), 2);
1531    /// ```
1532    #[inline]
1533    pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1534        let string = self
1535            .stack
1536            .peek()
1537            .expect("peek was called on empty stack")
1538            .as_borrowed_or_rc();
1539        self.match_string(string.as_str())
1540    }
1541
1542    /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1543    /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1544    ///
1545    /// # Examples
1546    ///
1547    /// ```
1548    /// # use pest;
1549    /// # #[allow(non_camel_case_types)]
1550    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1551    /// enum Rule {}
1552    ///
1553    /// let input = "aa";
1554    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1555    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1556    ///     |state| state.stack_pop()
1557    /// );
1558    /// assert!(result.is_ok());
1559    /// assert_eq!(result.unwrap().position().pos(), 2);
1560    /// ```
1561    #[inline]
1562    pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1563        let string = self
1564            .stack
1565            .pop()
1566            .expect("pop was called on empty stack")
1567            .as_borrowed_or_rc();
1568        self.match_string(string.as_str())
1569    }
1570
1571    /// Matches part of the state of the stack.
1572    ///
1573    /// # Examples
1574    ///
1575    /// ```
1576    /// # use pest::{self, MatchDir};
1577    /// # #[allow(non_camel_case_types)]
1578    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1579    /// enum Rule {}
1580    ///
1581    /// let input = "abcd cd cb";
1582    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1583    /// let mut result = state
1584    ///     .stack_push(|state| state.match_string("a"))
1585    ///     .and_then(|state| state.stack_push(|state| state.match_string("b")))
1586    ///     .and_then(|state| state.stack_push(|state| state.match_string("c")))
1587    ///     .and_then(|state| state.stack_push(|state| state.match_string("d")))
1588    ///     .and_then(|state| state.match_string(" "))
1589    ///     .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1590    ///     .and_then(|state| state.match_string(" "))
1591    ///     .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1592    /// assert!(result.is_ok());
1593    /// assert_eq!(result.unwrap().position().pos(), 10);
1594    /// ```
1595    #[inline]
1596    pub fn stack_match_peek_slice(
1597        mut self: Box<Self>,
1598        start: i32,
1599        end: Option<i32>,
1600        match_dir: MatchDir,
1601    ) -> ParseResult<Box<Self>> {
1602        let range = match constrain_idxs(start, end, self.stack.len()) {
1603            Some(r) => r,
1604            None => return Err(self),
1605        };
1606        // return true if an empty sequence is requested
1607        if range.end <= range.start {
1608            return Ok(self);
1609        }
1610
1611        let mut position = self.position;
1612        let result = {
1613            let mut iter_b2t = self.stack[range].iter();
1614            let matcher =
1615                |span: &SpanOrLiteral<'_>| position.match_string(span.as_borrowed_or_rc().as_str());
1616            match match_dir {
1617                MatchDir::BottomToTop => iter_b2t.all(matcher),
1618                MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1619            }
1620        };
1621        if result {
1622            self.position = position;
1623            Ok(self)
1624        } else {
1625            Err(self)
1626        }
1627    }
1628
1629    /// Matches the full state of the stack.
1630    ///
1631    /// # Examples
1632    ///
1633    /// ```
1634    /// # use pest;
1635    /// # #[allow(non_camel_case_types)]
1636    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1637    /// enum Rule {}
1638    ///
1639    /// let input = "abba";
1640    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1641    /// let mut result = state
1642    ///     .stack_push(|state| state.match_string("a"))
1643    ///     .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1644    ///     .and_then(|state| state.stack_match_peek());
1645    /// assert!(result.is_ok());
1646    /// assert_eq!(result.unwrap().position().pos(), 4);
1647    /// ```
1648    #[inline]
1649    pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1650        self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1651    }
1652
1653    /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1654    ///
1655    /// # Examples
1656    ///
1657    /// ```
1658    /// /// # use pest;
1659    /// # #[allow(non_camel_case_types)]
1660    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1661    /// enum Rule {}
1662    ///
1663    /// let input = "aaaa";
1664    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1665    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1666    ///     state.stack_push(|state| state.match_string("a"))
1667    /// }).and_then(|state| state.stack_match_peek());
1668    /// assert!(result.is_ok());
1669    /// assert_eq!(result.unwrap().position().pos(), 4);
1670    /// ```
1671    #[inline]
1672    pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1673        let mut position = self.position;
1674        let mut result = true;
1675        while let Some(span) = self.stack.pop() {
1676            result = position.match_string(span.as_borrowed_or_rc().as_str());
1677            if !result {
1678                break;
1679            }
1680        }
1681
1682        if result {
1683            self.position = position;
1684            Ok(self)
1685        } else {
1686            Err(self)
1687        }
1688    }
1689
1690    /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1691    /// `Err(Box<ParserState>)` otherwise.
1692    ///
1693    /// # Examples
1694    ///
1695    /// ```
1696    /// # use pest;
1697    /// # #[allow(non_camel_case_types)]
1698    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1699    /// enum Rule {}
1700    ///
1701    /// let input = "aa";
1702    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1703    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1704    ///     |state| state.stack_drop()
1705    /// );
1706    /// assert!(result.is_ok());
1707    /// assert_eq!(result.unwrap().position().pos(), 1);
1708    /// ```
1709    #[inline]
1710    pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1711        match self.stack.pop() {
1712            Some(_) => Ok(self),
1713            None => Err(self),
1714        }
1715    }
1716
1717    /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1718    /// this method only restores the stack.
1719    ///
1720    /// # Examples
1721    ///
1722    /// ```
1723    /// # use pest;
1724    /// # #[allow(non_camel_case_types)]
1725    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1726    /// enum Rule {}
1727    ///
1728    /// let input = "ab";
1729    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1730    /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1731    ///     state.match_string("a")).and_then(|state| state.match_string("a"))
1732    /// );
1733    ///
1734    /// assert!(result.is_err());
1735    ///
1736    /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1737    /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1738    /// assert!(catch_panic.is_err());
1739    /// ```
1740    #[inline]
1741    pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1742    where
1743        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1744    {
1745        match f(self.checkpoint()) {
1746            Ok(state) => Ok(state.checkpoint_ok()),
1747            Err(state) => Err(state.restore()),
1748        }
1749    }
1750
1751    // Mark the current state as a checkpoint and return the `Box`.
1752    #[inline]
1753    pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1754        self.stack.snapshot();
1755        self
1756    }
1757
1758    // The checkpoint was cleared successfully
1759    // so remove it without touching other stack state.
1760    #[inline]
1761    pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1762        self.stack.clear_snapshot();
1763        self
1764    }
1765
1766    // Restore the current state to the most recent checkpoint.
1767    #[inline]
1768    pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1769        self.stack.restore();
1770        self
1771    }
1772}
1773
1774/// Helper function used only in case stack operations (PUSH/POP) are used in grammar.
1775fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1776    let start_norm = normalize_index(start, len)?;
1777    let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1778    Some(start_norm..end_norm)
1779}
1780
1781/// `constrain_idxs` helper function.
1782/// Normalizes the index using its sequence’s length.
1783/// Returns `None` if the normalized index is OOB.
1784fn normalize_index(i: i32, len: usize) -> Option<usize> {
1785    if i > len as i32 {
1786        None
1787    } else if i >= 0 {
1788        Some(i as usize)
1789    } else {
1790        let real_i = len as i32 + i;
1791        if real_i >= 0 {
1792            Some(real_i as usize)
1793        } else {
1794            None
1795        }
1796    }
1797}
1798
1799#[cfg(test)]
1800mod test {
1801    use super::*;
1802
1803    #[test]
1804    fn normalize_index_pos() {
1805        assert_eq!(normalize_index(4, 6), Some(4));
1806        assert_eq!(normalize_index(5, 5), Some(5));
1807        assert_eq!(normalize_index(6, 3), None);
1808    }
1809
1810    #[test]
1811    fn normalize_index_neg() {
1812        assert_eq!(normalize_index(-4, 6), Some(2));
1813        assert_eq!(normalize_index(-5, 5), Some(0));
1814        assert_eq!(normalize_index(-6, 3), None);
1815    }
1816}