pest/parser_state.rs
1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
//! The core functionality of parsing a grammar: the state of the parser
//! while rules are being handled.
12
13use alloc::borrow::{Cow, ToOwned};
14use alloc::boxed::Box;
15use alloc::collections::BTreeSet;
16use alloc::rc::Rc;
17use alloc::string::String;
18use alloc::sync::Arc;
19use alloc::vec;
20use alloc::vec::Vec;
21use core::fmt::{Debug, Display, Formatter};
22use core::num::NonZeroUsize;
use core::ops::Deref; // used in BorrowedOrArc::as_str
24use core::ops::Range;
25use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
26
27use crate::error::{Error, ErrorVariant};
28use crate::iterators::pairs::new;
29use crate::iterators::{pairs, QueueableToken};
30use crate::position::Position;
31use crate::span::Span;
32use crate::stack::Stack;
33use crate::RuleType;
34
35/// The current lookahead status of a [`ParserState`].
36///
37/// [`ParserState`]: struct.ParserState.html
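///
/// # Examples
///
/// A minimal sketch showing that a positive lookahead matches without consuming
/// input (using the public [`ParserState::lookahead`] method; the empty `Rule`
/// enum is only for illustration):
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// # enum Rule {}
/// let state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new("ab");
/// // "a" must be present, but the position stays at 0 afterwards.
/// let state = state.lookahead(true, |s| s.match_string("a")).unwrap();
/// assert_eq!(state.position().pos(), 0);
/// ```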
38#[derive(Clone, Copy, Debug, Eq, PartialEq)]
39pub enum Lookahead {
40 /// The positive predicate, written as an ampersand &,
41 /// attempts to match its inner expression.
42 /// If the inner expression succeeds, parsing continues,
43 /// but at the same position as the predicate —
44 /// &foo ~ bar is thus a kind of "AND" statement:
45 /// "the input string must match foo AND bar".
46 /// If the inner expression fails,
47 /// the whole expression fails too.
48 Positive,
49 /// The negative predicate, written as an exclamation mark !,
50 /// attempts to match its inner expression.
51 /// If the inner expression fails, the predicate succeeds
52 /// and parsing continues at the same position as the predicate.
53 /// If the inner expression succeeds, the predicate fails —
54 /// !foo ~ bar is thus a kind of "NOT" statement:
55 /// "the input string must match bar but NOT foo".
56 Negative,
57 /// No lookahead (i.e. it will consume input).
58 None,
59}
60
61/// The current atomicity of a [`ParserState`].
62///
63/// [`ParserState`]: struct.ParserState.html
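///
/// # Examples
///
/// A minimal sketch showing that the atomicity set through [`ParserState::atomic`]
/// only applies for the duration of the closure (the empty `Rule` enum is only for
/// illustration):
///
/// ```
/// # use pest::Atomicity;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// # enum Rule {}
/// let state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new("a");
/// let state = state
///     .atomic(Atomicity::Atomic, |s| {
///         // Inside the closure the state is atomic...
///         assert_eq!(s.atomicity(), Atomicity::Atomic);
///         s.match_string("a")
///     })
///     .unwrap();
/// // ...and the previous atomicity is restored afterwards.
/// assert_eq!(state.atomicity(), Atomicity::NonAtomic);
/// ```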
64#[derive(Clone, Copy, Debug, Eq, PartialEq)]
65pub enum Atomicity {
    /// Prevents implicit whitespace: inside an atomic rule,
    /// the tilde `~` means "immediately followed by",
    /// and repetition operators (asterisk `*` and plus sign `+`)
    /// have no implicit separation. In addition, all other rules
    /// called from an atomic rule are also treated as atomic,
    /// and their interior matches are silent (they produce no tokens).
    Atomic,
    /// The same as atomic, but inner tokens are produced as normal.
    CompoundAtomic,
    /// Implicit whitespace is enabled.
    NonAtomic,
77}
78
79/// Type alias to simplify specifying the return value of chained closures.
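///
/// # Examples
///
/// A sketch of a chained parser closure expressed with this alias; the empty
/// `Rule` enum is only for illustration:
///
/// ```
/// use pest::{ParseResult, ParserState};
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// # enum Rule {}
///
/// fn ab(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
///     // Both the `Ok` and the `Err` arm carry the state, so a failed step can
///     // be recovered from by the caller.
///     state.match_string("a").and_then(|s| s.match_string("b"))
/// }
///
/// assert!(ab(ParserState::new("ab")).is_ok());
/// assert!(ab(ParserState::new("ac")).is_err());
/// ```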
80pub type ParseResult<S> = Result<S, S>;
81
82/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
83#[derive(Clone, Copy, Debug, Eq, PartialEq)]
84pub enum MatchDir {
85 /// from the bottom to the top of the stack
86 BottomToTop,
87 /// from the top to the bottom of the stack
88 TopToBottom,
89}
90
91static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
92
93/// Sets the maximum call limit for the parser state
94/// to prevent stack overflows or excessive execution times
95/// in some grammars.
96/// If set, the calls are tracked as a running total
97/// over all non-terminal rules that can nest closures
98/// (which are passed to transform the parser state).
99///
100/// # Arguments
101///
102/// * `limit` - The maximum number of calls. If None,
103/// the number of calls is unlimited.
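///
/// # Examples
///
/// A minimal sketch (assuming the crate-root re-export `pest::set_call_limit`);
/// the limit of 5000 calls is arbitrary and only for illustration:
///
/// ```
/// use core::num::NonZeroUsize;
///
/// // Bound the total number of state-transforming calls for subsequent parses...
/// pest::set_call_limit(NonZeroUsize::new(5000));
/// // ...and lift the limit again.
/// pest::set_call_limit(None);
/// ```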
104pub fn set_call_limit(limit: Option<NonZeroUsize>) {
105 CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
106}
107
108static ERROR_DETAIL: AtomicBool = AtomicBool::new(false);
109
/// Sets whether additional information for error details
/// should be collected. This is useful for debugging
/// parser errors (as it leads to more comprehensive
/// error messages), but it has a higher performance cost,
/// hence it is off by default.
///
/// # Arguments
///
/// * `enabled` - Whether to enable the collection of more
///   detailed error information.
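///
/// # Examples
///
/// A minimal sketch (assuming the crate-root re-export `pest::set_error_detail`):
///
/// ```
/// // Collect detailed parse-attempt information for the parses that follow...
/// pest::set_error_detail(true);
/// // ...and switch the (costlier) tracking off again.
/// pest::set_error_detail(false);
/// ```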
120pub fn set_error_detail(enabled: bool) {
121 ERROR_DETAIL.store(enabled, Ordering::Relaxed);
122}
123
124#[derive(Debug)]
125struct CallLimitTracker {
126 current_call_limit: Option<(usize, usize)>,
127}
128
129impl Default for CallLimitTracker {
130 fn default() -> Self {
131 let limit = CALL_LIMIT.load(Ordering::Relaxed);
132 let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
133 Self { current_call_limit }
134 }
135}
136
137impl CallLimitTracker {
138 fn limit_reached(&self) -> bool {
139 self.current_call_limit
140 .map_or(false, |(current, limit)| current >= limit)
141 }
142
143 fn increment_depth(&mut self) {
144 if let Some((current, _)) = &mut self.current_call_limit {
145 *current += 1;
146 }
147 }
148}
149
/// Initial capacity for the call stacks that may result from parsing a sequence of rules.
const CALL_STACK_INITIAL_CAPACITY: usize = 20;
/// Initial capacity for the (un)expected tokens that we may see at the parsing error position.
const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30;
/// Maximum number of child rules for which we'll extend call stacks.
///
/// In case the rule we're working with has too many child rules that failed to parse,
/// we don't want to store long stacks for all of them. If a rule has more than this number
/// of failed children, they are all collapsed into the parent rule.
const CALL_STACK_CHILDREN_THRESHOLD: usize = 4;
160
/// Structure tracking a parsing call that errored (associated with a specific `ParserState` function).
#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub enum ParseAttempt<R> {
    /// A call of `rule` errored.
    Rule(R),
    /// A call of a token element (e.g., `match_string` or `match_insensitive`) errored.
    /// Works as an indicator that the leaf node is not a rule. To get the token value,
    /// consult the `(un)expected_tokens` of `ParseAttempts`.
    Token,
}
171
172impl<R> ParseAttempt<R> {
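    /// Returns the rule this attempt refers to, or `None` if the attempt was made by a
    /// token-matching call such as `match_string`.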
173 pub fn get_rule(&self) -> Option<&R> {
174 match self {
175 ParseAttempt::Rule(r) => Some(r),
176 ParseAttempt::Token => None,
177 }
178 }
179}
180
/// Rules call stack.
/// Contains the sequence of rule calls that resulted in a new parsing attempt.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RulesCallStack<R> {
    /// Deepest rule that caused a parsing error (a `ParseAttempt::Token` is transformed into a rule).
    pub deepest: ParseAttempt<R>,
    /// Topmost rule covering `deepest`.
    pub parent: Option<R>,
}
190
191impl<R> RulesCallStack<R> {
192 fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> {
193 RulesCallStack {
194 deepest,
195 parent: None,
196 }
197 }
198}
199
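/// A terminal element the parser attempted to match: a case-sensitive or case-insensitive
/// string, a character range, or a built-in rule.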
200#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
201pub enum ParsingToken {
202 Sensitive { token: String },
203 Insensitive { token: String },
204 Range { start: char, end: char },
205 BuiltInRule,
206}
207
208impl Display for ParsingToken {
209 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
210 match self {
211 ParsingToken::Sensitive { token } => write!(f, "{token}"),
212 ParsingToken::Insensitive { token } => write!(f, "{}", token.to_uppercase()),
213 ParsingToken::Range { start, end } => write!(f, "{start}..{end}"),
214 ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE"),
215 }
216 }
217}

/// A helper that provides efficient string handling without unnecessary copying.
/// We use `Arc<String>` instead of `Cow<'i, str>` to avoid copying strings when cloning the `Owned` variant, since
/// `Arc::clone` only increments a reference count. [SpanOrLiteral] needs to be [Send] and [Sync], so we use [Arc]
/// instead of [Rc].
///
/// (We need to clone this struct to detach it from the `&self` borrow in [SpanOrLiteral::as_borrowed_or_rc], so that
/// we can then call `self.match_string`, a `mut self` method.)
225#[derive(Debug, Clone)]
226enum BorrowedOrArc<'i> {
227 Borrowed(&'i str),
228 Owned(Arc<String>),
229}
230
/// A holder for a literal string, for use in `stack_push_literal`. This is typically a `&'static str`,
/// but is an owned `Arc<String>` for the pest vm.
233impl<'i> BorrowedOrArc<'i> {
234 fn as_str<'a: 'i>(&'a self) -> &'a str {
235 match self {
236 BorrowedOrArc::Borrowed(s) => s,
237 BorrowedOrArc::Owned(s) => s.deref(),
238 }
239 }
240}
241
242impl From<Cow<'static, str>> for BorrowedOrArc<'_> {
243 fn from(value: Cow<'static, str>) -> Self {
244 match value {
245 Cow::Borrowed(s) => Self::Borrowed(s),
246 Cow::Owned(s) => Self::Owned(Arc::new(s)),
247 }
248 }
249}
250
251#[derive(Debug, Clone)]
252enum SpanOrLiteral<'i> {
253 Span(Span<'i>),
254 Literal(BorrowedOrArc<'i>),
255}
256
257impl<'i> SpanOrLiteral<'i> {
258 #[inline]
259 fn as_borrowed_or_rc(&self) -> BorrowedOrArc<'i> {
260 match self {
261 Self::Span(s) => BorrowedOrArc::Borrowed(s.as_str()),
262 Self::Literal(s) => s.clone(),
263 }
264 }
265}
266
/// Structure that tracks all the parsing attempts made at the max position.
/// We want to give an error hint about the parsing rules whose attempts reached
/// the farthest input position.
/// The intuition is that such rules are most likely the ones the user initially wanted to write.
271#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
272pub struct ParseAttempts<R> {
273 /// Indicates whether the parsing attempts are tracked.
274 enabled: bool,
    /// Vec of rule call sequences awaiting tokens at the same `max_position`.
    /// If there are several stacks in the vec, it means all those rule stacks are "equal"
    /// because their attempts occurred at the same position.
    pub call_stacks: Vec<RulesCallStack<R>>,
    /// Tokens that could be put at `max_position`
    /// in order to get a valid grammar query.
    expected_tokens: Vec<ParsingToken>,
    /// Tokens that are prohibited from being put at `max_position`
    /// in order to get a valid grammar query.
    unexpected_tokens: Vec<ParsingToken>,
    /// Max position at which we were expecting to see one of `expected_tokens`.
    pub max_position: usize,
287}
288
289impl<R: RuleType> ParseAttempts<R> {
290 /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens`
291 /// initialized with capacity.
292 pub fn new() -> Self {
293 Self {
294 call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY),
295 expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
296 unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
297 max_position: 0,
298 enabled: ERROR_DETAIL.load(Ordering::Relaxed),
299 }
300 }
301
302 /// Get number of currently present call stacks.
303 fn call_stacks_number(&self) -> usize {
304 self.call_stacks.len()
305 }
306
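    /// Returns the tokens expected at `max_position`, deduplicated and sorted.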
307 pub fn expected_tokens(&self) -> Vec<ParsingToken> {
308 self.expected_tokens
309 .iter()
310 .cloned()
311 .collect::<BTreeSet<_>>()
312 .into_iter()
313 .collect()
314 }
315
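    /// Returns the tokens prohibited at `max_position`, deduplicated and sorted.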
316 pub fn unexpected_tokens(&self) -> Vec<ParsingToken> {
317 self.unexpected_tokens
318 .iter()
319 .cloned()
320 .collect::<BTreeSet<_>>()
321 .into_iter()
322 .collect()
323 }
324
325 /// Retrieve call stacks.
326 pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> {
327 self.call_stacks
328 .iter()
329 .cloned()
330 .collect::<BTreeSet<_>>()
331 .into_iter()
332 .collect()
333 }
334
    /// In case we've tried to parse a rule whose start position is bigger than the previous
    /// `max_position`, it means that we've advanced in our parsing and found a better candidate.
    ///
    /// `start_index` is:
    /// * the number of call stacks present in the state at the moment the current `rule` was
    ///   called. The idea is that we'd like to update only those stacks that originated from the
    ///   current `rule` and not those that were called previously;
    /// * 0 in case we've successfully parsed some token since the moment `rule` was called.
343 fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) {
344 let mut non_token_call_stacks = Vec::new();
345 let mut token_call_stack_met = false;
346 for call_stack in self.call_stacks.iter().skip(start_index) {
347 if matches!(call_stack.deepest, ParseAttempt::Token) {
348 token_call_stack_met = true;
349 } else {
350 non_token_call_stacks.push(call_stack.clone())
351 }
352 }
353 if token_call_stack_met && non_token_call_stacks.is_empty() {
            // If `non_token_call_stacks` is not empty, we don't want to add a new standalone
            // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a
            // rule), since it wouldn't give us any useful additional info.
357 non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token));
358 }
359 self.call_stacks
360 .splice(start_index.., non_token_call_stacks);
361
362 let children_number_over_threshold =
363 self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD;
364 if children_number_over_threshold {
365 self.call_stacks.truncate(start_index);
366 self.call_stacks
367 .push(RulesCallStack::new(ParseAttempt::Rule(rule)));
368 } else {
369 for call_stack in self.call_stacks.iter_mut().skip(start_index) {
370 if matches!(call_stack.deepest, ParseAttempt::Token) {
371 call_stack.deepest = ParseAttempt::Rule(rule);
372 } else {
373 call_stack.parent = Some(rule);
374 }
375 }
376 }
377 }
378
    /// If the `negative_lookahead` flag is set, it means we've successfully parsed a token while
    /// in a negative lookahead state and want to track `token` in `unexpected_tokens`. Otherwise,
    /// we want to track it in `expected_tokens`. Let's call the chosen vec the `target_vec`.
    ///
    /// In case `position` is:
    /// * equal to `max_position`, add `token` to `target_vec`;
    /// * bigger than `max_position`, set `token` as the only new element of `target_vec`.
386 #[allow(clippy::comparison_chain)]
387 fn try_add_new_token(
388 &mut self,
389 token: ParsingToken,
390 start_position: usize,
391 position: usize,
392 negative_lookahead: bool,
393 ) {
394 let target_vec_push_token = |attempts: &mut ParseAttempts<R>| {
395 let target_vec = if negative_lookahead {
396 &mut attempts.unexpected_tokens
397 } else {
398 &mut attempts.expected_tokens
399 };
400 target_vec.push(token);
401 };
402
403 if position > self.max_position {
404 if negative_lookahead && start_position > self.max_position {
                // We encountered a sequence under negative lookahead.
                // We would like to track only the first failed token in this sequence (whose
                // `start_position` should be equal to `self.max_position`).
408 return;
409 }
410 target_vec_push_token(self);
411
412 if negative_lookahead {
                // After successfully parsing a token under negative lookahead, the only
                // thing we'd like to do is to track the token in the `unexpected_tokens`.
415 return;
416 }
417 self.max_position = position;
418 self.expected_tokens.clear();
419 self.unexpected_tokens.clear();
420 self.call_stacks.clear();
421 self.call_stacks
422 .push(RulesCallStack::new(ParseAttempt::Token));
423 } else if position == self.max_position {
424 target_vec_push_token(self);
425 self.call_stacks
426 .push(RulesCallStack::new(ParseAttempt::Token));
427 }
428 }
429
    /// Reset state in case we've successfully parsed some token in
    /// `match_string` or `match_insensitive`.
432 fn nullify_expected_tokens(&mut self, new_max_position: usize) {
433 self.call_stacks.clear();
434 self.expected_tokens.clear();
435 self.unexpected_tokens.clear();
436 self.max_position = new_max_position;
437 }
438}
439
440impl<R: RuleType> Default for ParseAttempts<R> {
441 fn default() -> Self {
442 Self::new()
443 }
444}
445
446/// The complete state of a [`Parser`].
447///
448/// [`Parser`]: trait.Parser.html
449#[derive(Debug)]
450pub struct ParserState<'i, R: RuleType> {
    /// Current position from which we try to apply some parser function.
    /// Initially 0.
    /// E.g., while parsing a `create user 'Bobby'` query, once we've parsed "create" via
    /// `match_insensitive`, our `position` moves from 0 to the length of "create".
    ///
    /// E.g., see `match_string` -> `self.position.match_string(string)`, which updates `self.position`.
457 position: Position<'i>,
    /// Queue representing rules that are partially (`QueueableToken::Start`) and
    /// fully (`QueueableToken::End`) parsed. When entering a rule, we put it into the queue in the
    /// `Start` state and, after all of its sub-logic (subrules or strings) has been parsed, we
    /// change it to the `End` state.
462 queue: Vec<QueueableToken<'i, R>>,
463 /// Status set in case specific lookahead logic is used in grammar.
464 /// See `Lookahead` for more information.
465 lookahead: Lookahead,
466 /// Rules that we HAVE expected, tried to parse, but failed.
467 pos_attempts: Vec<R>,
468 /// Rules that we have NOT expected, tried to parse, but failed.
469 neg_attempts: Vec<R>,
470 /// Max position in the query from which we've tried to parse some rule but failed.
471 attempt_pos: usize,
472 /// Current atomicity status. For more information see `Atomicity`.
473 atomicity: Atomicity,
474 /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP
475 /// invocations).
476 stack: Stack<SpanOrLiteral<'i>>,
477 /// Used for setting max parser calls limit.
478 call_tracker: CallLimitTracker,
    /// In addition to tracking `pos_attempts` and `attempt_pos`
    /// as a pair of (list of rules we've tried to parse but failed, max parsed position),
    /// we track the rules we've tried to parse at that same max position in this helper struct.
    ///
    /// Note that we may try to parse several rules at different positions. We want to track only
    /// the rules whose attempt position is bigger, because we consider them nearer to the
    /// query the user really wanted to pass.
    ///
    /// E.g. we have a query `create user "Bobby"` and these root rules:
    /// * CreateUser = { "create" ~ "user" ~ Name }
    /// * CreateTable = { "create" ~ "table" ~ Name }
    /// * Name = { SOME_DEFINITION }
    ///
    /// While parsing the query we'll advance the tracker position to the start of "Bobby", because
    /// we'd successfully parse "create" + "user" (and not "table").
494 parse_attempts: ParseAttempts<R>,
495}
496
497/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
498///
499/// # Examples
500///
501/// ```
502/// # use pest;
503/// let input = "";
504/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
505/// ```
506#[allow(clippy::perf)]
507pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
508where
509 F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
510{
511 let state = ParserState::new(input);
512
513 match f(state) {
514 Ok(state) => {
515 let len = state.queue.len();
516 Ok(new(Rc::new(state.queue), input, None, 0, len))
517 }
518 Err(mut state) => {
519 let variant = if state.reached_call_limit() {
520 ErrorVariant::CustomError {
521 message: "call limit reached".to_owned(),
522 }
523 } else {
524 state.pos_attempts.sort();
525 state.pos_attempts.dedup();
526 state.neg_attempts.sort();
527 state.neg_attempts.dedup();
528 ErrorVariant::ParsingError {
529 positives: state.pos_attempts.clone(),
530 negatives: state.neg_attempts.clone(),
531 }
532 };
533
534 if state.parse_attempts.enabled {
535 Err(Error::new_from_pos_with_parsing_attempts(
536 variant,
537 Position::new_internal(input, state.attempt_pos),
538 state.parse_attempts.clone(),
539 ))
540 } else {
541 Err(Error::new_from_pos(
542 variant,
543 Position::new_internal(input, state.attempt_pos),
544 ))
545 }
546 }
547 }
548}
549
550impl<'i, R: RuleType> ParserState<'i, R> {
551 /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
552 /// will be passed from closure to closure based on the needs of the specified `Parser`.
553 ///
554 /// # Examples
555 ///
556 /// ```
557 /// # use pest;
558 /// let input = "";
559 /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
560 /// ```
561 pub fn new(input: &'i str) -> Box<Self> {
562 Box::new(ParserState {
563 position: Position::from_start(input),
564 queue: vec![],
565 lookahead: Lookahead::None,
566 pos_attempts: vec![],
567 neg_attempts: vec![],
568 attempt_pos: 0,
569 atomicity: Atomicity::NonAtomic,
570 stack: Stack::new(),
571 call_tracker: Default::default(),
572 parse_attempts: ParseAttempts::new(),
573 })
574 }
575
    /// Get all parse attempts after the parsing process has finished.
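    ///
    /// # Examples
    ///
    /// A minimal sketch: on a freshly created state the tracker is empty; it is only
    /// populated during parsing, and only when `set_error_detail(true)` was called
    /// beforehand (the empty `Rule` enum is only for illustration).
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// # enum Rule {}
    /// let state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new("ab");
    /// let attempts = state.get_parse_attempts();
    /// assert_eq!(attempts.max_position, 0);
    /// assert!(attempts.expected_tokens().is_empty());
    /// ```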
577 pub fn get_parse_attempts(&self) -> &ParseAttempts<R> {
578 &self.parse_attempts
579 }
580
581 /// Returns a reference to the current `Position` of the `ParserState`.
582 ///
583 /// # Examples
584 ///
585 /// ```
586 /// # use pest;
587 /// # #[allow(non_camel_case_types)]
588 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
589 /// enum Rule {
590 /// ab
591 /// }
592 ///
593 /// let input = "ab";
594 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
595 /// let position = state.position();
596 /// assert_eq!(position.pos(), 0);
597 /// ```
598 pub fn position(&self) -> &Position<'i> {
599 &self.position
600 }
601
602 /// Returns the current atomicity of the `ParserState`.
603 ///
604 /// # Examples
605 ///
606 /// ```
607 /// # use pest;
608 /// # use pest::Atomicity;
609 /// # #[allow(non_camel_case_types)]
610 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
611 /// enum Rule {
612 /// ab
613 /// }
614 ///
615 /// let input = "ab";
616 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
617 /// let atomicity = state.atomicity();
618 /// assert_eq!(atomicity, Atomicity::NonAtomic);
619 /// ```
620 pub fn atomicity(&self) -> Atomicity {
621 self.atomicity
622 }
623
624 #[inline]
625 fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
626 if self.call_tracker.limit_reached() {
627 return Err(self);
628 }
629 self.call_tracker.increment_depth();
630 Ok(self)
631 }
632
633 #[inline]
634 fn reached_call_limit(&self) -> bool {
635 self.call_tracker.limit_reached()
636 }
637
    /// Wrapper needed to generate tokens. This will associate the `R` type rule with the closure
    /// meant to match the rule.
640 ///
641 /// # Examples
642 ///
643 /// ```
644 /// # use pest;
645 /// # #[allow(non_camel_case_types)]
646 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
647 /// enum Rule {
648 /// a
649 /// }
650 ///
651 /// let input = "a";
652 /// let pairs: Vec<_> = pest::state(input, |state| {
653 /// state.rule(Rule::a, |s| Ok(s))
654 /// }).unwrap().collect();
655 ///
656 /// assert_eq!(pairs.len(), 1);
657 /// ```
658 #[inline]
659 pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
660 where
661 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
662 {
663 self = self.inc_call_check_limit()?;
664 // Position from which this `rule` starts parsing.
665 let actual_pos = self.position.pos();
666 // Remember index of the `self.queue` element that will be associated with this `rule`.
667 let index = self.queue.len();
668
669 let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
670 (self.pos_attempts.len(), self.neg_attempts.len())
671 } else {
672 // Attempts have not been cleared yet since the attempt_pos is older.
673 (0, 0)
674 };
675
676 if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
677 // Pair's position will only be known after running the closure.
678 self.queue.push(QueueableToken::Start {
679 end_token_index: 0,
680 input_pos: actual_pos,
681 });
682 }
683
        // Remember the number of attempts before the `f` call.
        // Using this variable in `track`, we can tell how many attempts were added
        // while traversing the children rules.
        let attempts = self.attempts_at(actual_pos);
        // Number of call stacks present in `self.parse_attempts` before the `f` call.
        // We need to remember this number only in case no farther attempt was found.
        // E.g. we are handling a rule at whose start position two children rules may be
        // tested. By the time we return from the `f` call below,
        // there will be two more children rules in `self.parse_attempts` that we'll
        // consider to be the children of the current `rule`.
        let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number();
        // Max parsing attempt position at the moment of `rule` handling.
        // In case it's raised during children rules handling, it means
        // we've made parsing progress.
        let remember_max_position = self.parse_attempts.max_position;
699
700 let result = f(self);
701
702 let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| {
703 if new_state.parse_attempts.max_position > remember_max_position {
                // It means that a `match_string` or e.g. `match_insensitive` function call
                // has already erased `self.parse_attempts.call_stacks` and that the previously
                // remembered values are not valid anymore.
707 remember_call_stacks_number = 0;
708 }
709 if !matches!(new_state.atomicity, Atomicity::Atomic) {
710 new_state
711 .parse_attempts
712 .try_add_new_stack_rule(rule, remember_call_stacks_number);
713 }
714 };
715
716 match result {
717 Ok(mut new_state) => {
718 if new_state.lookahead == Lookahead::Negative {
719 new_state.track(
720 rule,
721 actual_pos,
722 pos_attempts_index,
723 neg_attempts_index,
724 attempts,
725 );
726 }
727
728 if new_state.lookahead == Lookahead::None
729 && new_state.atomicity != Atomicity::Atomic
730 {
731 // Index of `QueueableToken::End` token added below
732 // that corresponds to previously added `QueueableToken::Start` token.
733 let new_index = new_state.queue.len();
734 match new_state.queue[index] {
735 QueueableToken::Start {
736 ref mut end_token_index,
737 ..
738 } => *end_token_index = new_index,
739 _ => unreachable!(),
740 };
741
742 let new_pos = new_state.position.pos();
743
744 new_state.queue.push(QueueableToken::End {
745 start_token_index: index,
746 rule,
747 tag: None,
748 input_pos: new_pos,
749 });
750 }
751
                // Note that we need to count positive parsing results too, because we can fail in
                // an optional rule call inside which the farthest
                // parsed token may lie.
755 if new_state.parse_attempts.enabled {
756 try_add_rule_to_stack(&mut new_state);
757 }
758 Ok(new_state)
759 }
760 Err(mut new_state) => {
761 if new_state.lookahead != Lookahead::Negative {
762 new_state.track(
763 rule,
764 actual_pos,
765 pos_attempts_index,
766 neg_attempts_index,
767 attempts,
768 );
769 if new_state.parse_attempts.enabled {
770 try_add_rule_to_stack(&mut new_state);
771 }
772 }
773
774 if new_state.lookahead == Lookahead::None
775 && new_state.atomicity != Atomicity::Atomic
776 {
777 new_state.queue.truncate(index);
778 }
779
780 Err(new_state)
781 }
782 }
783 }
784
    /// Tags the current node.
    ///
    /// # Examples
    ///
    /// Match several characters and tag one of them so it can be found later:
790 ///
791 /// ```
792 /// use pest::{state, ParseResult, ParserState, iterators::Pair};
793 /// #[allow(non_camel_case_types)]
794 /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
795 /// enum Rule {
796 /// character,
797 /// }
798 /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
799 /// state.sequence(|state| {
800 /// character(state)
801 /// .and_then(|state| character(state))
802 /// .and_then(|state| character(state))
803 /// .and_then(|state| state.tag_node("c"))
804 /// .and_then(|state| character(state))
805 /// })
806 /// }
807 /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
808 /// state.rule(Rule::character, |state| state.match_range('a'..'z'))
809 /// }
810 ///
811 /// let input = "abcd";
812 /// let pairs = state(input, mark_c).unwrap();
813 /// // find all node tag as `c`
814 /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
815 /// assert_eq!(find[0].as_str(), "c")
816 /// ```
817 #[inline]
818 pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> {
819 if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
820 *old = Some(tag)
821 }
822 Ok(self)
823 }
824
    /// Get the number of expected (positive) rule attempts plus prohibited (negative) rule attempts at `pos`.
826 fn attempts_at(&self, pos: usize) -> usize {
827 if self.attempt_pos == pos {
828 self.pos_attempts.len() + self.neg_attempts.len()
829 } else {
830 0
831 }
832 }
833
834 fn track(
835 &mut self,
836 rule: R,
837 pos: usize,
838 pos_attempts_index: usize,
839 neg_attempts_index: usize,
840 prev_attempts: usize,
841 ) {
842 if self.atomicity == Atomicity::Atomic {
843 return;
844 }
845
        // If the nested rules made no progress, there is no use reporting them; it's only useful
        // to track the current rule. The exception is when exactly one attempt was made while
        // handling the children rules: in that case we keep the child attempt and skip the parent.
849 let curr_attempts = self.attempts_at(pos);
850 if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
851 return;
852 }
853
854 if pos == self.attempt_pos {
855 self.pos_attempts.truncate(pos_attempts_index);
856 self.neg_attempts.truncate(neg_attempts_index);
857 }
858
859 if pos > self.attempt_pos {
860 self.pos_attempts.clear();
861 self.neg_attempts.clear();
862 self.attempt_pos = pos;
863 }
864
865 let attempts = if self.lookahead != Lookahead::Negative {
866 &mut self.pos_attempts
867 } else {
868 &mut self.neg_attempts
869 };
870
871 if pos == self.attempt_pos {
872 attempts.push(rule);
873 }
874 }
875
876 /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
877 /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
878 /// `Box<ParserState>` otherwise.
879 ///
880 /// This method is useful to parse sequences that only match together which usually come in the
881 /// form of chained `Result`s with
882 /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
883 ///
884 ///
885 /// # Examples
886 ///
887 /// ```
888 /// # use pest;
889 /// # #[allow(non_camel_case_types)]
890 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
891 /// enum Rule {
892 /// a
893 /// }
894 ///
895 /// let input = "a";
896 /// let pairs: Vec<_> = pest::state(input, |state| {
897 /// state.sequence(|s| {
898 /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
899 /// s.match_string("b")
900 /// })
901 /// }).or_else(|s| {
902 /// Ok(s)
903 /// })
904 /// }).unwrap().collect();
905 ///
906 /// assert_eq!(pairs.len(), 0);
907 /// ```
908 #[inline]
909 pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
910 where
911 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
912 {
913 self = self.inc_call_check_limit()?;
914 let token_index = self.queue.len();
915 let initial_pos = self.position;
916
917 let result = f(self);
918
919 match result {
920 Ok(new_state) => Ok(new_state),
921 Err(mut new_state) => {
922 // Restore the initial position and truncate the token queue.
923 new_state.position = initial_pos;
924 new_state.queue.truncate(token_index);
925 Err(new_state)
926 }
927 }
928 }
929
    /// Repeatedly applies the transformation provided by `f` to the `Box<ParserState>` until `f`
    /// fails, then returns `Ok` with the `Box<ParserState>` that `f` returned inside its final `Err`.
932 ///
933 /// # Examples
934 ///
935 /// ```
936 /// # use pest;
937 /// # #[allow(non_camel_case_types)]
938 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
939 /// enum Rule {
940 /// ab
941 /// }
942 ///
943 /// let input = "aab";
944 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
945 /// let mut result = state.repeat(|s| {
946 /// s.match_string("a")
947 /// });
948 /// assert!(result.is_ok());
949 /// assert_eq!(result.unwrap().position().pos(), 2);
950 ///
951 /// state = pest::ParserState::new(input);
952 /// result = state.repeat(|s| {
953 /// s.match_string("b")
954 /// });
955 /// assert!(result.is_ok());
956 /// assert_eq!(result.unwrap().position().pos(), 0);
957 /// ```
958 #[inline]
959 pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
960 where
961 F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
962 {
963 self = self.inc_call_check_limit()?;
964 let mut result = f(self);
965
966 loop {
967 match result {
968 Ok(state) => result = f(state),
969 Err(state) => return Ok(state),
970 };
971 }
972 }
973
974 /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
975 /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
976 ///
977 /// # Examples
978 ///
979 /// ```
980 /// # use pest;
981 /// # #[allow(non_camel_case_types)]
982 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
983 /// enum Rule {
984 /// ab
985 /// }
986 ///
987 /// let input = "ab";
988 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
989 /// let result = state.optional(|s| {
990 /// s.match_string("ab")
991 /// });
992 /// assert!(result.is_ok());
993 ///
994 /// state = pest::ParserState::new(input);
995 /// let result = state.optional(|s| {
996 /// s.match_string("ac")
997 /// });
998 /// assert!(result.is_ok());
999 /// ```
1000 #[inline]
1001 pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1002 where
1003 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1004 {
1005 self = self.inc_call_check_limit()?;
1006 match f(self) {
1007 Ok(state) | Err(state) => Ok(state),
1008 }
1009 }
1010
1011 /// Generic function to handle result of char/string/range parsing
1012 /// in order to track (un)expected tokens.
1013 fn handle_token_parse_result(
1014 &mut self,
1015 start_position: usize,
1016 token: ParsingToken,
1017 parse_succeeded: bool,
1018 ) {
        // The new position after the tracked parsed element, in case `parse_succeeded` is true.
        // The position of the parsing failure otherwise.
1021 let current_pos = self.position.pos();
1022
1023 if parse_succeeded {
1024 if self.lookahead == Lookahead::Negative {
1025 self.parse_attempts
1026 .try_add_new_token(token, start_position, current_pos, true);
1027 } else if current_pos > self.parse_attempts.max_position {
1028 self.parse_attempts.nullify_expected_tokens(current_pos);
1029 }
1030 } else if self.lookahead != Lookahead::Negative {
1031 self.parse_attempts
1032 .try_add_new_token(token, start_position, current_pos, false);
1033 }
1034 }
1035
1036 /// Attempts to match a single character based on a filter function. Returns `Ok` with the
1037 /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
1038 /// otherwise.
1039 ///
1040 /// # Examples
1041 ///
1042 /// ```
1043 /// # use pest;
1044 /// # #[allow(non_camel_case_types)]
1045 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1046 /// enum Rule {}
1047 ///
1048 /// let input = "ab";
1049 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1050 /// let result = state.match_char_by(|c| c.is_ascii());
1051 /// assert!(result.is_ok());
1052 /// assert_eq!(result.unwrap().position().pos(), 1);
1053 ///
1054 /// let input = "❤";
1055 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1056 /// let result = state.match_char_by(|c| c.is_ascii());
1057 /// assert!(result.is_err());
1058 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1059 /// ```
1060 #[inline]
1061 pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1062 where
1063 F: FnOnce(char) -> bool,
1064 {
1065 let start_position = self.position.pos();
1066 let succeeded = self.position.match_char_by(f);
1067 if self.parse_attempts.enabled {
1068 let token = ParsingToken::BuiltInRule;
1069 self.handle_token_parse_result(start_position, token, succeeded);
1070 }
1071 if succeeded {
1072 Ok(self)
1073 } else {
1074 Err(self)
1075 }
1076 }
1077
1078 /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
1079 /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
1080 ///
1081 /// # Examples
1082 ///
1083 /// ```
1084 /// # use pest;
1085 /// # #[allow(non_camel_case_types)]
1086 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1087 /// enum Rule {}
1088 ///
1089 /// let input = "ab";
1090 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1091 /// let mut result = state.match_string("ab");
1092 /// assert!(result.is_ok());
1093 /// assert_eq!(result.unwrap().position().pos(), 2);
1094 ///
1095 /// state = pest::ParserState::new(input);
1096 /// result = state.match_string("ac");
1097 /// assert!(result.is_err());
1098 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1099 /// ```
1100 #[inline]
1101 pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1102 let start_position = self.position.pos();
1103 let succeeded = self.position.match_string(string);
1104 if self.parse_attempts.enabled {
1105 let token = ParsingToken::Sensitive {
1106 token: String::from(string),
1107 };
1108 self.handle_token_parse_result(start_position, token, succeeded);
1109 }
1110 if succeeded {
1111 Ok(self)
1112 } else {
1113 Err(self)
1114 }
1115 }
1116
1117 /// Pushes the given literal to the stack, and always returns `Ok(Box<ParserState>)`.
1118 ///
1119 /// # Examples
1120 ///
1121 /// ```
1122 /// # use pest;
1123 /// # #[allow(non_camel_case_types)]
1124 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1125 /// enum Rule {}
1126 ///
1127 /// let input = "ab";
1128 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1129 ///
1130 /// let mut result = state.stack_push_literal("a");
1131 /// assert!(result.is_ok());
1132 /// assert_eq!(result.as_ref().unwrap().position().pos(), 0);
1133 ///
1134 /// let mut result = result.unwrap().stack_pop();
1135 /// assert!(result.is_ok());
1136 /// assert_eq!(result.unwrap().position().pos(), 1);
1137 /// ```
1138 #[inline]
1139 pub fn stack_push_literal(
1140 mut self: Box<Self>,
1141 string: impl Into<Cow<'static, str>>,
1142 ) -> ParseResult<Box<Self>> {
        // Convert `string` into a `BorrowedOrArc`, then wrap it in a `SpanOrLiteral::Literal`.
1144 self.stack
1145 .push(SpanOrLiteral::Literal(string.into().into()));
1146 Ok(self)
1147 }
1148
1149 /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
1150 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1151 ///
1152 /// # Examples
1153 ///
1154 /// ```
1155 /// # use pest;
1156 /// # #[allow(non_camel_case_types)]
1157 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1158 /// enum Rule {}
1159 ///
1160 /// let input = "ab";
1161 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1162 /// let mut result = state.match_insensitive("AB");
1163 /// assert!(result.is_ok());
1164 /// assert_eq!(result.unwrap().position().pos(), 2);
1165 ///
1166 /// state = pest::ParserState::new(input);
1167 /// result = state.match_insensitive("AC");
1168 /// assert!(result.is_err());
1169 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1170 /// ```
1171 #[inline]
1172 pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1173 let start_position: usize = self.position().pos();
1174 let succeeded = self.position.match_insensitive(string);
1175 if self.parse_attempts.enabled {
1176 let token = ParsingToken::Insensitive {
1177 token: String::from(string),
1178 };
1179 self.handle_token_parse_result(start_position, token, succeeded);
1180 }
1181 if succeeded {
1182 Ok(self)
1183 } else {
1184 Err(self)
1185 }
1186 }
1187
1188 /// Attempts to match a single character from the given range. Returns `Ok` with the updated
1189 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1190 ///
1191 /// # Caution
1192 /// The provided `range` is interpreted as inclusive.
1193 ///
1194 /// # Examples
1195 ///
1196 /// ```
1197 /// # use pest;
1198 /// # #[allow(non_camel_case_types)]
1199 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1200 /// enum Rule {}
1201 ///
1202 /// let input = "ab";
1203 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1204 /// let mut result = state.match_range('a'..'z');
1205 /// assert!(result.is_ok());
1206 /// assert_eq!(result.unwrap().position().pos(), 1);
1207 ///
1208 /// state = pest::ParserState::new(input);
1209 /// result = state.match_range('A'..'Z');
1210 /// assert!(result.is_err());
1211 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1212 /// ```
1213 #[inline]
1214 pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
1215 let start_position = self.position().pos();
1216 let token = ParsingToken::Range {
1217 start: range.start,
1218 end: range.end,
1219 };
1220 let succeeded = self.position.match_range(range);
1221 if self.parse_attempts.enabled {
1222 self.handle_token_parse_result(start_position, token, succeeded);
1223 }
1224 if succeeded {
1225 Ok(self)
1226 } else {
1227 Err(self)
1228 }
1229 }
1230
1231 /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
1232 /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1233 ///
1234 /// # Examples
1235 ///
1236 /// ```
1237 /// # use pest;
1238 /// # #[allow(non_camel_case_types)]
1239 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1240 /// enum Rule {}
1241 ///
1242 /// let input = "ab";
1243 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1244 /// let mut result = state.skip(1);
1245 /// assert!(result.is_ok());
1246 /// assert_eq!(result.unwrap().position().pos(), 1);
1247 ///
1248 /// state = pest::ParserState::new(input);
1249 /// result = state.skip(3);
1250 /// assert!(result.is_err());
1251 /// assert_eq!(result.unwrap_err().position().pos(), 0);
1252 /// ```
1253 #[inline]
1254 pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
1255 if self.position.skip(n) {
1256 Ok(self)
1257 } else {
1258 Err(self)
1259 }
1260 }
1261
1262 /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
1263 /// updated `Box<ParserState>` whether or not one of the strings is found.
1264 ///
1265 /// # Examples
1266 ///
1267 /// ```
1268 /// # use pest;
1269 /// # #[allow(non_camel_case_types)]
1270 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1271 /// enum Rule {}
1272 ///
1273 /// let input = "abcd";
1274 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1275 /// let mut result = state.skip_until(&["c", "d"]);
1276 /// assert!(result.is_ok());
1277 /// assert_eq!(result.unwrap().position().pos(), 2);
1278 /// ```
1279 #[inline]
1280 pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
1281 self.position.skip_until(strings);
1282 Ok(self)
1283 }
1284
1285 /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
1286 /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
1287 ///
1288 /// # Examples
1289 ///
1290 /// ```
1291 /// # use pest;
1292 /// # #[allow(non_camel_case_types)]
1293 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1294 /// enum Rule {}
1295 ///
1296 /// let input = "ab";
1297 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1298 /// let mut result = state.start_of_input();
1299 /// assert!(result.is_ok());
1300 ///
1301 /// state = pest::ParserState::new(input);
1302 /// state = state.match_string("ab").unwrap();
1303 /// result = state.start_of_input();
1304 /// assert!(result.is_err());
1305 /// ```
1306 #[inline]
1307 pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1308 if self.position.at_start() {
1309 Ok(self)
1310 } else {
1311 Err(self)
1312 }
1313 }
1314
1315 /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
1316 /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
1317 ///
1318 /// # Examples
1319 ///
1320 /// ```
1321 /// # use pest;
1322 /// # #[allow(non_camel_case_types)]
1323 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1324 /// enum Rule {}
1325 ///
1326 /// let input = "ab";
1327 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1328 /// let mut result = state.end_of_input();
1329 /// assert!(result.is_err());
1330 ///
1331 /// state = pest::ParserState::new(input);
1332 /// state = state.match_string("ab").unwrap();
1333 /// result = state.end_of_input();
1334 /// assert!(result.is_ok());
1335 /// ```
1336 #[inline]
1337 pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1338 if self.position.at_end() {
1339 Ok(self)
1340 } else {
1341 Err(self)
1342 }
1343 }
1344
1345 /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
1346 /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
1347 /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
1348 /// together, negating the `Result`.
1349 ///
1350 /// # Examples
1351 ///
1352 /// ```
1353 /// # use pest;
1354 /// # #[allow(non_camel_case_types)]
1355 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1356 /// enum Rule {
1357 /// a
1358 /// }
1359 ///
1360 /// let input = "a";
1361 /// let pairs: Vec<_> = pest::state(input, |state| {
1362 /// state.lookahead(true, |state| {
1363 /// state.rule(Rule::a, |s| Ok(s))
1364 /// })
1365 /// }).unwrap().collect();
1366 ///
1367 /// assert_eq!(pairs.len(), 0);
1368 /// ```
1369 #[inline]
1370 pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
1371 where
1372 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1373 {
1374 self = self.inc_call_check_limit()?;
1375 let initial_lookahead = self.lookahead;
1376
1377 self.lookahead = if is_positive {
1378 match initial_lookahead {
1379 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
1380 Lookahead::Negative => Lookahead::Negative,
1381 }
1382 } else {
1383 match initial_lookahead {
1384 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
1385 Lookahead::Negative => Lookahead::Positive,
1386 }
1387 };
1388
1389 let initial_pos = self.position;
1390
1391 let result = f(self.checkpoint());
1392
1393 let result_state = match result {
1394 Ok(mut new_state) => {
1395 new_state.position = initial_pos;
1396 new_state.lookahead = initial_lookahead;
1397 Ok(new_state.restore())
1398 }
1399 Err(mut new_state) => {
1400 new_state.position = initial_pos;
1401 new_state.lookahead = initial_lookahead;
1402 Err(new_state.restore())
1403 }
1404 };
1405
1406 if is_positive {
1407 result_state
1408 } else {
1409 match result_state {
1410 Ok(state) => Err(state),
1411 Err(state) => Ok(state),
1412 }
1413 }
1414 }
1415
1416 /// Transformation which stops `Token`s from being generated according to `is_atomic`.
1417 /// Used as wrapper over `rule` (or even another `atomic`) call.
1418 ///
1419 /// # Examples
1420 ///
1421 /// ```
1422 /// # use pest::{self, Atomicity};
1423 /// # #[allow(non_camel_case_types)]
1424 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1425 /// enum Rule {
1426 /// a
1427 /// }
1428 ///
1429 /// let input = "a";
1430 /// let pairs: Vec<_> = pest::state(input, |state| {
1431 /// state.atomic(Atomicity::Atomic, |s| {
1432 /// s.rule(Rule::a, |s| Ok(s))
1433 /// })
1434 /// }).unwrap().collect();
1435 ///
1436 /// assert_eq!(pairs.len(), 0);
1437 /// ```
1438 #[inline]
1439 pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
1440 where
1441 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1442 {
1443 self = self.inc_call_check_limit()?;
1444 // In case child parsing call is another `atomic` it will have its own atomicity status.
1445 let initial_atomicity = self.atomicity;
1446 // In case child atomicity is the same as we've demanded, we shouldn't do nothing.
1447 // E.g. we have the following rules:
1448 // * RootRule = @{ InnerRule }
1449 // * InnerRule = @{ ... }
1450 let should_toggle = self.atomicity != atomicity;
1451
1452 // Note that we take atomicity of the top rule and not of the leaf (inner).
1453 if should_toggle {
1454 self.atomicity = atomicity;
1455 }
1456
1457 let result = f(self);
1458
1459 match result {
1460 Ok(mut new_state) => {
1461 if should_toggle {
1462 new_state.atomicity = initial_atomicity;
1463 }
1464 Ok(new_state)
1465 }
1466 Err(mut new_state) => {
1467 if should_toggle {
1468 new_state.atomicity = initial_atomicity;
1469 }
1470 Err(new_state)
1471 }
1472 }
1473 }
1474
    /// Evaluates the result of closure `f` and, on success, pushes the span of input consumed
    /// between the state before and after the `f` call onto the stack. Returns
    /// `Ok(Box<ParserState>)` if `f` succeeds, or `Err(Box<ParserState>)` otherwise.
1478 ///
1479 /// # Examples
1480 ///
1481 /// ```
1482 /// # use pest;
1483 /// # #[allow(non_camel_case_types)]
1484 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1485 /// enum Rule {}
1486 ///
1487 /// let input = "ab";
1488 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1489 /// let mut result = state.stack_push(|state| state.match_string("a"));
1490 /// assert!(result.is_ok());
1491 /// assert_eq!(result.unwrap().position().pos(), 1);
1492 /// ```
1493 #[inline]
1494 pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1495 where
1496 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1497 {
1498 self = self.inc_call_check_limit()?;
1499 let start = self.position;
1500
1501 let result = f(self);
1502
1503 match result {
1504 Ok(mut state) => {
1505 let end = state.position;
1506 state.stack.push(SpanOrLiteral::Span(start.span(&end)));
1507 Ok(state)
1508 }
1509 Err(state) => Err(state),
1510 }
1511 }
1512
1513 /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1514 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1515 ///
1516 /// # Examples
1517 ///
1518 /// ```
1519 /// # use pest;
1520 /// # #[allow(non_camel_case_types)]
1521 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1522 /// enum Rule {}
1523 ///
1524 /// let input = "aa";
1525 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1526 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1527 /// |state| state.stack_peek()
1528 /// );
1529 /// assert!(result.is_ok());
1530 /// assert_eq!(result.unwrap().position().pos(), 2);
1531 /// ```
1532 #[inline]
1533 pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1534 let string = self
1535 .stack
1536 .peek()
1537 .expect("peek was called on empty stack")
1538 .as_borrowed_or_rc();
1539 self.match_string(string.as_str())
1540 }
1541
1542 /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1543 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1544 ///
1545 /// # Examples
1546 ///
1547 /// ```
1548 /// # use pest;
1549 /// # #[allow(non_camel_case_types)]
1550 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1551 /// enum Rule {}
1552 ///
1553 /// let input = "aa";
1554 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1555 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1556 /// |state| state.stack_pop()
1557 /// );
1558 /// assert!(result.is_ok());
1559 /// assert_eq!(result.unwrap().position().pos(), 2);
1560 /// ```
1561 #[inline]
1562 pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1563 let string = self
1564 .stack
1565 .pop()
1566 .expect("pop was called on empty stack")
1567 .as_borrowed_or_rc();
1568 self.match_string(string.as_str())
1569 }
1570
1571 /// Matches part of the state of the stack.
1572 ///
1573 /// # Examples
1574 ///
1575 /// ```
1576 /// # use pest::{self, MatchDir};
1577 /// # #[allow(non_camel_case_types)]
1578 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1579 /// enum Rule {}
1580 ///
1581 /// let input = "abcd cd cb";
1582 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1583 /// let mut result = state
1584 /// .stack_push(|state| state.match_string("a"))
1585 /// .and_then(|state| state.stack_push(|state| state.match_string("b")))
1586 /// .and_then(|state| state.stack_push(|state| state.match_string("c")))
1587 /// .and_then(|state| state.stack_push(|state| state.match_string("d")))
1588 /// .and_then(|state| state.match_string(" "))
1589 /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1590 /// .and_then(|state| state.match_string(" "))
1591 /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1592 /// assert!(result.is_ok());
1593 /// assert_eq!(result.unwrap().position().pos(), 10);
1594 /// ```
1595 #[inline]
1596 pub fn stack_match_peek_slice(
1597 mut self: Box<Self>,
1598 start: i32,
1599 end: Option<i32>,
1600 match_dir: MatchDir,
1601 ) -> ParseResult<Box<Self>> {
1602 let range = match constrain_idxs(start, end, self.stack.len()) {
1603 Some(r) => r,
1604 None => return Err(self),
1605 };
        // Succeed immediately (return `Ok`) if an empty sequence is requested.
1607 if range.end <= range.start {
1608 return Ok(self);
1609 }
1610
1611 let mut position = self.position;
1612 let result = {
1613 let mut iter_b2t = self.stack[range].iter();
1614 let matcher =
1615 |span: &SpanOrLiteral<'_>| position.match_string(span.as_borrowed_or_rc().as_str());
1616 match match_dir {
1617 MatchDir::BottomToTop => iter_b2t.all(matcher),
1618 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1619 }
1620 };
1621 if result {
1622 self.position = position;
1623 Ok(self)
1624 } else {
1625 Err(self)
1626 }
1627 }
1628
1629 /// Matches the full state of the stack.
1630 ///
1631 /// # Examples
1632 ///
1633 /// ```
1634 /// # use pest;
1635 /// # #[allow(non_camel_case_types)]
1636 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1637 /// enum Rule {}
1638 ///
1639 /// let input = "abba";
1640 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1641 /// let mut result = state
1642 /// .stack_push(|state| state.match_string("a"))
1643 /// .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1644 /// .and_then(|state| state.stack_match_peek());
1645 /// assert!(result.is_ok());
1646 /// assert_eq!(result.unwrap().position().pos(), 4);
1647 /// ```
1648 #[inline]
1649 pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1650 self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1651 }
1652
1653 /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1654 ///
1655 /// # Examples
1656 ///
1657 /// ```
    /// # use pest;
1659 /// # #[allow(non_camel_case_types)]
1660 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1661 /// enum Rule {}
1662 ///
1663 /// let input = "aaaa";
1664 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1665 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1666 /// state.stack_push(|state| state.match_string("a"))
    /// }).and_then(|state| state.stack_match_pop());
1668 /// assert!(result.is_ok());
1669 /// assert_eq!(result.unwrap().position().pos(), 4);
1670 /// ```
1671 #[inline]
1672 pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1673 let mut position = self.position;
1674 let mut result = true;
1675 while let Some(span) = self.stack.pop() {
1676 result = position.match_string(span.as_borrowed_or_rc().as_str());
1677 if !result {
1678 break;
1679 }
1680 }
1681
1682 if result {
1683 self.position = position;
1684 Ok(self)
1685 } else {
1686 Err(self)
1687 }
1688 }
1689
1690 /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1691 /// `Err(Box<ParserState>)` otherwise.
1692 ///
1693 /// # Examples
1694 ///
1695 /// ```
1696 /// # use pest;
1697 /// # #[allow(non_camel_case_types)]
1698 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1699 /// enum Rule {}
1700 ///
1701 /// let input = "aa";
1702 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1703 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1704 /// |state| state.stack_drop()
1705 /// );
1706 /// assert!(result.is_ok());
1707 /// assert_eq!(result.unwrap().position().pos(), 1);
1708 /// ```
1709 #[inline]
1710 pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1711 match self.stack.pop() {
1712 Some(_) => Ok(self),
1713 None => Err(self),
1714 }
1715 }
1716
1717 /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1718 /// this method only restores the stack.
1719 ///
1720 /// # Examples
1721 ///
1722 /// ```
1723 /// # use pest;
1724 /// # #[allow(non_camel_case_types)]
1725 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1726 /// enum Rule {}
1727 ///
1728 /// let input = "ab";
1729 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1730 /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1731 /// state.match_string("a")).and_then(|state| state.match_string("a"))
1732 /// );
1733 ///
1734 /// assert!(result.is_err());
1735 ///
    /// // Since the rule doesn't match, the "a" pushed to the stack will be removed.
1737 /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1738 /// assert!(catch_panic.is_err());
1739 /// ```
1740 #[inline]
1741 pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1742 where
1743 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1744 {
1745 match f(self.checkpoint()) {
1746 Ok(state) => Ok(state.checkpoint_ok()),
1747 Err(state) => Err(state.restore()),
1748 }
1749 }
1750
1751 // Mark the current state as a checkpoint and return the `Box`.
1752 #[inline]
1753 pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1754 self.stack.snapshot();
1755 self
1756 }
1757
    // The checkpointed operation succeeded,
    // so remove the snapshot without touching other stack state.
1760 #[inline]
1761 pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1762 self.stack.clear_snapshot();
1763 self
1764 }
1765
1766 // Restore the current state to the most recent checkpoint.
1767 #[inline]
1768 pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1769 self.stack.restore();
1770 self
1771 }
1772}
1773
1774/// Helper function used only in case stack operations (PUSH/POP) are used in grammar.
1775fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1776 let start_norm = normalize_index(start, len)?;
1777 let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1778 Some(start_norm..end_norm)
1779}
1780
1781/// `constrain_idxs` helper function.
1782/// Normalizes the index using its sequence’s length.
1783/// Returns `None` if the normalized index is OOB.
1784fn normalize_index(i: i32, len: usize) -> Option<usize> {
1785 if i > len as i32 {
1786 None
1787 } else if i >= 0 {
1788 Some(i as usize)
1789 } else {
1790 let real_i = len as i32 + i;
1791 if real_i >= 0 {
1792 Some(real_i as usize)
1793 } else {
1794 None
1795 }
1796 }
1797}
1798
1799#[cfg(test)]
1800mod test {
1801 use super::*;
1802
1803 #[test]
1804 fn normalize_index_pos() {
1805 assert_eq!(normalize_index(4, 6), Some(4));
1806 assert_eq!(normalize_index(5, 5), Some(5));
1807 assert_eq!(normalize_index(6, 3), None);
1808 }
1809
1810 #[test]
1811 fn normalize_index_neg() {
1812 assert_eq!(normalize_index(-4, 6), Some(2));
1813 assert_eq!(normalize_index(-5, 5), Some(0));
1814 assert_eq!(normalize_index(-6, 3), None);
1815 }
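
    // An illustrative check of `constrain_idxs`, the helper that combines two
    // `normalize_index` calls: negative indices count from the end, a missing
    // `end` means "up to the full length", and an out-of-bounds start yields `None`.
    #[test]
    fn constrain_idxs_examples() {
        assert_eq!(constrain_idxs(0, None, 4), Some(0..4));
        assert_eq!(constrain_idxs(-2, None, 4), Some(2..4));
        assert_eq!(constrain_idxs(1, Some(-1), 4), Some(1..3));
        assert_eq!(constrain_idxs(6, None, 4), None);
    }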
1816}