syntect/parsing/
parser.rs

Help
1// Suppression of a false positive clippy lint. Upstream issue:
2//
3//   mutable_key_type false positive for raw pointers
4//   https://github.com/rust-lang/rust-clippy/issues/6745
5//
6// We use `*const MatchPattern` as key in our `SearchCache` hash map.
7// Clippy thinks this is a problem since `MatchPattern` has interior mutability
8// via `MatchPattern::regex::regex` which is an `AtomicLazyCell`.
9// But raw pointers are hashed via the pointer itself, not what is pointed to.
10// See https://github.com/rust-lang/rust/blob/1.54.0/library/core/src/hash/mod.rs#L717-L725
11#![allow(clippy::mutable_key_type)]
12
13use super::syntax_definition::*;
14use super::scope::*;
15use super::regex::Region;
16use std::usize;
17use std::collections::HashMap;
18use std::i32;
19use std::hash::BuildHasherDefault;
20use fnv::FnvHasher;
21use crate::parsing::syntax_set::{SyntaxSet, SyntaxReference};
22use crate::parsing::syntax_definition::ContextId;
23
24/// Errors that can occur while parsing.
25#[derive(Debug, thiserror::Error)]
26#[non_exhaustive]
27pub enum ParsingError {
28    #[error("Somehow main context was popped from the stack")]
29    MissingMainContext,
30    /// A context is missing. Usually caused by a syntax referencing a another
31    /// syntax that is not known to syntect. See e.g. <https://github.com/trishume/syntect/issues/421>
32    #[error("Missing context with ID '{0:?}'")]
33    MissingContext(ContextId),
34    #[error("Bad index to match_at: {0}")]
35    BadMatchIndex(usize),
36    #[error("Tried to use a ContextReference that has not bee resolved yet: {0:?}")]
37    UnresolvedContextReference(ContextReference),
38}
39
/// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing.
///
/// If you are parsing an entire file you create one of these at the start and use it
/// all the way to the end.
///
/// # Caching
///
/// One reason this is exposed is that since it implements `Clone` you can actually cache
/// these (probably along with a [`HighlightState`]) and only re-start parsing from the point of a change.
/// See the docs for [`HighlightState`] for more in-depth discussion of caching.
///
/// This state doesn't keep track of the current scope stack and parsing only returns changes to this stack
/// so if you want to construct scope stacks you'll need to keep track of that as well.
/// Note that [`HighlightState`] contains exactly this as a public field that you can use.
///
/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to do so eventually.
/// It is not recommended that you try caching the first time you implement highlighting.
///
/// [`HighlightState`]: ../highlighting/struct.HighlightState.html
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParseState {
    // Stack of active context levels. The last element is the level whose
    // patterns are currently being matched.
    stack: Vec<StateLevel>,
    // True until the first `parse_line` call has handled the initial
    // `meta_content_scope` of the starting context.
    first_line: bool,
    // See issue #101. Contains indices of frames pushed by `with_prototype`s.
    // Doesn't look at `with_prototype`s below top of stack.
    proto_starts: Vec<usize>,
}
67
/// One frame of the parser's context stack.
#[derive(Debug, Clone, Eq, PartialEq)]
struct StateLevel {
    // The context this frame refers to.
    context: ContextId,
    // Contexts attached to this frame through `with_prototype`; their
    // patterns are tried in addition to those of `context`.
    prototypes: Vec<ContextId>,
    // Regions of the match that pushed this frame, together with the line
    // they refer to. Only stored when the context or one of its prototypes
    // uses backreferences.
    captures: Option<(Region, String)>,
}
74
/// The winning pattern match for one parsing step.
#[derive(Debug)]
struct RegexMatch<'a> {
    // Regions of the match; position 0 is the span of the whole match.
    regions: Region,
    // Context that owns the matched pattern.
    context: &'a Context,
    // Index of the matched pattern inside `context` (looked up via `match_at`).
    pat_index: usize,
    // Whether the pattern was reached through a `with_prototype` context.
    from_with_prototype: bool,
    // Whether executing this match would be a non-consuming pop directly
    // after a non-consuming push at the same position (an infinite loop).
    would_loop: bool,
}
83
/// Per-line cache of search results, keyed by the address of the pattern.
///
/// The value is `Some(region)` holding the regions of the most recently found
/// match, or `None` if the pattern was searched and did not match.
type SearchCache = HashMap<*const MatchPattern, Option<Region>, BuildHasherDefault<FnvHasher>>;
86
87// To understand the implementation of this, here's an introduction to how
88// Sublime Text syntax definitions work.
89//
90// Let's say we have the following made-up syntax definition:
91//
92//     contexts:
93//       main:
94//         - match: A
95//           scope: scope.a.first
96//           push: context-a
97//         - match: b
98//           scope: scope.b
99//         - match: \w+
100//           scope: scope.other
101//       context-a:
102//         - match: a+
103//           scope: scope.a.rest
104//         - match: (?=.)
105//           pop: true
106//
107// There are two contexts, `main` and `context-a`. Each context contains a list
108// of match rules with instructions for how to proceed.
109//
110// Let's say we have the input string " Aaaabxxx". We start at position 0 in
111// the string. We keep a stack of contexts, which at the beginning is just main.
112//
113// So we start by looking at the top of the context stack (main), and look at
114// the rules in order. The rule that wins is the first one that matches
115// "earliest" in the input string. In our example:
116//
117// 1. The first one matches "A". Note that matches are not anchored, so this
118//    matches at position 1.
119// 2. The second one matches "b", so position 5. The first rule is winning.
120// 3. The third one matches "\w+", so also position 1. But because the first
121//    rule comes first, it wins.
122//
123// So now we execute the winning rule. Whenever we matched some text, we assign
124// the scope (if there is one) to the matched text and advance our position to
125// after the matched text. The scope is "scope.a.first" and our new position is
126// after the "A", so 2. The "push" means that we should change our stack by
127// pushing `context-a` on top of it.
128//
129// In the next step, we repeat the above, but now with the rules in `context-a`.
130// The result is that we match "a+" and assign "scope.a.rest" to "aaa", and our
131// new position is now after the "aaa". Note that there was no instruction for
132// changing the stack, so we stay in that context.
133//
134// In the next step, the first rule doesn't match anymore, so we go to the next
135// rule where "(?=.)" matches. The instruction is to "pop", which means we
136// pop the top of our context stack, which means we're now back in main.
137//
138// This time in main, we match "b", and in the next step we match the rest with
139// "\w+", and we're done.
140//
141//
142// ## Preventing loops
143//
144// These are the basics of how matching works. Now, you saw that you can write
145// patterns that result in an empty match and don't change the position. These
146// are called non-consuming matches. The problem with them is that they could
147// result in infinite loops. Let's look at a syntax where that is the case:
148//
149//     contexts:
150//       main:
151//         - match: (?=.)
152//           push: test
153//       test:
154//         - match: \w+
155//           scope: word
156//         - match: (?=.)
157//           pop: true
158//
159// This is a bit silly, but it's a minimal example for explaining how matching
160// works in that case.
161//
162// Let's say we have the input string " hello". In `main`, our rule matches and
163// we go into `test` and stay at position 0. Now, the best match is the rule
164// with "pop". But if we used that rule, we'd pop back to `main` and would still
165// be at the same position we started at! So this would be an infinite loop,
166// which we don't want.
167//
168// So what Sublime Text does in case a looping rule "won":
169//
170// * If there's another rule that matches at the same position and does not
171//   result in a loop, use that instead.
172// * Otherwise, go to the next position and go through all the rules in the
173//   current context again. Note that it means that the "pop" could again be the
174//   winning rule, but that's ok as it wouldn't result in a loop anymore.
175//
176// So in our input string, we'd skip one character and try to match the rules
177// again. This time, the "\w+" wins because it comes first.
178
179impl ParseState {
180    /// Creates a state from a syntax definition, keeping its own reference-counted point to the
181    /// main context of the syntax
182    pub fn new(syntax: &SyntaxReference) -> ParseState {
183        let start_state = StateLevel {
184            context: syntax.context_ids()["__start"],
185            prototypes: Vec::new(),
186            captures: None,
187        };
188        ParseState {
189            stack: vec![start_state],
190            first_line: true,
191            proto_starts: Vec::new(),
192        }
193    }
194
195    /// Parses a single line of the file. Because of the way regex engines work you unfortunately
196    /// have to pass in a single line contiguous in memory. This can be bad for really long lines.
197    /// Sublime Text avoids this by just not highlighting lines that are too long (thousands of characters).
198    ///
199    /// For efficiency reasons this returns only the changes to the current scope at each point in the line.
200    /// You can use [`ScopeStack::apply`] on each operation in succession to get the stack for a given point.
201    /// Look at the code in `highlighter.rs` for an example of doing this for highlighting purposes.
202    ///
203    /// The returned vector is in order both by index to apply at (the `usize`) and also by order to apply them at a
204    /// given index (e.g popping old scopes before pushing new scopes).
205    ///
206    /// The [`SyntaxSet`] has to be the one that contained the syntax that was used to construct
207    /// this [`ParseState`], or an extended version of it. Otherwise the parsing would return the
208    /// wrong result or even panic. The reason for this is that contexts within the [`SyntaxSet`]
209    /// are referenced via indexes.
210    ///
211    /// [`ScopeStack::apply`]: struct.ScopeStack.html#method.apply
212    /// [`SyntaxSet`]: struct.SyntaxSet.html
213    /// [`ParseState`]: struct.ParseState.html
214    pub fn parse_line(
215        &mut self,
216        line: &str,
217        syntax_set: &SyntaxSet,
218    ) -> Result<Vec<(usize, ScopeStackOp)>, ParsingError> {
219        if self.stack.is_empty() {
220            return Err(ParsingError::MissingMainContext)
221        }
222        let mut match_start = 0;
223        let mut res = Vec::new();
224
225        if self.first_line {
226            let cur_level = &self.stack[self.stack.len() - 1];
227            let context = syntax_set.get_context(&cur_level.context)?;
228            if !context.meta_content_scope.is_empty() {
229                res.push((0, ScopeStackOp::Push(context.meta_content_scope[0])));
230            }
231            self.first_line = false;
232        }
233
234        let mut regions = Region::new();
235        let fnv = BuildHasherDefault::<FnvHasher>::default();
236        let mut search_cache: SearchCache = HashMap::with_capacity_and_hasher(128, fnv);
237        // Used for detecting loops with push/pop, see long comment above.
238        let mut non_consuming_push_at = (0, 0);
239
240        while self.parse_next_token(
241            line,
242            syntax_set,
243            &mut match_start,
244            &mut search_cache,
245            &mut regions,
246            &mut non_consuming_push_at,
247            &mut res
248        )? {}
249
250        Ok(res)
251    }
252
253    #[allow(clippy::too_many_arguments)]
254    fn parse_next_token(
255        &mut self,
256        line: &str,
257        syntax_set: &SyntaxSet,
258        start: &mut usize,
259        search_cache: &mut SearchCache,
260        regions: &mut Region,
261        non_consuming_push_at: &mut (usize, usize),
262        ops: &mut Vec<(usize, ScopeStackOp)>,
263    ) -> Result<bool, ParsingError> {
264        let check_pop_loop = {
265            let (pos, stack_depth) = *non_consuming_push_at;
266            pos == *start && stack_depth == self.stack.len()
267        };
268
269        // Trim proto_starts that are no longer valid
270        while self.proto_starts.last().map(|start| *start >= self.stack.len()).unwrap_or(false) {
271            self.proto_starts.pop();
272        }
273
274        let best_match = self.find_best_match(line, *start, syntax_set, search_cache, regions, check_pop_loop)?;
275
276        if let Some(reg_match) = best_match {
277            if reg_match.would_loop {
278                // A push that doesn't consume anything (a regex that resulted
279                // in an empty match at the current position) can not be
280                // followed by a non-consuming pop. Otherwise we're back where
281                // we started and would try the same sequence of matches again,
282                // resulting in an infinite loop. In this case, Sublime Text
283                // advances one character and tries again, thus preventing the
284                // loop.
285
286                // println!("pop_would_loop for match {:?}, start {}", reg_match, *start);
287
288                // nth(1) gets the next character if there is one. Need to do
289                // this instead of just += 1 because we have byte indices and
290                // unicode characters can be more than 1 byte.
291                if let Some((i, _)) = line[*start..].char_indices().nth(1) {
292                    *start += i;
293                    return Ok(true);
294                } else {
295                    // End of line, no character to advance and no point trying
296                    // any more patterns.
297                    return Ok(false);
298                }
299            }
300
301            let match_end = reg_match.regions.pos(0).unwrap().1;
302
303            let consuming = match_end > *start;
304            if !consuming {
305                // The match doesn't consume any characters. If this is a
306                // "push", remember the position and stack size so that we can
307                // check the next "pop" for loops. Otherwise leave the state,
308                // e.g. non-consuming "set" could also result in a loop.
309                let context = reg_match.context;
310                let match_pattern = context.match_at(reg_match.pat_index)?;
311                if let MatchOperation::Push(_) = match_pattern.operation {
312                    *non_consuming_push_at = (match_end, self.stack.len() + 1);
313                }
314            }
315
316            *start = match_end;
317
318            // ignore `with_prototype`s below this if a context is pushed
319            if reg_match.from_with_prototype {
320                // use current height, since we're before the actual push
321                self.proto_starts.push(self.stack.len());
322            }
323
324            let level_context = {
325                let id = &self.stack[self.stack.len() - 1].context;
326                syntax_set.get_context(id)?
327            };
328            self.exec_pattern(line, &reg_match, level_context, syntax_set, ops)?;
329
330            Ok(true)
331        } else {
332            Ok(false)
333        }
334    }
335
336    fn find_best_match<'a>(
337        &self,
338        line: &str,
339        start: usize,
340        syntax_set: &'a SyntaxSet,
341        search_cache: &mut SearchCache,
342        regions: &mut Region,
343        check_pop_loop: bool,
344    ) -> Result<Option<RegexMatch<'a>>, ParsingError> {
345        let cur_level = &self.stack[self.stack.len() - 1];
346        let context = syntax_set.get_context(&cur_level.context)?;
347        let prototype = if let Some(ref p) = context.prototype {
348            Some(p)
349        } else {
350            None
351        };
352
353        // Build an iterator for the contexts we want to visit in order
354        let context_chain = {
355            let proto_start = self.proto_starts.last().cloned().unwrap_or(0);
356            // Sublime applies with_prototypes from bottom to top
357            let with_prototypes = self.stack[proto_start..].iter().flat_map(|lvl| lvl.prototypes.iter().map(move |ctx| (true, ctx, lvl.captures.as_ref())));
358            let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
359            let cur_context = Some((false, &cur_level.context, cur_level.captures.as_ref())).into_iter();
360            with_prototypes.chain(cur_prototype).chain(cur_context)
361        };
362
363        // println!("{:#?}", cur_level);
364        // println!("token at {} on {}", start, line.trim_right());
365
366        let mut min_start = usize::MAX;
367        let mut best_match: Option<RegexMatch<'_>> = None;
368        let mut pop_would_loop = false;
369
370        for (from_with_proto, ctx, captures) in context_chain {
371            for (pat_context, pat_index) in context_iter(syntax_set, syntax_set.get_context(ctx)?) {
372                let match_pat = pat_context.match_at(pat_index)?;
373
374                if let Some(match_region) = self.search(
375                    line, start, match_pat, captures, search_cache, regions
376                ) {
377                    let (match_start, match_end) = match_region.pos(0).unwrap();
378
379                    // println!("matched pattern {:?} at start {} end {}", match_pat.regex_str, match_start, match_end);
380
381                    if match_start < min_start || (match_start == min_start && pop_would_loop) {
382                        // New match is earlier in text than old match,
383                        // or old match was a looping pop at the same
384                        // position.
385
386                        // println!("setting as current match");
387
388                        min_start = match_start;
389
390                        let consuming = match_end > start;
391                        pop_would_loop = check_pop_loop
392                            && !consuming
393                            && matches!(match_pat.operation, MatchOperation::Pop);
394
395                        best_match = Some(RegexMatch {
396                            regions: match_region,
397                            context: pat_context,
398                            pat_index,
399                            from_with_prototype: from_with_proto,
400                            would_loop: pop_would_loop,
401                        });
402
403                        if match_start == start && !pop_would_loop {
404                            // We're not gonna find a better match after this,
405                            // so as an optimization we can stop matching now.
406                            return Ok(best_match);
407                        }
408                    }
409                }
410            }
411        }
412        Ok(best_match)
413    }
414
415    fn search(&self,
416              line: &str,
417              start: usize,
418              match_pat: &MatchPattern,
419              captures: Option<&(Region, String)>,
420              search_cache: &mut SearchCache,
421              regions: &mut Region,
422    ) -> Option<Region> {
423        // println!("{} - {:?} - {:?}", match_pat.regex_str, match_pat.has_captures, cur_level.captures.is_some());
424        let match_ptr = match_pat as *const MatchPattern;
425
426        if let Some(maybe_region) = search_cache.get(&match_ptr) {
427            if let Some(ref region) = *maybe_region {
428                let match_start = region.pos(0).unwrap().0;
429                if match_start >= start {
430                    // Cached match is valid, return it. Otherwise do another
431                    // search below.
432                    return Some(region.clone());
433                }
434            } else {
435                // Didn't find a match earlier, so no point trying to match it again
436                return None;
437            }
438        }
439
440        let (matched, can_cache) = match (match_pat.has_captures, captures) {
441            (true, Some(captures)) => {
442                let (region, s) = captures;
443                let regex = match_pat.regex_with_refs(region, s);
444                let matched = regex.search(line, start, line.len(), Some(regions));
445                (matched, false)
446            }
447            _ => {
448                let regex = match_pat.regex();
449                let matched = regex.search(line, start, line.len(), Some(regions));
450                (matched, true)
451            }
452        };
453
454        if matched {
455            let (match_start, match_end) = regions.pos(0).unwrap();
456            // this is necessary to avoid infinite looping on dumb patterns
457            let does_something = match match_pat.operation {
458                MatchOperation::None => match_start != match_end,
459                _ => true,
460            };
461            if can_cache && does_something {
462                search_cache.insert(match_pat, Some(regions.clone()));
463            }
464            if does_something {
465                // print!("catch {} at {} on {}", match_pat.regex_str, match_start, line);
466                return Some(regions.clone());
467            }
468        } else if can_cache {
469            search_cache.insert(match_pat, None);
470        }
471        None
472    }
473
474    /// Returns true if the stack was changed
475    fn exec_pattern<'a>(
476        &mut self,
477        line: &str,
478        reg_match: &RegexMatch<'a>,
479        level_context: &'a Context,
480        syntax_set: &'a SyntaxSet,
481        ops: &mut Vec<(usize, ScopeStackOp)>,
482    ) -> Result<bool, ParsingError> {
483        let (match_start, match_end) = reg_match.regions.pos(0).unwrap();
484        let context = reg_match.context;
485        let pat = context.match_at(reg_match.pat_index)?;
486        // println!("running pattern {:?} on '{}' at {}, operation {:?}", pat.regex_str, line, match_start, pat.operation);
487
488        self.push_meta_ops(true, match_start, level_context, &pat.operation, syntax_set, ops)?;
489        for s in &pat.scope {
490            // println!("pushing {:?} at {}", s, match_start);
491            ops.push((match_start, ScopeStackOp::Push(*s)));
492        }
493        if let Some(ref capture_map) = pat.captures {
494            // captures could appear in an arbitrary order, have to produce ops in right order
495            // ex: ((bob)|(hi))* could match hibob in wrong order, and outer has to push first
496            // we don't have to handle a capture matching multiple times, Sublime doesn't
497            let mut map: Vec<((usize, i32), ScopeStackOp)> = Vec::new();
498            for &(cap_index, ref scopes) in capture_map.iter() {
499                if let Some((cap_start, cap_end)) = reg_match.regions.pos(cap_index) {
500                    // marking up empty captures causes pops to be sorted wrong
501                    if cap_start == cap_end {
502                        continue;
503                    }
504                    // println!("capture {:?} at {:?}-{:?}", scopes[0], cap_start, cap_end);
505                    for scope in scopes.iter() {
506                        map.push(((cap_start, -((cap_end - cap_start) as i32)),
507                                  ScopeStackOp::Push(*scope)));
508                    }
509                    map.push(((cap_end, i32::MIN), ScopeStackOp::Pop(scopes.len())));
510                }
511            }
512            map.sort_by(|a, b| a.0.cmp(&b.0));
513            for ((index, _), op) in map.into_iter() {
514                ops.push((index, op));
515            }
516        }
517        if !pat.scope.is_empty() {
518            // println!("popping at {}", match_end);
519            ops.push((match_end, ScopeStackOp::Pop(pat.scope.len())));
520        }
521        self.push_meta_ops(false, match_end, level_context, &pat.operation, syntax_set, ops)?;
522
523        self.perform_op(line, &reg_match.regions, pat, syntax_set)
524    }
525
526    fn push_meta_ops(
527        &self,
528        initial: bool,
529        index: usize,
530        cur_context: &Context,
531        match_op: &MatchOperation,
532        syntax_set: &SyntaxSet,
533        ops: &mut Vec<(usize, ScopeStackOp)>,
534    ) -> Result<(), ParsingError>{
535        // println!("metas ops for {:?}, initial: {}",
536        //          match_op,
537        //          initial);
538        // println!("{:?}", cur_context.meta_scope);
539        match *match_op {
540            MatchOperation::Pop => {
541                let v = if initial {
542                    &cur_context.meta_content_scope
543                } else {
544                    &cur_context.meta_scope
545                };
546                if !v.is_empty() {
547                    ops.push((index, ScopeStackOp::Pop(v.len())));
548                }
549
550                // cleared scopes are restored after the scopes from match pattern that invoked the pop are applied
551                if !initial && cur_context.clear_scopes.is_some() {
552                    ops.push((index, ScopeStackOp::Restore))
553                }
554            },
555            // for some reason the ST3 behaviour of set is convoluted and is inconsistent with the docs and other ops
556            // - the meta_content_scope of the current context is applied to the matched thing, unlike pop
557            // - the clear_scopes are applied after the matched token, unlike push
558            // - the interaction with meta scopes means that the token has the meta scopes of both the current scope and the new scope.
559            MatchOperation::Push(ref context_refs) |
560            MatchOperation::Set(ref context_refs) => {
561                let is_set = matches!(*match_op, MatchOperation::Set(_));
562                // a match pattern that "set"s keeps the meta_content_scope and meta_scope from the previous context
563                if initial {
564                    if is_set && cur_context.clear_scopes.is_some() {
565                        // cleared scopes from the old context are restored immediately
566                        ops.push((index, ScopeStackOp::Restore));
567                    }
568                    // add each context's meta scope
569                    for r in context_refs.iter() {
570                        let ctx = r.resolve(syntax_set)?;
571
572                        if !is_set {
573                            if let Some(clear_amount) = ctx.clear_scopes {
574                                ops.push((index, ScopeStackOp::Clear(clear_amount)));
575                            }
576                        }
577
578                        for scope in ctx.meta_scope.iter() {
579                            ops.push((index, ScopeStackOp::Push(*scope)));
580                        }
581                    }
582                } else {
583                    let repush = (is_set && (!cur_context.meta_scope.is_empty() || !cur_context.meta_content_scope.is_empty())) || context_refs.iter().any(|r| {
584                        let ctx = r.resolve(syntax_set).unwrap();
585
586                        !ctx.meta_content_scope.is_empty() || (ctx.clear_scopes.is_some() && is_set)
587                    });
588                    if repush {
589                        // remove previously pushed meta scopes, so that meta content scopes will be applied in the correct order
590                        let mut num_to_pop : usize = context_refs.iter().map(|r| {
591                            let ctx = r.resolve(syntax_set).unwrap();
592                            ctx.meta_scope.len()
593                        }).sum();
594
595                        // also pop off the original context's meta scopes
596                        if is_set {
597                            num_to_pop += cur_context.meta_content_scope.len() + cur_context.meta_scope.len();
598                        }
599
600                        // do all the popping as one operation
601                        if num_to_pop > 0 {
602                            ops.push((index, ScopeStackOp::Pop(num_to_pop)));
603                        }
604
605                        // now we push meta scope and meta context scope for each context pushed
606                        for r in context_refs {
607                            let ctx = r.resolve(syntax_set)?;
608
609                            // for some reason, contrary to my reading of the docs, set does this after the token
610                            if is_set {
611                                if let Some(clear_amount) = ctx.clear_scopes {
612                                    ops.push((index, ScopeStackOp::Clear(clear_amount)));
613                                }
614                            }
615
616                            for scope in ctx.meta_scope.iter() {
617                                ops.push((index, ScopeStackOp::Push(*scope)));
618                            }
619                            for scope in ctx.meta_content_scope.iter() {
620                                ops.push((index, ScopeStackOp::Push(*scope)));
621                            }
622                        }
623                    }
624                }
625            },
626            MatchOperation::None => (),
627        }
628
629        Ok(())
630    }
631
632    /// Returns true if the stack was changed
633    fn perform_op(
634        &mut self,
635        line: &str,
636        regions: &Region,
637        pat: &MatchPattern,
638        syntax_set: &SyntaxSet
639    ) -> Result<bool, ParsingError> {
640        let (ctx_refs, old_proto_ids) = match pat.operation {
641            MatchOperation::Push(ref ctx_refs) => (ctx_refs, None),
642            MatchOperation::Set(ref ctx_refs) => {
643                // a `with_prototype` stays active when the context is `set`
644                // until the context layer in the stack (where the `with_prototype`
645                // was initially applied) is popped off.
646                (ctx_refs, self.stack.pop().map(|s| s.prototypes))
647            }
648            MatchOperation::Pop => {
649                self.stack.pop();
650                return Ok(true);
651            }
652            MatchOperation::None => return Ok(false),
653        };
654        for (i, r) in ctx_refs.iter().enumerate() {
655            let mut proto_ids = if i == 0 {
656                // it is only necessary to preserve the old prototypes
657                // at the first stack frame pushed
658                old_proto_ids.clone().unwrap_or_else(Vec::new)
659            } else {
660                Vec::new()
661            };
662            if i == ctx_refs.len() - 1 {
663                // if a with_prototype was specified, and multiple contexts were pushed,
664                // then the with_prototype applies only to the last context pushed, i.e.
665                // top most on the stack after all the contexts are pushed - this is also
666                // referred to as the "target" of the push by sublimehq - see
667                // https://forum.sublimetext.com/t/dev-build-3111/19240/17 for more info
668                if let Some(ref p) = pat.with_prototype {
669                    proto_ids.push(p.id()?);
670                }
671            }
672            let context_id = r.id()?;
673            let context = syntax_set.get_context(&context_id)?;
674            let captures = {
675                let mut uses_backrefs = context.uses_backrefs;
676                if !proto_ids.is_empty() {
677                    uses_backrefs = uses_backrefs || proto_ids.iter().any(|id| syntax_set.get_context(id).unwrap().uses_backrefs);
678                }
679                if uses_backrefs {
680                    Some((regions.clone(), line.to_owned()))
681                } else {
682                    None
683                }
684            };
685            self.stack.push(StateLevel {
686                context: context_id,
687                prototypes: proto_ids,
688                captures,
689            });
690        }
691        Ok(true)
692    }
693}
694
695#[cfg(feature = "yaml-load")]
696#[cfg(test)]
697mod tests {
698    use super::*;
699    use crate::parsing::{SyntaxSet, SyntaxSetBuilder, Scope, ScopeStack};
700    use crate::parsing::ScopeStackOp::{Push, Pop, Clear, Restore};
701    use crate::util::debug_print_ops;
702
703    const TEST_SYNTAX: &str = include_str!("../../testdata/parser_tests.sublime-syntax");
704
705    #[test]
706    fn can_parse_simple() {
707        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
708        let mut state = {
709            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
710            ParseState::new(syntax)
711        };
712
713        let ops1 = ops(&mut state, "module Bob::Wow::Troll::Five; 5; end", &ss);
714        let test_ops1 = vec![
715            (0, Push(Scope::new("source.ruby.rails").unwrap())),
716            (0, Push(Scope::new("meta.module.ruby").unwrap())),
717            (0, Push(Scope::new("keyword.control.module.ruby").unwrap())),
718            (6, Pop(2)),
719            (6, Push(Scope::new("meta.module.ruby").unwrap())),
720            (7, Pop(1)),
721            (7, Push(Scope::new("meta.module.ruby").unwrap())),
722            (7, Push(Scope::new("entity.name.module.ruby").unwrap())),
723            (7, Push(Scope::new("support.other.namespace.ruby").unwrap())),
724            (10, Pop(1)),
725            (10, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
726        ];
727        assert_eq!(&ops1[0..test_ops1.len()], &test_ops1[..]);
728
729        let ops2 = ops(&mut state, "def lol(wow = 5)", &ss);
730        let test_ops2 = vec![
731            (0, Push(Scope::new("meta.function.ruby").unwrap())),
732            (0, Push(Scope::new("keyword.control.def.ruby").unwrap())),
733            (3, Pop(2)),
734            (3, Push(Scope::new("meta.function.ruby").unwrap())),
735            (4, Push(Scope::new("entity.name.function.ruby").unwrap())),
736            (7, Pop(1))
737        ];
738        assert_eq!(&ops2[0..test_ops2.len()], &test_ops2[..]);
739    }
740
741    #[test]
742    fn can_parse_yaml() {
743        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
744        let mut state = {
745            let syntax = ps.find_syntax_by_name("YAML").unwrap();
746            ParseState::new(syntax)
747        };
748
749        assert_eq!(ops(&mut state, "key: value\n", &ps), vec![
750            (0, Push(Scope::new("source.yaml").unwrap())),
751            (0, Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())),
752            (0, Push(Scope::new("entity.name.tag.yaml").unwrap())),
753            (3, Pop(2)),
754            (3, Push(Scope::new("punctuation.separator.key-value.mapping.yaml").unwrap())),
755            (4, Pop(1)),
756            (5, Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())),
757            (10, Pop(1)),
758        ]);
759    }
760
761    #[test]
762    fn can_parse_includes() {
763        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
764        let mut state = {
765            let syntax = ss.find_syntax_by_name("HTML (Rails)").unwrap();
766            ParseState::new(syntax)
767        };
768
769        let ops = ops(&mut state, "<script>var lol = '<% def wow(", &ss);
770
771        let mut test_stack = ScopeStack::new();
772        test_stack.push(Scope::new("text.html.ruby").unwrap());
773        test_stack.push(Scope::new("text.html.basic").unwrap());
774        test_stack.push(Scope::new("source.js.embedded.html").unwrap());
775        test_stack.push(Scope::new("source.js").unwrap());
776        test_stack.push(Scope::new("string.quoted.single.js").unwrap());
777        test_stack.push(Scope::new("source.ruby.rails.embedded.html").unwrap());
778        test_stack.push(Scope::new("meta.function.parameters.ruby").unwrap());
779
780        let mut stack = ScopeStack::new();
781        for (_, op) in ops.iter() {
782            stack.apply(op).expect("#[cfg(test)]");
783        }
784        assert_eq!(stack, test_stack);
785    }
786
787    #[test]
788    fn can_parse_backrefs() {
789        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
790        let mut state = {
791            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
792            ParseState::new(syntax)
793        };
794
795        // For parsing HEREDOC, the "SQL" is captured at the beginning and then used in another
796        // regex with a backref, to match the end of the HEREDOC. Note that there can be code
797        // after the marker (`.strip`) here.
798        assert_eq!(ops(&mut state, "lol = <<-SQL.strip", &ss), vec![
799            (0, Push(Scope::new("source.ruby.rails").unwrap())),
800            (4, Push(Scope::new("keyword.operator.assignment.ruby").unwrap())),
801            (5, Pop(1)),
802            (6, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())),
803            (6, Push(Scope::new("punctuation.definition.string.begin.ruby").unwrap())),
804            (12, Pop(1)),
805            (12, Pop(1)),
806            (12, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())),
807            (12, Push(Scope::new("text.sql.embedded.ruby").unwrap())),
808            (12, Clear(ClearAmount::TopN(2))),
809            (12, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
810            (13, Pop(1)),
811            (18, Restore),
812        ]);
813
814        assert_eq!(ops(&mut state, "wow", &ss), vec![]);
815
816        assert_eq!(ops(&mut state, "SQL", &ss), vec![
817            (0, Pop(1)),
818            (0, Push(Scope::new("punctuation.definition.string.end.ruby").unwrap())),
819            (3, Pop(1)),
820            (3, Pop(1)),
821        ]);
822    }
823
824    #[test]
825    fn can_parse_preprocessor_rules() {
826        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
827        let mut state = {
828            let syntax = ss.find_syntax_by_name("C").unwrap();
829            ParseState::new(syntax)
830        };
831
832        assert_eq!(ops(&mut state, "#ifdef FOO", &ss), vec![
833            (0, Push(Scope::new("source.c").unwrap())),
834            (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
835            (0, Push(Scope::new("keyword.control.import.c").unwrap())),
836            (6, Pop(1)),
837            (10, Pop(1)),
838        ]);
839        assert_eq!(ops(&mut state, "{", &ss), vec![
840            (0, Push(Scope::new("meta.block.c").unwrap())),
841            (0, Push(Scope::new("punctuation.section.block.begin.c").unwrap())),
842            (1, Pop(1)),
843        ]);
844        assert_eq!(ops(&mut state, "#else", &ss), vec![
845            (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
846            (0, Push(Scope::new("keyword.control.import.c").unwrap())),
847            (5, Pop(1)),
848            (5, Pop(1)),
849        ]);
850        assert_eq!(ops(&mut state, "{", &ss), vec![
851            (0, Push(Scope::new("meta.block.c").unwrap())),
852            (0, Push(Scope::new("punctuation.section.block.begin.c").unwrap())),
853            (1, Pop(1)),
854        ]);
855        assert_eq!(ops(&mut state, "#endif", &ss), vec![
856            (0, Pop(1)),
857            (0, Push(Scope::new("meta.block.c").unwrap())),
858            (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
859            (0, Push(Scope::new("keyword.control.import.c").unwrap())),
860            (6, Pop(2)),
861            (6, Pop(2)),
862            (6, Push(Scope::new("meta.block.c").unwrap())),
863        ]);
864        assert_eq!(ops(&mut state, "    foo;", &ss), vec![
865            (7, Push(Scope::new("punctuation.terminator.c").unwrap())),
866            (8, Pop(1)),
867        ]);
868        assert_eq!(ops(&mut state, "}", &ss), vec![
869            (0, Push(Scope::new("punctuation.section.block.end.c").unwrap())),
870            (1, Pop(1)),
871            (1, Pop(1)),
872        ]);
873    }
874
875    #[test]
876    fn can_parse_issue25() {
877        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
878        let mut state = {
879            let syntax = ss.find_syntax_by_name("C").unwrap();
880            ParseState::new(syntax)
881        };
882
883        // test fix for issue #25
884        assert_eq!(ops(&mut state, "struct{estruct", &ss).len(), 10);
885    }
886
887    #[test]
888    fn can_compare_parse_states() {
889        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
890        let syntax = ss.find_syntax_by_name("Java").unwrap();
891        let mut state1 = ParseState::new(syntax);
892        let mut state2 = ParseState::new(syntax);
893
894        assert_eq!(ops(&mut state1, "class Foo {", &ss).len(), 11);
895        assert_eq!(ops(&mut state2, "class Fooo {", &ss).len(), 11);
896
897        assert_eq!(state1, state2);
898        ops(&mut state1, "}", &ss);
899        assert_ne!(state1, state2);
900    }
901
902    #[test]
903    fn can_parse_non_nested_clear_scopes() {
904        let line = "'hello #simple_cleared_scopes_test world test \\n '";
905        let expect = [
906            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
907            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
908            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
909        ];
910        expect_scope_stacks(line, &expect, TEST_SYNTAX);
911    }
912
913    #[test]
914    fn can_parse_non_nested_too_many_clear_scopes() {
915        let line = "'hello #too_many_cleared_scopes_test world test \\n '";
916        let expect = [
917            "<example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
918            "<example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
919            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
920        ];
921        expect_scope_stacks(line, &expect, TEST_SYNTAX);
922    }
923
924    #[test]
925    fn can_parse_nested_clear_scopes() {
926        let line = "'hello #nested_clear_scopes_test world foo bar test \\n '";
927        let expect = [
928            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
929            "<source.test>, <example.meta-scope.cleared-previous-meta-scope.example>, <foo>",
930            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
931            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
932        ];
933        expect_scope_stacks(line, &expect, TEST_SYNTAX);
934    }
935
936    #[test]
937    fn can_parse_infinite_loop() {
938        let line = "#infinite_loop_test 123";
939        let expect = [
940            "<source.test>, <constant.numeric.test>",
941        ];
942        expect_scope_stacks(line, &expect, TEST_SYNTAX);
943    }
944
945    #[test]
946    fn can_parse_infinite_seeming_loop() {
947        // See https://github.com/SublimeTextIssues/Core/issues/1190 for an
948        // explanation.
949        let line = "#infinite_seeming_loop_test hello";
950        let expect = [
951            "<source.test>, <keyword.test>",
952            "<source.test>, <test>, <string.unquoted.test>",
953            "<source.test>, <test>, <keyword.control.test>",
954        ];
955        expect_scope_stacks(line, &expect, TEST_SYNTAX);
956    }
957
958    #[test]
959    fn can_parse_prototype_that_pops_main() {
960        let syntax = r#"
961name: test
962scope: source.test
963contexts:
964  prototype:
965    # This causes us to pop out of the main context. Sublime Text handles that
966    # by pushing main back automatically.
967    - match: (?=!)
968      pop: true
969  main:
970    - match: foo
971      scope: test.good
972"#;
973
974        let line = "foo!";
975        let expect = ["<source.test>, <test.good>"];
976        expect_scope_stacks(line, &expect, syntax);
977    }
978
979    #[test]
980    fn can_parse_syntax_with_newline_in_character_class() {
981        let syntax = r#"
982name: test
983scope: source.test
984contexts:
985  main:
986    - match: foo[\n]
987      scope: foo.end
988    - match: foo
989      scope: foo.any
990"#;
991
992        let line = "foo";
993        let expect = ["<source.test>, <foo.end>"];
994        expect_scope_stacks(line, &expect, syntax);
995
996        let line = "foofoofoo";
997        let expect = [
998            "<source.test>, <foo.any>",
999            "<source.test>, <foo.any>",
1000            "<source.test>, <foo.end>",
1001        ];
1002        expect_scope_stacks(line, &expect, syntax);
1003    }
1004
1005    #[test]
1006    fn can_parse_issue120() {
1007        let syntax = SyntaxDefinition::load_from_str(
1008            include_str!("../../testdata/embed_escape_test.sublime-syntax"),
1009            false,
1010            None
1011        ).unwrap();
1012
1013        let line1 = "\"abctest\" foobar";
1014        let expect1 = [
1015            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.begin.html>",
1016            "<meta.attribute-with-value.style.html>, <source.css>",
1017            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.end.html>",
1018            "<meta.attribute-with-value.style.html>, <source.css>, <test.embedded>",
1019            "<top-level.test>",
1020        ];
1021
1022        expect_scope_stacks_with_syntax(line1, &expect1, syntax.clone());
1023
1024        let line2 = ">abctest</style>foobar";
1025        let expect2 = [
1026            "<meta.tag.style.begin.html>, <punctuation.definition.tag.end.html>",
1027            "<source.css.embedded.html>, <test.embedded>",
1028            "<top-level.test>",
1029        ];
1030        expect_scope_stacks_with_syntax(line2, &expect2, syntax);
1031    }
1032
1033    #[test]
1034    fn can_parse_non_consuming_pop_that_would_loop() {
1035        // See https://github.com/trishume/syntect/issues/127
1036        let syntax = r#"
1037name: test
1038scope: source.test
1039contexts:
1040  main:
1041    # This makes us go into "test" without consuming any characters
1042    - match: (?=hello)
1043      push: test
1044  test:
1045    # If we used this match, we'd go back to "main" without consuming anything,
1046    # and then back into "test", infinitely looping. ST detects this at this
1047    # point and ignores this match until at least one character matched.
1048    - match: (?!world)
1049      pop: true
1050    - match: \w+
1051      scope: test.matched
1052"#;
1053
1054        let line = "hello";
1055        let expect = ["<source.test>, <test.matched>"];
1056        expect_scope_stacks(line, &expect, syntax);
1057    }
1058
1059    #[test]
1060    fn can_parse_non_consuming_set_and_pop_that_would_loop() {
1061        let syntax = r#"
1062name: test
1063scope: source.test
1064contexts:
1065  main:
1066    # This makes us go into "a" without advancing
1067    - match: (?=test)
1068      push: a
1069  a:
1070    # This makes us go into "b" without advancing
1071    - match: (?=t)
1072      set: b
1073  b:
1074    # If we used this match, we'd go back to "main" without having advanced,
1075    # which means we'd have an infinite loop like with the previous test.
1076    # So even for a "set", we have to check if we're advancing or not.
1077    - match: (?=t)
1078      pop: true
1079    - match: \w+
1080      scope: test.matched
1081"#;
1082
1083        let line = "test";
1084        let expect = ["<source.test>, <test.matched>"];
1085        expect_scope_stacks(line, &expect, syntax);
1086    }
1087
1088    #[test]
1089    fn can_parse_non_consuming_set_after_consuming_push_that_does_not_loop() {
1090        let syntax = r#"
1091name: test
1092scope: source.test
1093contexts:
1094  main:
1095    # This makes us go into "a", but we consumed a character
1096    - match: t
1097      push: a
1098    - match: \w+
1099      scope: test.matched
1100  a:
1101    # This makes us go into "b" without consuming
1102    - match: (?=e)
1103      set: b
1104  b:
1105    # This match does not result in an infinite loop because we already consumed
1106    # a character to get into "a", so it's ok to pop back into "main".
1107    - match: (?=e)
1108      pop: true
1109"#;
1110
1111        let line = "test";
1112        let expect = ["<source.test>, <test.matched>"];
1113        expect_scope_stacks(line, &expect, syntax);
1114    }
1115
1116    #[test]
1117    fn can_parse_non_consuming_set_after_consuming_set_that_does_not_loop() {
1118        let syntax = r#"
1119name: test
1120scope: source.test
1121contexts:
1122  main:
1123    - match: (?=hello)
1124      push: a
1125    - match: \w+
1126      scope: test.matched
1127  a:
1128    - match: h
1129      set: b
1130  b:
1131    - match: (?=e)
1132      set: c
1133  c:
1134    # This is not an infinite loop because "a" consumed a character, so we can
1135    # actually pop back into main and then match the rest of the input.
1136    - match: (?=e)
1137      pop: true
1138"#;
1139
1140        let line = "hello";
1141        let expect = ["<source.test>, <test.matched>"];
1142        expect_scope_stacks(line, &expect, syntax);
1143    }
1144
1145    #[test]
1146    fn can_parse_non_consuming_pop_that_would_loop_at_end_of_line() {
1147        let syntax = r#"
1148name: test
1149scope: source.test
1150contexts:
1151  main:
1152    # This makes us go into "test" without consuming, even at the end of line
1153    - match: ""
1154      push: test
1155  test:
1156    - match: ""
1157      pop: true
1158    - match: \w+
1159      scope: test.matched
1160"#;
1161
1162        let line = "hello";
1163        let expect = ["<source.test>, <test.matched>"];
1164        expect_scope_stacks(line, &expect, syntax);
1165    }
1166
1167    #[test]
1168    fn can_parse_empty_but_consuming_set_that_does_not_loop() {
1169        let syntax = r#"
1170name: test
1171scope: source.test
1172contexts:
1173  main:
1174    - match: (?=hello)
1175      push: a
1176    - match: ello
1177      scope: test.good
1178  a:
1179    # This is an empty match, but it consumed a character (the "h")
1180    - match: (?=e)
1181      set: b
1182  b:
1183    # .. so it's ok to pop back to main from here
1184    - match: ""
1185      pop: true
1186    - match: ello
1187      scope: test.bad
1188"#;
1189
1190        let line = "hello";
1191        let expect = ["<source.test>, <test.good>"];
1192        expect_scope_stacks(line, &expect, syntax);
1193    }
1194
1195    #[test]
1196    fn can_parse_non_consuming_pop_that_does_not_loop() {
1197        let syntax = r#"
1198name: test
1199scope: source.test
1200contexts:
1201  main:
1202    # This is a non-consuming push, so "b" will need to check for a
1203    # non-consuming pop
1204    - match: (?=hello)
1205      push: [b, a]
1206    - match: ello
1207      scope: test.good
1208  a:
1209    # This pop is ok, it consumed "h"
1210    - match: (?=e)
1211      pop: true
1212  b:
1213    # This is non-consuming, and we set to "c"
1214    - match: (?=e)
1215      set: c
1216  c:
1217    # It's ok to pop back to "main" here because we consumed a character in the
1218    # meantime.
1219    - match: ""
1220      pop: true
1221    - match: ello
1222      scope: test.bad
1223"#;
1224
1225        let line = "hello";
1226        let expect = ["<source.test>, <test.good>"];
1227        expect_scope_stacks(line, &expect, syntax);
1228    }
1229
1230    #[test]
1231    fn can_parse_non_consuming_pop_with_multi_push_that_does_not_loop() {
1232        let syntax = r#"
1233name: test
1234scope: source.test
1235contexts:
1236  main:
1237    - match: (?=hello)
1238      push: [b, a]
1239    - match: ello
1240      scope: test.good
1241  a:
1242    # This pop is ok, as we're not popping back to "main" yet (which would loop),
1243    # we're popping to "b"
1244    - match: ""
1245      pop: true
1246    - match: \w+
1247      scope: test.bad
1248  b:
1249    - match: \w+
1250      scope: test.good
1251"#;
1252
1253        let line = "hello";
1254        let expect = ["<source.test>, <test.good>"];
1255        expect_scope_stacks(line, &expect, syntax);
1256    }
1257
1258    #[test]
1259    fn can_parse_non_consuming_pop_of_recursive_context_that_does_not_loop() {
1260        let syntax = r#"
1261name: test
1262scope: source.test
1263contexts:
1264  main:
1265    - match: xxx
1266      scope: test.good
1267    - include: basic-identifiers
1268
1269  basic-identifiers:
1270    - match: '\w+::'
1271      scope: test.matched
1272      push: no-type-names
1273
1274  no-type-names:
1275      - include: basic-identifiers
1276      - match: \w+
1277        scope: test.matched.inside
1278      # This is a tricky one because when this is the best match,
1279      # we have two instances of "no-type-names" on the stack, so we're popping
1280      # back from "no-type-names" to another "no-type-names".
1281      - match: ''
1282        pop: true
1283"#;
1284
1285        let line = "foo::bar::* xxx";
1286        let expect = ["<source.test>, <test.good>"];
1287        expect_scope_stacks(line, &expect, syntax);
1288    }
1289
1290    #[test]
1291    fn can_parse_non_consuming_pop_order() {
1292        let syntax = r#"
1293name: test
1294scope: source.test
1295contexts:
1296  main:
1297    - match: (?=hello)
1298      push: test
1299  test:
1300    # This matches first
1301    - match: (?=e)
1302      push: good
1303    # But this (looping) match replaces it, because it's an earlier match
1304    - match: (?=h)
1305      pop: true
1306    # And this should not replace it, as it's a later match (only matches at
1307    # the same position can replace looping pops).
1308    - match: (?=o)
1309      push: bad
1310  good:
1311    - match: \w+
1312      scope: test.good
1313  bad:
1314    - match: \w+
1315      scope: test.bad
1316"#;
1317
1318        let line = "hello";
1319        let expect = ["<source.test>, <test.good>"];
1320        expect_scope_stacks(line, &expect, syntax);
1321    }
1322
1323    #[test]
1324    fn can_parse_prototype_with_embed() {
1325        let syntax = r#"
1326name: Javadoc
1327scope: text.html.javadoc
1328contexts:
1329  prototype:
1330    - match: \*
1331      scope: punctuation.definition.comment.javadoc
1332
1333  main:
1334    - meta_include_prototype: false
1335    - match: /\*\*
1336      scope: comment.block.documentation.javadoc punctuation.definition.comment.begin.javadoc
1337      embed: contents
1338      embed_scope: comment.block.documentation.javadoc text.html.javadoc
1339      escape: \*/
1340      escape_captures:
1341        0: comment.block.documentation.javadoc punctuation.definition.comment.end.javadoc
1342
1343  contents:
1344    - match: ''
1345"#;
1346
1347        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1348        expect_scope_stacks_with_syntax("/** * */", &["<comment.block.documentation.javadoc>, <punctuation.definition.comment.begin.javadoc>", "<comment.block.documentation.javadoc>, <text.html.javadoc>, <punctuation.definition.comment.javadoc>", "<comment.block.documentation.javadoc>, <punctuation.definition.comment.end.javadoc>"], syntax);
1349    }
1350
1351    #[test]
1352    fn can_parse_context_included_in_prototype_via_named_reference() {
1353        let syntax = r#"
1354scope: source.test
1355contexts:
1356  prototype:
1357    - match: a
1358      push: a
1359    - match: b
1360      scope: test.bad
1361  main:
1362    - match: unused
1363  # This context is included in the prototype (see `push: a`).
1364  # Because of that, ST doesn't apply the prototype to this context, so if
1365  # we're in here the "b" shouldn't match.
1366  a:
1367    - match: a
1368      scope: test.good
1369"#;
1370
1371        let stack_states = stack_states(parse("aa b", syntax));
1372        assert_eq!(stack_states, vec![
1373            "<source.test>",
1374            "<source.test>, <test.good>",
1375            "<source.test>",
1376        ], "Expected test.bad to not match");
1377    }
1378
1379    #[test]
1380    fn can_parse_with_prototype_set() {
1381        let syntax = r#"%YAML 1.2
1382---
1383scope: source.test-set-with-proto
1384contexts:
1385  main:
1386    - match: a
1387      scope: a
1388      set: next1
1389      with_prototype:
1390        - match: '1'
1391          scope: '1'
1392        - match: '2'
1393          scope: '2'
1394        - match: '3'
1395          scope: '3'
1396        - match: '4'
1397          scope: '4'
1398    - match: '5'
1399      scope: '5'
1400      set: [next3, next2]
1401      with_prototype:
1402        - match: c
1403          scope: cwith
1404  next1:
1405    - match: b
1406      scope: b
1407      set: next2
1408  next2:
1409    - match: c
1410      scope: c
1411      push: next3
1412    - match: e
1413      scope: e
1414      pop: true
1415    - match: f
1416      scope: f
1417      set: [next1, next2]
1418  next3:
1419    - match: d
1420      scope: d
1421    - match: (?=e)
1422      pop: true
1423    - match: c
1424      scope: cwithout
1425"#;
1426
1427        expect_scope_stacks_with_syntax(
1428            "a1b2c3d4e5",
1429            &[
1430                "<a>", "<1>", "<b>", "<2>", "<c>", "<3>", "<d>", "<4>", "<e>", "<5>"
1431            ], SyntaxDefinition::load_from_str(syntax, true, None).unwrap()
1432        );
1433        expect_scope_stacks_with_syntax(
1434            "5cfcecbedcdea",
1435            &[
1436                "<5>", "<cwith>", "<f>", "<e>", "<b>", "<d>", "<cwithout>", "<a>"
1437            ], SyntaxDefinition::load_from_str(syntax, true, None).unwrap()
1438        );
1439    }
1440
1441    #[test]
1442    fn can_parse_issue176() {
1443        let syntax = r#"
1444scope: source.dummy
1445contexts:
1446  main:
1447    - match: (test)(?=(foo))(f)
1448      captures:
1449        1: test
1450        2: ignored
1451        3: f
1452      push:
1453        - match: (oo)
1454          captures:
1455            1: keyword
1456"#;
1457
1458        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1459        expect_scope_stacks_with_syntax("testfoo", &["<test>", /*"<ignored>",*/ "<f>", "<keyword>"], syntax);
1460    }
1461
1462    #[test]
1463    fn can_parse_two_with_prototypes_at_same_stack_level() {
1464        let syntax_yamlstr = r#"
1465%YAML 1.2
1466---
1467# See http://www.sublimetext.com/docs/3/syntax.html
1468scope: source.example-wp
1469contexts:
1470  main:
1471    - match: a
1472      scope: a
1473      push:
1474        - match: b
1475          scope: b
1476          set:
1477            - match: c
1478              scope: c
1479          with_prototype:
1480            - match: '2'
1481              scope: '2'
1482      with_prototype:
1483        - match: '1'
1484          scope: '1'
1485"#;
1486
1487        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1488        expect_scope_stacks_with_syntax("abc12", &["<1>", "<2>"], syntax);
1489    }
1490
1491    #[test]
1492    fn can_parse_two_with_prototypes_at_same_stack_level_set_multiple() {
1493        let syntax_yamlstr = r#"
1494%YAML 1.2
1495---
1496# See http://www.sublimetext.com/docs/3/syntax.html
1497scope: source.example-wp
1498contexts:
1499  main:
1500    - match: a
1501      scope: a
1502      push:
1503        - match: b
1504          scope: b
1505          set: [context1, context2, context3]
1506          with_prototype:
1507            - match: '2'
1508              scope: '2'
1509      with_prototype:
1510        - match: '1'
1511          scope: '1'
1512    - match: '1'
1513      scope: digit1
1514    - match: '2'
1515      scope: digit2
1516  context1:
1517    - match: e
1518      scope: e
1519      pop: true
1520    - match: '2'
1521      scope: digit2
1522  context2:
1523    - match: d
1524      scope: d
1525      pop: true
1526    - match: '2'
1527      scope: digit2
1528  context3:
1529    - match: c
1530      scope: c
1531      pop: true
1532"#;
1533
1534        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1535        expect_scope_stacks_with_syntax("ab12", &["<1>", "<2>"], syntax.clone());
1536        expect_scope_stacks_with_syntax("abc12", &["<1>", "<digit2>"], syntax.clone());
1537        expect_scope_stacks_with_syntax("abcd12", &["<1>", "<digit2>"], syntax.clone());
1538        expect_scope_stacks_with_syntax("abcde12", &["<digit1>", "<digit2>"], syntax);
1539    }
1540
1541    #[test]
1542    fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures() {
1543        let syntax_yamlstr = r#"
1544%YAML 1.2
1545---
1546# See http://www.sublimetext.com/docs/3/syntax.html
1547scope: source.example-wp
1548contexts:
1549  main:
1550    - match: (a)
1551      scope: a
1552      push:
1553        - match: (b)
1554          scope: b
1555          set:
1556            - match: c
1557              scope: c
1558          with_prototype:
1559            - match: d
1560              scope: d
1561      with_prototype:
1562        - match: \1
1563          scope: '1'
1564          pop: true
1565"#;
1566
1567        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1568        expect_scope_stacks_with_syntax("aa", &["<a>", "<1>"], syntax.clone());
1569        expect_scope_stacks_with_syntax("abcdb", &["<a>", "<b>", "<c>", "<d>", "<1>"], syntax);
1570    }
1571
1572    #[test]
1573    fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures_ignore_unexisting() {
1574        let syntax_yamlstr = r#"
1575%YAML 1.2
1576---
1577# See http://www.sublimetext.com/docs/3/syntax.html
1578scope: source.example-wp
1579contexts:
1580  main:
1581    - match: (a)(-)
1582      scope: a
1583      push:
1584        - match: (b)
1585          scope: b
1586          set:
1587            - match: c
1588              scope: c
1589          with_prototype:
1590            - match: d
1591              scope: d
1592      with_prototype:
1593        - match: \2
1594          scope: '2'
1595          pop: true
1596        - match: \1
1597          scope: '1'
1598          pop: true
1599"#;
1600
1601        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1602        expect_scope_stacks_with_syntax("a--", &["<a>", "<2>"], syntax.clone());
1603        // it seems that when ST encounters a non existing pop backreference, it just pops back to the with_prototype's original parent context - i.e. cdb is unscoped
1604        // TODO: it would be useful to have syntest functionality available here for easier testing and clarity
1605        expect_scope_stacks_with_syntax("a-bcdba-", &["<a>", "<b>"], syntax);
1606    }
1607
1608    #[test]
1609    fn can_parse_syntax_with_eol_and_newline() {
1610        let syntax = r#"
1611name: test
1612scope: source.test
1613contexts:
1614  main:
1615    - match: foo$\n
1616      scope: foo.newline
1617"#;
1618
1619        let line = "foo";
1620        let expect = ["<source.test>, <foo.newline>"];
1621        expect_scope_stacks(line, &expect, syntax);
1622    }
1623
1624    #[test]
1625    fn can_parse_syntax_with_eol_only() {
1626        let syntax = r#"
1627name: test
1628scope: source.test
1629contexts:
1630  main:
1631    - match: foo$
1632      scope: foo.newline
1633"#;
1634
1635        let line = "foo";
1636        let expect = ["<source.test>, <foo.newline>"];
1637        expect_scope_stacks(line, &expect, syntax);
1638    }
1639
1640    #[test]
1641    fn can_parse_syntax_with_beginning_of_line() {
1642        let syntax = r#"
1643name: test
1644scope: source.test
1645contexts:
1646  main:
1647    - match: \w+
1648      scope: word
1649      push:
1650        # this should not match at the end of the line
1651        - match: ^\s*$
1652          pop: true
1653        - match: =+
1654          scope: heading
1655          pop: true
1656    - match: .*
1657      scope: other
1658"#;
1659
1660        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1661        let syntax_set = link(syntax_newlines);
1662
1663        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
1664        assert_eq!(ops(&mut state, "foo\n", &syntax_set), vec![
1665            (0, Push(Scope::new("source.test").unwrap())),
1666            (0, Push(Scope::new("word").unwrap())),
1667            (3, Pop(1))
1668        ]);
1669        assert_eq!(ops(&mut state, "===\n", &syntax_set), vec![
1670            (0, Push(Scope::new("heading").unwrap())),
1671            (3, Pop(1))
1672        ]);
1673
1674        assert_eq!(ops(&mut state, "bar\n", &syntax_set), vec![
1675            (0, Push(Scope::new("word").unwrap())),
1676            (3, Pop(1))
1677        ]);
1678        // This should result in popping out of the context
1679        assert_eq!(ops(&mut state, "\n", &syntax_set), vec![]);
1680        // So now this matches other
1681        assert_eq!(ops(&mut state, "====\n", &syntax_set), vec![
1682            (0, Push(Scope::new("other").unwrap())),
1683            (4, Pop(1))
1684        ]);
1685    }
1686
1687    #[test]
1688    fn can_parse_syntax_with_comment_and_eol() {
1689        let syntax = r#"
1690name: test
1691scope: source.test
1692contexts:
1693  main:
1694    - match: (//).*$
1695      scope: comment.line.double-slash
1696"#;
1697
1698        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1699        let syntax_set = link(syntax_newlines);
1700
1701        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
1702        assert_eq!(ops(&mut state, "// foo\n", &syntax_set), vec![
1703            (0, Push(Scope::new("source.test").unwrap())),
1704            (0, Push(Scope::new("comment.line.double-slash").unwrap())),
1705            // 6 is important here, should not be 7. The pattern should *not* consume the newline,
1706            // but instead match before it. This is important for whitespace-sensitive syntaxes
1707            // where newlines terminate statements such as Scala.
1708            (6, Pop(1))
1709        ]);
1710    }
1711
1712    #[test]
1713    fn can_parse_text_with_unicode_to_skip() {
1714        let syntax = r#"
1715name: test
1716scope: source.test
1717contexts:
1718  main:
1719    - match: (?=.)
1720      push: test
1721  test:
1722    - match: (?=.)
1723      pop: true
1724    - match: x
1725      scope: test.good
1726"#;
1727
1728        // U+03C0 GREEK SMALL LETTER PI, 2 bytes in UTF-8
1729        expect_scope_stacks("\u{03C0}x", &["<source.test>, <test.good>"], syntax);
1730        // U+0800 SAMARITAN LETTER ALAF, 3 bytes in UTF-8
1731        expect_scope_stacks("\u{0800}x", &["<source.test>, <test.good>"], syntax);
1732        // U+1F600 GRINNING FACE, 4 bytes in UTF-8
1733        expect_scope_stacks("\u{1F600}x", &["<source.test>, <test.good>"], syntax);
1734    }
1735
1736    #[test]
1737    fn can_include_backrefs() {
1738        let syntax = SyntaxDefinition::load_from_str(r#"
1739                name: Backref Include Test
1740                scope: source.backrefinc
1741                contexts:
1742                  main:
1743                    - match: (a)
1744                      scope: a
1745                      push: context1
1746                  context1:
1747                    - include: context2
1748                  context2:
1749                    - match: \1
1750                      scope: b
1751                      pop: true
1752                "#, true, None).unwrap();
1753
1754        expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
1755    }
1756    
1757    #[test]
1758    fn can_include_nested_backrefs() {
1759        let syntax = SyntaxDefinition::load_from_str(r#"
1760                name: Backref Include Test
1761                scope: source.backrefinc
1762                contexts:
1763                  main:
1764                    - match: (a)
1765                      scope: a
1766                      push: context1
1767                  context1:
1768                    - include: context3
1769                  context3:
1770                    - include: context2
1771                  context2:
1772                    - match: \1
1773                      scope: b
1774                      pop: true
1775                "#, true, None).unwrap();
1776
1777        expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
1778    }
1779
1780    fn expect_scope_stacks(line_without_newline: &str, expect: &[&str], syntax: &str) {
1781        println!("Parsing with newlines");
1782        let line_with_newline = format!("{}\n", line_without_newline);
1783        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1784        expect_scope_stacks_with_syntax(&line_with_newline, expect, syntax_newlines);
1785
1786        println!("Parsing without newlines");
1787        let syntax_nonewlines = SyntaxDefinition::load_from_str(syntax, false, None).unwrap();
1788        expect_scope_stacks_with_syntax(line_without_newline, expect, syntax_nonewlines);
1789    }
1790
1791    fn expect_scope_stacks_with_syntax(line: &str, expect: &[&str], syntax: SyntaxDefinition) {
1792        // check that each expected scope stack appears at least once while parsing the given test line
1793
1794        let syntax_set = link(syntax);
1795        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
1796        let ops = ops(&mut state, line, &syntax_set);
1797        expect_scope_stacks_for_ops(ops, expect);
1798    }
1799
1800    fn expect_scope_stacks_for_ops(ops: Vec<(usize, ScopeStackOp)>, expect: &[&str]) {
1801        let mut criteria_met = Vec::new();
1802        for stack_str in stack_states(ops) {
1803            println!("{}", stack_str);
1804            for expectation in expect.iter() {
1805                if stack_str.contains(expectation) {
1806                    criteria_met.push(expectation);
1807                }
1808            }
1809        }
1810        if let Some(missing) = expect.iter().find(|e| !criteria_met.contains(e)) {
1811            panic!("expected scope stack '{}' missing", missing);
1812        }
1813    }
1814
1815    fn parse(line: &str, syntax: &str) -> Vec<(usize, ScopeStackOp)> {
1816        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1817        let syntax_set = link(syntax);
1818
1819        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
1820        ops(&mut state, line, &syntax_set)
1821    }
1822
1823    fn link(syntax: SyntaxDefinition) -> SyntaxSet {
1824        let mut builder = SyntaxSetBuilder::new();
1825        builder.add(syntax);
1826        builder.build()
1827    }
1828
1829    fn ops(state: &mut ParseState, line: &str, syntax_set: &SyntaxSet) -> Vec<(usize, ScopeStackOp)> {
1830        let ops = state.parse_line(line, syntax_set).expect("#[cfg(test)]");
1831        debug_print_ops(line, &ops);
1832        ops
1833    }
1834
1835    fn stack_states(ops: Vec<(usize, ScopeStackOp)>) -> Vec<String> {
1836        let mut states = Vec::new();
1837        let mut stack = ScopeStack::new();
1838        for (_, op) in ops.iter() {
1839            stack.apply(op).expect("#[cfg(test)]");
1840            let scopes: Vec<String> = stack.as_slice().iter().map(|s| format!("{:?}", s)).collect();
1841            let stack_str = scopes.join(", ");
1842            states.push(stack_str);
1843        }
1844        states
1845    }
1846}
syntect/parsing/parser.rs

syntect/parsing/
parser.rs