#![allow(clippy::mutable_key_type)]
use super::syntax_definition::*;
use super::scope::*;
use super::regex::Region;
use std::usize;
use std::collections::HashMap;
use std::i32;
use std::hash::BuildHasherDefault;
use fnv::FnvHasher;
use crate::parsing::syntax_set::{SyntaxSet, SyntaxReference};
use crate::parsing::syntax_definition::ContextId;
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ParsingError {
#[error("Somehow main context was popped from the stack")]
MissingMainContext,
#[error("Missing context with ID '{0:?}'")]
MissingContext(ContextId),
#[error("Bad index to match_at: {0}")]
BadMatchIndex(usize),
#[error("Tried to use a ContextReference that has not bee resolved yet: {0:?}")]
UnresolvedContextReference(ContextReference),
}
/// Parser state carried from one line to the next.
///
/// The state is cloneable and comparable, so two states can be checked for
/// equality after parsing.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParseState {
// Stack of active contexts; the last element is the current one.
stack: Vec<StateLevel>,
// True until the first call to `parse_line` has handled the initial scope push.
first_line: bool,
// Stack depths recorded when a match came from a `with_prototype` context;
// the last entry is used as the first stack index whose prototypes are searched.
proto_starts: Vec<usize>,
}
/// One entry of the parse stack.
#[derive(Debug, Clone, Eq, PartialEq)]
struct StateLevel {
// ID of the context active at this level.
context: ContextId,
// IDs of `with_prototype` contexts attached to this level.
prototypes: Vec<ContextId>,
// Match regions and the line they refer to; stored only when the context
// (or one of its prototypes) uses backreferences.
captures: Option<(Region, String)>,
}
/// The best pattern match found for the current position in a line.
#[derive(Debug)]
struct RegexMatch<'a> {
// Regions of the match; region 0 is the whole match.
regions: Region,
// Context that holds the matched pattern.
context: &'a Context,
// Index of the matched pattern inside `context`.
pat_index: usize,
// True when the pattern was reached through a `with_prototype` context.
from_with_prototype: bool,
// True when this is a non-consuming pop that was flagged as looping.
would_loop: bool,
}
// Per-line cache of search results, keyed by the address of the `MatchPattern`.
// A `None` value records that the pattern did not match.
type SearchCache = HashMap<*const MatchPattern, Option<Region>, BuildHasherDefault<FnvHasher>>;
impl ParseState {
/// Creates a state positioned at the `__start` context of `syntax`.
///
/// The returned state has a single stack level, no prototypes and no
/// captures, and is flagged as not yet having parsed its first line.
///
/// # Panics
///
/// The `["__start"]` lookup is an indexing operation, so this presumably
/// panics when the syntax has no such context ID.
pub fn new(syntax: &SyntaxReference) -> ParseState {
    let initial_level = StateLevel {
        context: syntax.context_ids()["__start"],
        prototypes: vec![],
        captures: None,
    };
    ParseState {
        stack: vec![initial_level],
        first_line: true,
        proto_starts: vec![],
    }
}
/// Parses one line and returns the scope stack operations it produces.
///
/// Each returned pair is `(byte index into line, operation)`. The state is
/// updated so that the next call continues where this one stopped.
///
/// # Errors
///
/// Returns [`ParsingError::MissingMainContext`] when the stack is empty, and
/// propagates any error raised while looking up contexts or matching tokens.
pub fn parse_line(
    &mut self,
    line: &str,
    syntax_set: &SyntaxSet,
) -> Result<Vec<(usize, ScopeStackOp)>, ParsingError> {
    let top_level = match self.stack.last() {
        Some(level) => level,
        None => return Err(ParsingError::MissingMainContext),
    };

    let mut ops = Vec::new();
    if self.first_line {
        // On the very first line, push the first meta content scope of the
        // current context (if it has one) at position 0.
        let context = syntax_set.get_context(&top_level.context)?;
        if let Some(scope) = context.meta_content_scope.first() {
            ops.push((0, ScopeStackOp::Push(*scope)));
        }
        self.first_line = false;
    }

    let mut cursor = 0;
    let mut regions = Region::new();
    let mut search_cache: SearchCache = HashMap::with_capacity_and_hasher(
        128,
        BuildHasherDefault::<FnvHasher>::default(),
    );
    // (position, stack depth) of the most recent non-consuming push.
    let mut non_consuming_push_at = (0, 0);

    loop {
        let keep_going = self.parse_next_token(
            line,
            syntax_set,
            &mut cursor,
            &mut search_cache,
            &mut regions,
            &mut non_consuming_push_at,
            &mut ops,
        )?;
        if !keep_going {
            break;
        }
    }
    Ok(ops)
}
/// Finds and executes the next match at or after `*start`.
///
/// Returns `Ok(true)` when a token was handled (or a looping pop was skipped
/// by advancing one character) and parsing of the line should continue, and
/// `Ok(false)` when nothing more can be matched on this line.
///
/// `non_consuming_push_at` holds the `(position, stack depth)` recorded by the
/// last push that did not consume any input; it is used to detect pops that
/// would bring the parser straight back to the same state.
#[allow(clippy::too_many_arguments)]
fn parse_next_token(
    &mut self,
    line: &str,
    syntax_set: &SyntaxSet,
    start: &mut usize,
    search_cache: &mut SearchCache,
    regions: &mut Region,
    non_consuming_push_at: &mut (usize, usize),
    ops: &mut Vec<(usize, ScopeStackOp)>,
) -> Result<bool, ParsingError> {
    let check_pop_loop = {
        let (pos, stack_depth) = *non_consuming_push_at;
        pos == *start && stack_depth == self.stack.len()
    };

    // Drop prototype markers that point at or above the current stack top.
    while self
        .proto_starts
        .last()
        .map_or(false, |&proto_start| proto_start >= self.stack.len())
    {
        self.proto_starts.pop();
    }

    let best_match =
        self.find_best_match(line, *start, syntax_set, search_cache, regions, check_pop_loop)?;
    let reg_match = match best_match {
        Some(found) => found,
        None => return Ok(false),
    };

    if reg_match.would_loop {
        // Skip to the start of the next character. `char_indices().nth(1)`
        // is used instead of `+= 1` so multi-byte characters are not split.
        return Ok(match line[*start..].char_indices().nth(1) {
            Some((offset, _)) => {
                *start += offset;
                true
            }
            None => false,
        });
    }

    let match_end = reg_match.regions.pos(0).unwrap().1;
    let consuming = match_end > *start;
    if !consuming {
        // Remember a non-consuming push together with the stack depth it
        // will produce, so a later pop at the same spot can be recognised.
        let match_pattern = reg_match.context.match_at(reg_match.pat_index)?;
        if let MatchOperation::Push(_) = match_pattern.operation {
            *non_consuming_push_at = (match_end, self.stack.len() + 1);
        }
    }
    *start = match_end;

    if reg_match.from_with_prototype {
        self.proto_starts.push(self.stack.len());
    }

    let level_context = {
        let id = &self.stack[self.stack.len() - 1].context;
        syntax_set.get_context(id)?
    };
    self.exec_pattern(line, &reg_match, level_context, syntax_set, ops)?;
    Ok(true)
}
/// Searches every applicable pattern and returns the earliest match.
///
/// Contexts are tried in this order: `with_prototype` contexts of the stack
/// levels starting at the last recorded prototype marker, then the current
/// context's prototype, then the current context itself. A match that starts
/// exactly at `start` and is not a looping pop is returned immediately.
///
/// When `check_pop_loop` is set, a non-consuming `Pop` is flagged as
/// `would_loop`; such a match may be replaced by a later match at the same
/// start position.
fn find_best_match<'a>(
    &self,
    line: &str,
    start: usize,
    syntax_set: &'a SyntaxSet,
    search_cache: &mut SearchCache,
    regions: &mut Region,
    check_pop_loop: bool,
) -> Result<Option<RegexMatch<'a>>, ParsingError> {
    let top = &self.stack[self.stack.len() - 1];
    let top_context = syntax_set.get_context(&top.context)?;
    let prototype = top_context.prototype.as_ref();

    // Build the (from_with_prototype, context id, captures) search order.
    let first_proto_level = self.proto_starts.last().cloned().unwrap_or(0);
    let with_prototypes = self.stack[first_proto_level..].iter().flat_map(|lvl| {
        lvl.prototypes
            .iter()
            .map(move |ctx| (true, ctx, lvl.captures.as_ref()))
    });
    let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
    let cur_context = Some((false, &top.context, top.captures.as_ref())).into_iter();
    let context_chain = with_prototypes.chain(cur_prototype).chain(cur_context);

    let mut earliest = usize::MAX;
    let mut best: Option<RegexMatch<'_>> = None;
    let mut pop_would_loop = false;

    for (from_with_proto, ctx, captures) in context_chain {
        let searched_context = syntax_set.get_context(ctx)?;
        for (pat_context, pat_index) in context_iter(syntax_set, searched_context) {
            let match_pat = pat_context.match_at(pat_index)?;
            let match_region =
                match self.search(line, start, match_pat, captures, search_cache, regions) {
                    Some(region) => region,
                    None => continue,
                };
            let (match_start, match_end) = match_region.pos(0).unwrap();

            // Accept strictly earlier matches, or a match at the same
            // position when the current best is a looping pop.
            let replaces_best =
                match_start < earliest || (match_start == earliest && pop_would_loop);
            if !replaces_best {
                continue;
            }

            earliest = match_start;
            let consuming = match_end > start;
            pop_would_loop = check_pop_loop
                && !consuming
                && matches!(match_pat.operation, MatchOperation::Pop);
            best = Some(RegexMatch {
                regions: match_region,
                context: pat_context,
                pat_index,
                from_with_prototype: from_with_proto,
                would_loop: pop_would_loop,
            });

            // Nothing can start earlier than `start`, so stop searching.
            if match_start == start && !pop_would_loop {
                return Ok(best);
            }
        }
    }
    Ok(best)
}
fn search(&self,
line: &str,
start: usize,
match_pat: &MatchPattern,
captures: Option<&(Region, String)>,
search_cache: &mut SearchCache,
regions: &mut Region,
) -> Option<Region> {
let match_ptr = match_pat as *const MatchPattern;
if let Some(maybe_region) = search_cache.get(&match_ptr) {
if let Some(ref region) = *maybe_region {
let match_start = region.pos(0).unwrap().0;
if match_start >= start {
return Some(region.clone());
}
} else {
return None;
}
}
let (matched, can_cache) = match (match_pat.has_captures, captures) {
(true, Some(captures)) => {
let (region, s) = captures;
let regex = match_pat.regex_with_refs(region, s);
let matched = regex.search(line, start, line.len(), Some(regions));
(matched, false)
}
_ => {
let regex = match_pat.regex();
let matched = regex.search(line, start, line.len(), Some(regions));
(matched, true)
}
};
if matched {
let (match_start, match_end) = regions.pos(0).unwrap();
let does_something = match match_pat.operation {
MatchOperation::None => match_start != match_end,
_ => true,
};
if can_cache && does_something {
search_cache.insert(match_pat, Some(regions.clone()));
}
if does_something {
return Some(regions.clone());
}
} else if can_cache {
search_cache.insert(match_pat, None);
}
None
}
/// Emits the scope operations for a match and then applies its stack operation.
///
/// Operations are appended to `ops` in this order: meta operations for the
/// start of the match, pushes of the pattern's own scopes, capture scopes,
/// the pop of the pattern's scopes at the match end, and finally the meta
/// operations for the end of the match.
///
/// Returns the result of `perform_op`: `Ok(true)` when the context stack
/// changed and `Ok(false)` when the pattern has no stack operation.
fn exec_pattern<'a>(
    &mut self,
    line: &str,
    reg_match: &RegexMatch<'a>,
    level_context: &'a Context,
    syntax_set: &'a SyntaxSet,
    ops: &mut Vec<(usize, ScopeStackOp)>,
) -> Result<bool, ParsingError> {
    let (match_start, match_end) = reg_match.regions.pos(0).unwrap();
    let context = reg_match.context;
    let pat = context.match_at(reg_match.pat_index)?;

    self.push_meta_ops(true, match_start, level_context, &pat.operation, syntax_set, ops)?;
    for s in &pat.scope {
        ops.push((match_start, ScopeStackOp::Push(*s)));
    }

    if let Some(ref capture_map) = pat.captures {
        // Captures may be nested or out of order, so collect them with a
        // sort key first. Key: (position, tie-breaker). Pushes use the
        // negated capture length so longer captures open first; pops use
        // `i32::MIN` so they sort before any push at the same position.
        let mut map: Vec<((usize, i32), ScopeStackOp)> = Vec::new();
        for &(cap_index, ref scopes) in capture_map.iter() {
            if let Some((cap_start, cap_end)) = reg_match.regions.pos(cap_index) {
                // Empty captures contribute nothing.
                if cap_start == cap_end {
                    continue;
                }
                for scope in scopes.iter() {
                    map.push(((cap_start, -((cap_end - cap_start) as i32)),
                              ScopeStackOp::Push(*scope)));
                }
                map.push(((cap_end, i32::MIN), ScopeStackOp::Pop(scopes.len())));
            }
        }
        // Stable sort: entries with equal keys keep their insertion order.
        map.sort_by(|a, b| a.0.cmp(&b.0));
        for ((index, _), op) in map.into_iter() {
            ops.push((index, op));
        }
    }

    if !pat.scope.is_empty() {
        ops.push((match_end, ScopeStackOp::Pop(pat.scope.len())));
    }
    self.push_meta_ops(false, match_end, level_context, &pat.operation, syntax_set, ops)?;

    self.perform_op(line, &reg_match.regions, pat, syntax_set)
}
/// Appends the meta-scope related operations for a match operation.
///
/// Called twice per match: with `initial == true` at the match start and with
/// `initial == false` at the match end. `index` is the position recorded with
/// every emitted operation, and `cur_context` is the context at the top of
/// the stack when the match was found.
///
/// # Errors
///
/// Propagates the error returned when a context reference of a `Push`/`Set`
/// operation cannot be resolved.
fn push_meta_ops(
    &self,
    initial: bool,
    index: usize,
    cur_context: &Context,
    match_op: &MatchOperation,
    syntax_set: &SyntaxSet,
    ops: &mut Vec<(usize, ScopeStackOp)>,
) -> Result<(), ParsingError>{
    match *match_op {
        MatchOperation::Pop => {
            // The content scope ends before the popping match, the meta
            // scope ends after it.
            let v = if initial {
                &cur_context.meta_content_scope
            } else {
                &cur_context.meta_scope
            };
            if !v.is_empty() {
                ops.push((index, ScopeStackOp::Pop(v.len())));
            }
            if !initial && cur_context.clear_scopes.is_some() {
                ops.push((index, ScopeStackOp::Restore))
            }
        },
        MatchOperation::Push(ref context_refs) |
        MatchOperation::Set(ref context_refs) => {
            let is_set = matches!(*match_op, MatchOperation::Set(_));
            if initial {
                if is_set && cur_context.clear_scopes.is_some() {
                    ops.push((index, ScopeStackOp::Restore));
                }
                for r in context_refs.iter() {
                    let ctx = r.resolve(syntax_set)?;
                    if !is_set {
                        if let Some(clear_amount) = ctx.clear_scopes {
                            ops.push((index, ScopeStackOp::Clear(clear_amount)));
                        }
                    }
                    for scope in ctx.meta_scope.iter() {
                        ops.push((index, ScopeStackOp::Push(*scope)));
                    }
                }
            } else {
                let cur_has_meta_scopes = !cur_context.meta_scope.is_empty()
                    || !cur_context.meta_content_scope.is_empty();

                // Decide whether the scopes pushed at the match start have to
                // be popped and pushed again. The targets are only inspected
                // when the current context does not already force it.
                let mut repush = is_set && cur_has_meta_scopes;
                if !repush {
                    for r in context_refs.iter() {
                        let ctx = r.resolve(syntax_set)?;
                        if !ctx.meta_content_scope.is_empty()
                            || (ctx.clear_scopes.is_some() && is_set)
                        {
                            repush = true;
                            break;
                        }
                    }
                }

                if repush {
                    // Undo the meta scopes that were pushed at the match start.
                    let mut num_to_pop: usize = 0;
                    for r in context_refs.iter() {
                        num_to_pop += r.resolve(syntax_set)?.meta_scope.len();
                    }
                    if is_set {
                        // A `set` also leaves the current context's scopes.
                        num_to_pop += cur_context.meta_content_scope.len()
                            + cur_context.meta_scope.len();
                    }
                    if num_to_pop > 0 {
                        ops.push((index, ScopeStackOp::Pop(num_to_pop)));
                    }

                    for r in context_refs {
                        let ctx = r.resolve(syntax_set)?;
                        if is_set {
                            if let Some(clear_amount) = ctx.clear_scopes {
                                ops.push((index, ScopeStackOp::Clear(clear_amount)));
                            }
                        }
                        for scope in ctx.meta_scope.iter() {
                            ops.push((index, ScopeStackOp::Push(*scope)));
                        }
                        for scope in ctx.meta_content_scope.iter() {
                            ops.push((index, ScopeStackOp::Push(*scope)));
                        }
                    }
                }
            }
        },
        MatchOperation::None => (),
    }
    Ok(())
}
/// Applies the stack operation of a matched pattern to the context stack.
///
/// * `Push` pushes one level per referenced context.
/// * `Set` pops the current level first and hands its prototypes to the
///   first pushed level.
/// * `Pop` removes the top level.
/// * `None` leaves the stack untouched.
///
/// The pattern's `with_prototype`, if any, is attached to the last pushed
/// level. `regions` and `line` are stored on a level only when its context or
/// one of its prototypes uses backreferences.
///
/// Returns `Ok(true)` when the stack was modified and `Ok(false)` otherwise.
///
/// # Errors
///
/// Propagates errors from resolving context IDs or looking up contexts.
fn perform_op(
    &mut self,
    line: &str,
    regions: &Region,
    pat: &MatchPattern,
    syntax_set: &SyntaxSet
) -> Result<bool, ParsingError> {
    let (ctx_refs, mut old_proto_ids) = match pat.operation {
        MatchOperation::Push(ref ctx_refs) => (ctx_refs, None),
        MatchOperation::Set(ref ctx_refs) => {
            (ctx_refs, self.stack.pop().map(|s| s.prototypes))
        }
        MatchOperation::Pop => {
            self.stack.pop();
            return Ok(true);
        }
        MatchOperation::None => return Ok(false),
    };

    let last_index = ctx_refs.len().saturating_sub(1);
    for (i, r) in ctx_refs.iter().enumerate() {
        // Only the first pushed level inherits the prototypes of the level
        // replaced by `set`, so the list can be moved instead of cloned.
        let mut proto_ids = if i == 0 {
            old_proto_ids.take().unwrap_or_default()
        } else {
            Vec::new()
        };
        if i == last_index {
            if let Some(ref p) = pat.with_prototype {
                proto_ids.push(p.id()?);
            }
        }

        let context_id = r.id()?;
        let context = syntax_set.get_context(&context_id)?;

        // Prototypes are consulted only when the context itself does not
        // already require the captures.
        let mut uses_backrefs = context.uses_backrefs;
        if !uses_backrefs {
            for id in &proto_ids {
                if syntax_set.get_context(id)?.uses_backrefs {
                    uses_backrefs = true;
                    break;
                }
            }
        }
        let captures = if uses_backrefs {
            Some((regions.clone(), line.to_owned()))
        } else {
            None
        };

        self.stack.push(StateLevel {
            context: context_id,
            prototypes: proto_ids,
            captures,
        });
    }
    Ok(true)
}
}
#[cfg(feature = "yaml-load")]
#[cfg(test)]
mod tests {
use super::*;
use crate::parsing::{SyntaxSet, SyntaxSetBuilder, Scope, ScopeStack};
use crate::parsing::ScopeStackOp::{Push, Pop, Clear, Restore};
use crate::util::debug_print_ops;
const TEST_SYNTAX: &str = include_str!("../../testdata/parser_tests.sublime-syntax");
// Parses two Ruby on Rails lines with one state and checks the leading ops of each.
#[test]
fn can_parse_simple() {
    let syntax_set = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
    let rails = syntax_set.find_syntax_by_name("Ruby on Rails").unwrap();
    let mut parse_state = ParseState::new(rails);
    let scope = |name: &str| Scope::new(name).unwrap();

    let first_actual = ops(&mut parse_state, "module Bob::Wow::Troll::Five; 5; end", &syntax_set);
    let first_expected = vec![
        (0, Push(scope("source.ruby.rails"))),
        (0, Push(scope("meta.module.ruby"))),
        (0, Push(scope("keyword.control.module.ruby"))),
        (6, Pop(2)),
        (6, Push(scope("meta.module.ruby"))),
        (7, Pop(1)),
        (7, Push(scope("meta.module.ruby"))),
        (7, Push(scope("entity.name.module.ruby"))),
        (7, Push(scope("support.other.namespace.ruby"))),
        (10, Pop(1)),
        (10, Push(scope("punctuation.accessor.ruby"))),
    ];
    // Only the prefix of the produced ops is compared.
    assert_eq!(&first_actual[0..first_expected.len()], &first_expected[..]);

    let second_actual = ops(&mut parse_state, "def lol(wow = 5)", &syntax_set);
    let second_expected = vec![
        (0, Push(scope("meta.function.ruby"))),
        (0, Push(scope("keyword.control.def.ruby"))),
        (3, Pop(2)),
        (3, Push(scope("meta.function.ruby"))),
        (4, Push(scope("entity.name.function.ruby"))),
        (7, Pop(1)),
    ];
    assert_eq!(&second_actual[0..second_expected.len()], &second_expected[..]);
}
// Checks the complete op list produced for a single YAML key/value line.
#[test]
fn can_parse_yaml() {
    let syntax_set = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
    let yaml = syntax_set.find_syntax_by_name("YAML").unwrap();
    let mut parse_state = ParseState::new(yaml);
    let scope = |name: &str| Scope::new(name).unwrap();

    assert_eq!(ops(&mut parse_state, "key: value\n", &syntax_set), vec![
        (0, Push(scope("source.yaml"))),
        (0, Push(scope("string.unquoted.plain.out.yaml"))),
        (0, Push(scope("entity.name.tag.yaml"))),
        (3, Pop(2)),
        (3, Push(scope("punctuation.separator.key-value.mapping.yaml"))),
        (4, Pop(1)),
        (5, Push(scope("string.unquoted.plain.out.yaml"))),
        (10, Pop(1)),
    ]);
}
// Applies the ops of an HTML (Rails) line and checks the resulting scope stack.
#[test]
fn can_parse_includes() {
    let syntax_set = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
    let html_rails = syntax_set.find_syntax_by_name("HTML (Rails)").unwrap();
    let mut parse_state = ParseState::new(html_rails);

    let produced = ops(&mut parse_state, "<script>var lol = '<% def wow(", &syntax_set);

    let expected_names = [
        "text.html.ruby",
        "text.html.basic",
        "source.js.embedded.html",
        "source.js",
        "string.quoted.single.js",
        "source.ruby.rails.embedded.html",
        "meta.function.parameters.ruby",
    ];
    let mut expected_stack = ScopeStack::new();
    for name in expected_names.iter() {
        expected_stack.push(Scope::new(name).unwrap());
    }

    let mut actual_stack = ScopeStack::new();
    for (_, op) in produced.iter() {
        actual_stack.apply(op).expect("#[cfg(test)]");
    }
    assert_eq!(actual_stack, expected_stack);
}
// Parses a Ruby heredoc over three lines; the closing "SQL" line relies on the
// text captured on the first line.
#[test]
fn can_parse_backrefs() {
    let syntax_set = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
    let rails = syntax_set.find_syntax_by_name("Ruby on Rails").unwrap();
    let mut parse_state = ParseState::new(rails);
    let scope = |name: &str| Scope::new(name).unwrap();

    assert_eq!(ops(&mut parse_state, "lol = <<-SQL.strip", &syntax_set), vec![
        (0, Push(scope("source.ruby.rails"))),
        (4, Push(scope("keyword.operator.assignment.ruby"))),
        (5, Pop(1)),
        (6, Push(scope("string.unquoted.embedded.sql.ruby"))),
        (6, Push(scope("punctuation.definition.string.begin.ruby"))),
        (12, Pop(1)),
        (12, Pop(1)),
        (12, Push(scope("string.unquoted.embedded.sql.ruby"))),
        (12, Push(scope("text.sql.embedded.ruby"))),
        (12, Clear(ClearAmount::TopN(2))),
        (12, Push(scope("punctuation.accessor.ruby"))),
        (13, Pop(1)),
        (18, Restore),
    ]);

    assert_eq!(ops(&mut parse_state, "wow", &syntax_set), vec![]);

    assert_eq!(ops(&mut parse_state, "SQL", &syntax_set), vec![
        (0, Pop(1)),
        (0, Push(scope("punctuation.definition.string.end.ruby"))),
        (3, Pop(1)),
        (3, Pop(1)),
    ]);
}
// Feeds a C snippet with #ifdef/#else/#endif line by line and checks the ops of each line.
#[test]
fn can_parse_preprocessor_rules() {
    let syntax_set = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
    let c_syntax = syntax_set.find_syntax_by_name("C").unwrap();
    let mut parse_state = ParseState::new(c_syntax);
    let scope = |name: &str| Scope::new(name).unwrap();

    assert_eq!(ops(&mut parse_state, "#ifdef FOO", &syntax_set), vec![
        (0, Push(scope("source.c"))),
        (0, Push(scope("meta.preprocessor.c"))),
        (0, Push(scope("keyword.control.import.c"))),
        (6, Pop(1)),
        (10, Pop(1)),
    ]);
    assert_eq!(ops(&mut parse_state, "{", &syntax_set), vec![
        (0, Push(scope("meta.block.c"))),
        (0, Push(scope("punctuation.section.block.begin.c"))),
        (1, Pop(1)),
    ]);
    assert_eq!(ops(&mut parse_state, "#else", &syntax_set), vec![
        (0, Push(scope("meta.preprocessor.c"))),
        (0, Push(scope("keyword.control.import.c"))),
        (5, Pop(1)),
        (5, Pop(1)),
    ]);
    assert_eq!(ops(&mut parse_state, "{", &syntax_set), vec![
        (0, Push(scope("meta.block.c"))),
        (0, Push(scope("punctuation.section.block.begin.c"))),
        (1, Pop(1)),
    ]);
    assert_eq!(ops(&mut parse_state, "#endif", &syntax_set), vec![
        (0, Pop(1)),
        (0, Push(scope("meta.block.c"))),
        (0, Push(scope("meta.preprocessor.c"))),
        (0, Push(scope("keyword.control.import.c"))),
        (6, Pop(2)),
        (6, Pop(2)),
        (6, Push(scope("meta.block.c"))),
    ]);
    assert_eq!(ops(&mut parse_state, " foo;", &syntax_set), vec![
        (7, Push(scope("punctuation.terminator.c"))),
        (8, Pop(1)),
    ]);
    assert_eq!(ops(&mut parse_state, "}", &syntax_set), vec![
        (0, Push(scope("punctuation.section.block.end.c"))),
        (1, Pop(1)),
        (1, Pop(1)),
    ]);
}
// Regression check: the C line below must yield exactly ten ops.
#[test]
fn can_parse_issue25() {
    let syntax_set = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
    let c_syntax = syntax_set.find_syntax_by_name("C").unwrap();
    let mut parse_state = ParseState::new(c_syntax);
    let produced = ops(&mut parse_state, "struct{estruct", &syntax_set);
    assert_eq!(produced.len(), 10);
}
// Two states that parsed similar lines compare equal; parsing one more line
// with only one of them makes them differ.
#[test]
fn can_compare_parse_states() {
    let syntax_set = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
    let java = syntax_set.find_syntax_by_name("Java").unwrap();
    let mut left = ParseState::new(java);
    let mut right = ParseState::new(java);

    let left_ops = ops(&mut left, "class Foo {", &syntax_set);
    assert_eq!(left_ops.len(), 11);
    let right_ops = ops(&mut right, "class Fooo {", &syntax_set);
    assert_eq!(right_ops.len(), 11);
    assert_eq!(left, right);

    ops(&mut left, "}", &syntax_set);
    assert_ne!(left, right);
}
// Checks the expected scope stacks for the simple clear_scopes case of the test syntax.
#[test]
fn can_parse_non_nested_clear_scopes() {
    expect_scope_stacks(
        "'hello #simple_cleared_scopes_test world test \\n '",
        &[
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
        ],
        TEST_SYNTAX,
    );
}
// Checks the expected scope stacks for the "too many cleared scopes" case of the test syntax.
#[test]
fn can_parse_non_nested_too_many_clear_scopes() {
    expect_scope_stacks(
        "'hello #too_many_cleared_scopes_test world test \\n '",
        &[
            "<example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
            "<example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
        ],
        TEST_SYNTAX,
    );
}
// Checks the expected scope stacks for the nested clear_scopes case of the test syntax.
#[test]
fn can_parse_nested_clear_scopes() {
    expect_scope_stacks(
        "'hello #nested_clear_scopes_test world foo bar test \\n '",
        &[
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
            "<source.test>, <example.meta-scope.cleared-previous-meta-scope.example>, <foo>",
            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
        ],
        TEST_SYNTAX,
    );
}
// The infinite-loop section of the test syntax must still reach the numeric token.
#[test]
fn can_parse_infinite_loop() {
    expect_scope_stacks(
        "#infinite_loop_test 123",
        &["<source.test>, <constant.numeric.test>"],
        TEST_SYNTAX,
    );
}
#[test]
fn can_parse_infinite_seeming_loop() {
let line = "#infinite_seeming_loop_test hello";
let expect = [
"<source.test>, <keyword.test>",
"<source.test>, <test>, <string.unquoted.test>",
"<source.test>, <test>, <keyword.control.test>",
];
expect_scope_stacks(line, &expect, TEST_SYNTAX);
}
// A prototype rule that pops out of the main context must not prevent "foo" from matching.
#[test]
fn can_parse_prototype_that_pops_main() {
    let grammar = r#"
name: test
scope: source.test
contexts:
prototype:
# This causes us to pop out of the main context. Sublime Text handles that
# by pushing main back automatically.
- match: (?=!)
pop: true
main:
- match: foo
scope: test.good
"#;
    expect_scope_stacks("foo!", &["<source.test>, <test.good>"], grammar);
}
// A pattern ending in a `[\n]` character class is expected to match at the end of the line.
#[test]
fn can_parse_syntax_with_newline_in_character_class() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
- match: foo[\n]
scope: foo.end
- match: foo
scope: foo.any
"#;
    expect_scope_stacks("foo", &["<source.test>, <foo.end>"], grammar);

    expect_scope_stacks(
        "foofoofoo",
        &[
            "<source.test>, <foo.any>",
            "<source.test>, <foo.any>",
            "<source.test>, <foo.end>",
        ],
        grammar,
    );
}
// Regression check using the embed/escape test syntax with two different input lines.
#[test]
fn can_parse_issue120() {
    let source = include_str!("../../testdata/embed_escape_test.sublime-syntax");
    let definition = SyntaxDefinition::load_from_str(source, false, None).unwrap();

    let quoted_expect = [
        "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.begin.html>",
        "<meta.attribute-with-value.style.html>, <source.css>",
        "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.end.html>",
        "<meta.attribute-with-value.style.html>, <source.css>, <test.embedded>",
        "<top-level.test>",
    ];
    expect_scope_stacks_with_syntax("\"abctest\" foobar", &quoted_expect, definition.clone());

    let tag_expect = [
        "<meta.tag.style.begin.html>, <punctuation.definition.tag.end.html>",
        "<source.css.embedded.html>, <test.embedded>",
        "<top-level.test>",
    ];
    expect_scope_stacks_with_syntax(">abctest</style>foobar", &tag_expect, definition);
}
// A non-consuming pop directly after a non-consuming push must be skipped so "hello" is matched.
#[test]
fn can_parse_non_consuming_pop_that_would_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
# This makes us go into "test" without consuming any characters
- match: (?=hello)
push: test
test:
# If we used this match, we'd go back to "main" without consuming anything,
# and then back into "test", infinitely looping. ST detects this at this
# point and ignores this match until at least one character matched.
- match: (?!world)
pop: true
- match: \w+
scope: test.matched
"#;
    expect_scope_stacks("hello", &["<source.test>, <test.matched>"], grammar);
}
// Same loop protection as the plain push/pop case, but with a non-consuming `set` in between.
#[test]
fn can_parse_non_consuming_set_and_pop_that_would_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
# This makes us go into "a" without advancing
- match: (?=test)
push: a
a:
# This makes us go into "b" without advancing
- match: (?=t)
set: b
b:
# If we used this match, we'd go back to "main" without having advanced,
# which means we'd have an infinite loop like with the previous test.
# So even for a "set", we have to check if we're advancing or not.
- match: (?=t)
pop: true
- match: \w+
scope: test.matched
"#;
    expect_scope_stacks("test", &["<source.test>, <test.matched>"], grammar);
}
// A non-consuming pop is allowed when the push that led here consumed a character.
#[test]
fn can_parse_non_consuming_set_after_consuming_push_that_does_not_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
# This makes us go into "a", but we consumed a character
- match: t
push: a
- match: \w+
scope: test.matched
a:
# This makes us go into "b" without consuming
- match: (?=e)
set: b
b:
# This match does not result in an infinite loop because we already consumed
# a character to get into "a", so it's ok to pop back into "main".
- match: (?=e)
pop: true
"#;
    expect_scope_stacks("test", &["<source.test>, <test.matched>"], grammar);
}
// A non-consuming pop is allowed after a chain of sets in which one set consumed input.
#[test]
fn can_parse_non_consuming_set_after_consuming_set_that_does_not_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
- match: (?=hello)
push: a
- match: \w+
scope: test.matched
a:
- match: h
set: b
b:
- match: (?=e)
set: c
c:
# This is not an infinite loop because "a" consumed a character, so we can
# actually pop back into main and then match the rest of the input.
- match: (?=e)
pop: true
"#;
    expect_scope_stacks("hello", &["<source.test>, <test.matched>"], grammar);
}
// Empty-pattern push and pop rules must not loop, including at the end of the line.
#[test]
fn can_parse_non_consuming_pop_that_would_loop_at_end_of_line() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
# This makes us go into "test" without consuming, even at the end of line
- match: ""
push: test
test:
- match: ""
pop: true
- match: \w+
scope: test.matched
"#;
    expect_scope_stacks("hello", &["<source.test>, <test.matched>"], grammar);
}
// An empty match that lies past the push position counts as progress, so the pop is allowed.
#[test]
fn can_parse_empty_but_consuming_set_that_does_not_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
- match: (?=hello)
push: a
- match: ello
scope: test.good
a:
# This is an empty match, but it consumed a character (the "h")
- match: (?=e)
set: b
b:
# .. so it's ok to pop back to main from here
- match: ""
pop: true
- match: ello
scope: test.bad
"#;
    expect_scope_stacks("hello", &["<source.test>, <test.good>"], grammar);
}
// With a multi-context push, popping back to main is fine once a character was consumed.
#[test]
fn can_parse_non_consuming_pop_that_does_not_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
# This is a non-consuming push, so "b" will need to check for a
# non-consuming pop
- match: (?=hello)
push: [b, a]
- match: ello
scope: test.good
a:
# This pop is ok, it consumed "h"
- match: (?=e)
pop: true
b:
# This is non-consuming, and we set to "c"
- match: (?=e)
set: c
c:
# It's ok to pop back to "main" here because we consumed a character in the
# meantime.
- match: ""
pop: true
- match: ello
scope: test.bad
"#;
    expect_scope_stacks("hello", &["<source.test>, <test.good>"], grammar);
}
// Popping from "a" to "b" after a multi-context push is not a loop and must be allowed.
#[test]
fn can_parse_non_consuming_pop_with_multi_push_that_does_not_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
- match: (?=hello)
push: [b, a]
- match: ello
scope: test.good
a:
# This pop is ok, as we're not popping back to "main" yet (which would loop),
# we're popping to "b"
- match: ""
pop: true
- match: \w+
scope: test.bad
b:
- match: \w+
scope: test.good
"#;
    expect_scope_stacks("hello", &["<source.test>, <test.good>"], grammar);
}
// Popping from one instance of a recursively pushed context to another must not be seen as a loop.
#[test]
fn can_parse_non_consuming_pop_of_recursive_context_that_does_not_loop() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
- match: xxx
scope: test.good
- include: basic-identifiers
basic-identifiers:
- match: '\w+::'
scope: test.matched
push: no-type-names
no-type-names:
- include: basic-identifiers
- match: \w+
scope: test.matched.inside
# This is a tricky one because when this is the best match,
# we have two instances of "no-type-names" on the stack, so we're popping
# back from "no-type-names" to another "no-type-names".
- match: ''
pop: true
"#;
    expect_scope_stacks("foo::bar::* xxx", &["<source.test>, <test.good>"], grammar);
}
// A looping pop may only be replaced by another match at the same position, not by a later one.
#[test]
fn can_parse_non_consuming_pop_order() {
    let grammar = r#"
name: test
scope: source.test
contexts:
main:
- match: (?=hello)
push: test
test:
# This matches first
- match: (?=e)
push: good
# But this (looping) match replaces it, because it's an earlier match
- match: (?=h)
pop: true
# And this should not replace it, as it's a later match (only matches at
# the same position can replace looping pops).
- match: (?=o)
push: bad
good:
- match: \w+
scope: test.good
bad:
- match: \w+
scope: test.bad
"#;
    expect_scope_stacks("hello", &["<source.test>, <test.good>"], grammar);
}
// The prototype rule is expected to apply inside an embedded context.
#[test]
fn can_parse_prototype_with_embed() {
    let yaml = r#"
name: Javadoc
scope: text.html.javadoc
contexts:
prototype:
- match: \*
scope: punctuation.definition.comment.javadoc
main:
- meta_include_prototype: false
- match: /\*\*
scope: comment.block.documentation.javadoc punctuation.definition.comment.begin.javadoc
embed: contents
embed_scope: comment.block.documentation.javadoc text.html.javadoc
escape: \*/
escape_captures:
0: comment.block.documentation.javadoc punctuation.definition.comment.end.javadoc
contents:
- match: ''
"#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();
    let expected = [
        "<comment.block.documentation.javadoc>, <punctuation.definition.comment.begin.javadoc>",
        "<comment.block.documentation.javadoc>, <text.html.javadoc>, <punctuation.definition.comment.javadoc>",
        "<comment.block.documentation.javadoc>, <punctuation.definition.comment.end.javadoc>",
    ];
    expect_scope_stacks_with_syntax("/** * */", &expected, definition);
}
// A context pushed from the prototype must not have the prototype applied to it.
#[test]
fn can_parse_context_included_in_prototype_via_named_reference() {
    let grammar = r#"
scope: source.test
contexts:
prototype:
- match: a
push: a
- match: b
scope: test.bad
main:
- match: unused
# This context is included in the prototype (see `push: a`).
# Because of that, ST doesn't apply the prototype to this context, so if
# we're in here the "b" shouldn't match.
a:
- match: a
scope: test.good
"#;
    let observed = stack_states(parse("aa b", grammar));
    let expected = vec![
        "<source.test>",
        "<source.test>, <test.good>",
        "<source.test>",
    ];
    assert_eq!(observed, expected, "Expected test.bad to not match");
}
// Exercises `with_prototype` in combination with `set`, using two input lines
// and a freshly loaded definition for each.
#[test]
fn can_parse_with_prototype_set() {
    let yaml = r#"%YAML 1.2
---
scope: source.test-set-with-proto
contexts:
main:
- match: a
scope: a
set: next1
with_prototype:
- match: '1'
scope: '1'
- match: '2'
scope: '2'
- match: '3'
scope: '3'
- match: '4'
scope: '4'
- match: '5'
scope: '5'
set: [next3, next2]
with_prototype:
- match: c
scope: cwith
next1:
- match: b
scope: b
set: next2
next2:
- match: c
scope: c
push: next3
- match: e
scope: e
pop: true
- match: f
scope: f
set: [next1, next2]
next3:
- match: d
scope: d
- match: (?=e)
pop: true
- match: c
scope: cwithout
"#;
    let load = || SyntaxDefinition::load_from_str(yaml, true, None).unwrap();

    expect_scope_stacks_with_syntax(
        "a1b2c3d4e5",
        &["<a>", "<1>", "<b>", "<2>", "<c>", "<3>", "<d>", "<4>", "<e>", "<5>"],
        load(),
    );
    expect_scope_stacks_with_syntax(
        "5cfcecbedcdea",
        &["<5>", "<cwith>", "<f>", "<e>", "<b>", "<d>", "<cwithout>", "<a>"],
        load(),
    );
}
#[test]
fn can_parse_issue176() {
let syntax = r#"
scope: source.dummy
contexts:
main:
- match: (test)(?=(foo))(f)
captures:
1: test
2: ignored
3: f
push:
- match: (oo)
captures:
1: keyword
"#;
let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
expect_scope_stacks_with_syntax("testfoo", &["<test>", "<f>", "<keyword>"], syntax);
}
// After a `set` that carries its own with_prototype, both prototype rules are expected to match.
#[test]
fn can_parse_two_with_prototypes_at_same_stack_level() {
    let yaml = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
main:
- match: a
scope: a
push:
- match: b
scope: b
set:
- match: c
scope: c
with_prototype:
- match: '2'
scope: '2'
with_prototype:
- match: '1'
scope: '1'
"#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();
    expect_scope_stacks_with_syntax("abc12", &["<1>", "<2>"], definition);
}
// Checks which with_prototype rules stay active as the contexts of a multi-context `set` are popped.
#[test]
fn can_parse_two_with_prototypes_at_same_stack_level_set_multiple() {
    let yaml = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
main:
- match: a
scope: a
push:
- match: b
scope: b
set: [context1, context2, context3]
with_prototype:
- match: '2'
scope: '2'
with_prototype:
- match: '1'
scope: '1'
- match: '1'
scope: digit1
- match: '2'
scope: digit2
context1:
- match: e
scope: e
pop: true
- match: '2'
scope: digit2
context2:
- match: d
scope: d
pop: true
- match: '2'
scope: digit2
context3:
- match: c
scope: c
pop: true
"#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();
    expect_scope_stacks_with_syntax("ab12", &["<1>", "<2>"], definition.clone());
    expect_scope_stacks_with_syntax("abc12", &["<1>", "<digit2>"], definition.clone());
    expect_scope_stacks_with_syntax("abcd12", &["<1>", "<digit2>"], definition.clone());
    expect_scope_stacks_with_syntax("abcde12", &["<digit1>", "<digit2>"], definition);
}
// The `\1` backreference in the outer with_prototype is expected to follow the most recent capture.
#[test]
fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures() {
    let yaml = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
main:
- match: (a)
scope: a
push:
- match: (b)
scope: b
set:
- match: c
scope: c
with_prototype:
- match: d
scope: d
with_prototype:
- match: \1
scope: '1'
pop: true
"#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();
    expect_scope_stacks_with_syntax("aa", &["<a>", "<1>"], definition.clone());
    expect_scope_stacks_with_syntax(
        "abcdb",
        &["<a>", "<b>", "<c>", "<d>", "<1>"],
        definition,
    );
}
#[test]
fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures_ignore_unexisting() {
    // The prototype rules reference both `\2` and `\1`, but only the first
    // match `(a)(-)` has a second capture group; `(b)` has just one.
    // NOTE(review): judging by the test name, a backreference to a group that
    // the latest captures lack is presumably ignored -- confirm against the
    // parser. The assertions only require the listed scopes to appear.
    let yaml = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
main:
- match: (a)(-)
scope: a
push:
- match: (b)
scope: b
set:
- match: c
scope: c
with_prototype:
- match: d
scope: d
with_prototype:
- match: \2
scope: '2'
pop: true
- match: \1
scope: '1'
pop: true
"#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();
    expect_scope_stacks_with_syntax("a--", &["<a>", "<2>"], definition.clone());
    expect_scope_stacks_with_syntax("a-bcdba-", &["<a>", "<b>"], definition);
}
#[test]
fn can_parse_syntax_with_eol_and_newline() {
    // Pattern ends in `$\n`. `expect_scope_stacks` runs the line through both
    // the newline and the no-newline loading modes; in each, some stack state
    // must contain `source.test` directly followed by `foo.newline`.
    let yaml = r#"
name: test
scope: source.test
contexts:
main:
- match: foo$\n
scope: foo.newline
"#;
    expect_scope_stacks("foo", &["<source.test>, <foo.newline>"], yaml);
}
#[test]
fn can_parse_syntax_with_eol_only() {
    // Pattern ends in a bare `$`. As in the `$\n` variant, the match must be
    // scoped `foo.newline` on top of `source.test` in both loading modes.
    let yaml = r#"
name: test
scope: source.test
contexts:
main:
- match: foo$
scope: foo.newline
"#;
    expect_scope_stacks("foo", &["<source.test>, <foo.newline>"], yaml);
}
#[test]
fn can_parse_syntax_with_beginning_of_line() {
    // Feeds five consecutive lines through one parse state and pins the exact
    // operations emitted for each, exercising a `^`-anchored rule across
    // line boundaries.
    let yaml = r#"
name: test
scope: source.test
contexts:
main:
- match: \w+
scope: word
push:
# this should not match at the end of the line
- match: ^\s*$
pop: true
- match: =+
scope: heading
pop: true
- match: .*
scope: other
"#;
    let syntax_set = link(SyntaxDefinition::load_from_str(yaml, true, None).unwrap());
    let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
    // Shorthand for building the expected scopes.
    let scope = |name: &str| Scope::new(name).unwrap();

    // First line: the file scope is pushed, then the word is scoped.
    assert_eq!(
        ops(&mut state, "foo\n", &syntax_set),
        vec![
            (0, Push(scope("source.test"))),
            (0, Push(scope("word"))),
            (3, Pop(1)),
        ]
    );
    // A run of `=` right after a word line is a heading.
    assert_eq!(
        ops(&mut state, "===\n", &syntax_set),
        vec![(0, Push(scope("heading"))), (3, Pop(1))]
    );
    assert_eq!(
        ops(&mut state, "bar\n", &syntax_set),
        vec![(0, Push(scope("word"))), (3, Pop(1))]
    );
    // A blank line emits no operations at all...
    assert_eq!(ops(&mut state, "\n", &syntax_set), vec![]);
    // ...and the `=` run that follows it is scoped `other`, not `heading`.
    assert_eq!(
        ops(&mut state, "====\n", &syntax_set),
        vec![(0, Push(scope("other"))), (4, Pop(1))]
    );
}
#[test]
fn can_parse_syntax_with_comment_and_eol() {
    // A `(//).*$` rule applied to "// foo\n": the comment scope must be
    // popped at offset 6, i.e. before the trailing newline.
    let yaml = r#"
name: test
scope: source.test
contexts:
main:
- match: (//).*$
scope: comment.line.double-slash
"#;
    let syntax_set = link(SyntaxDefinition::load_from_str(yaml, true, None).unwrap());
    let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
    let scope = |name: &str| Scope::new(name).unwrap();
    assert_eq!(
        ops(&mut state, "// foo\n", &syntax_set),
        vec![
            (0, Push(scope("source.test"))),
            (0, Push(scope("comment.line.double-slash"))),
            (6, Pop(1)),
        ]
    );
}
#[test]
fn can_parse_text_with_unicode_to_skip() {
    // Both contexts have a zero-width `(?=.)` rule (one pushes, one pops).
    // The `x` after the leading character must still be scoped `test.good`.
    let yaml = r#"
name: test
scope: source.test
contexts:
main:
- match: (?=.)
push: test
test:
- match: (?=.)
pop: true
- match: x
scope: test.good
"#;
    // Leading characters take 2, 3 and 4 bytes in UTF-8 respectively.
    let inputs = ["\u{03C0}x", "\u{0800}x", "\u{1F600}x"];
    for &line in inputs.iter() {
        expect_scope_stacks(line, &["<source.test>, <test.good>"], yaml);
    }
}
#[test]
fn can_include_backrefs() {
    // `context1` contains only an `include` of `context2`, whose rule uses the
    // backreference `\1`. Parsing "aa" must scope the first `a` as `a` and
    // produce a later state scoped `b`.
    let yaml = r#"
name: Backref Include Test
scope: source.backrefinc
contexts:
main:
- match: (a)
scope: a
push: context1
context1:
- include: context2
context2:
- match: \1
scope: b
pop: true
"#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();
    expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], definition);
}
#[test]
fn can_include_nested_backrefs() {
    // Like `can_include_backrefs`, but the `\1` rule is two `include` hops
    // away: `context1` includes `context3`, which includes `context2`.
    let yaml = r#"
name: Backref Include Test
scope: source.backrefinc
contexts:
main:
- match: (a)
scope: a
push: context1
context1:
- include: context3
context3:
- include: context2
context2:
- match: \1
scope: b
pop: true
"#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();
    expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], definition);
}
/// Checks `expect` against `line_without_newline` twice: once with a `\n`
/// appended and the syntax loaded with the flag set to `true`, and once with
/// the line as given and the flag set to `false`.
///
/// `syntax` is the YAML source of the definition; it is loaded afresh for
/// each pass. Panics if loading fails or an expectation is not met.
fn expect_scope_stacks(line_without_newline: &str, expect: &[&str], syntax: &str) {
    let load = |with_newlines: bool| {
        SyntaxDefinition::load_from_str(syntax, with_newlines, None).unwrap()
    };

    println!("Parsing with newlines");
    let terminated = [line_without_newline, "\n"].concat();
    expect_scope_stacks_with_syntax(&terminated, expect, load(true));

    println!("Parsing without newlines");
    expect_scope_stacks_with_syntax(line_without_newline, expect, load(false));
}
/// Parses `line` with a fresh parse state for `syntax` (linked into a
/// single-syntax set) and checks the resulting operations against `expect`
/// via `expect_scope_stacks_for_ops`.
fn expect_scope_stacks_with_syntax(line: &str, expect: &[&str], syntax: SyntaxDefinition) {
    let syntax_set = link(syntax);
    let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
    expect_scope_stacks_for_ops(ops(&mut state, line, &syntax_set), expect);
}
/// Replays `ops` and requires every entry of `expect` to occur as a substring
/// of at least one intermediate scope-stack rendering.
///
/// Every rendering is printed first. The function then panics for the first
/// expectation (in `expect` order) that no rendering contains.
fn expect_scope_stacks_for_ops(ops: Vec<(usize, ScopeStackOp)>, expect: &[&str]) {
    let states = stack_states(ops);
    for state in &states {
        println!("{}", state);
    }
    for wanted in expect {
        let seen = states.iter().any(|state| state.contains(*wanted));
        if !seen {
            panic!("expected scope stack '{}' missing", wanted);
        }
    }
}
/// Loads `syntax` (YAML source, flag `true`), links it into a set and returns
/// the operations produced by parsing `line` with a fresh parse state.
fn parse(line: &str, syntax: &str) -> Vec<(usize, ScopeStackOp)> {
    let definition = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
    let syntax_set = link(definition);
    let first_syntax = &syntax_set.syntaxes()[0];
    let mut state = ParseState::new(first_syntax);
    ops(&mut state, line, &syntax_set)
}
/// Builds a `SyntaxSet` containing only `syntax`.
fn link(syntax: SyntaxDefinition) -> SyntaxSet {
    let mut set_builder = SyntaxSetBuilder::new();
    set_builder.add(syntax);
    set_builder.build()
}
/// Parses one `line` with `state`, prints the resulting operations through
/// `debug_print_ops`, and returns them. Panics if parsing fails.
fn ops(state: &mut ParseState, line: &str, syntax_set: &SyntaxSet) -> Vec<(usize, ScopeStackOp)> {
    let parsed = state.parse_line(line, syntax_set).expect("#[cfg(test)]");
    debug_print_ops(line, &parsed);
    parsed
}
/// Applies `ops` one by one to an initially empty `ScopeStack` and returns,
/// for each operation, the stack contents afterwards: the `Debug` form of
/// every scope joined with `", "`. Panics if an operation cannot be applied.
fn stack_states(ops: Vec<(usize, ScopeStackOp)>) -> Vec<String> {
    let mut stack = ScopeStack::new();
    ops.iter()
        .map(|(_, op)| {
            stack.apply(op).expect("#[cfg(test)]");
            stack
                .as_slice()
                .iter()
                .map(|scope| format!("{:?}", scope))
                .collect::<Vec<String>>()
                .join(", ")
        })
        .collect()
}
}