use super::regex::{Regex, Region};
use super::scope::*;
use super::syntax_definition::*;
use yaml_rust::{YamlLoader, Yaml, ScanError};
use yaml_rust::yaml::Hash;
use std::collections::HashMap;
use std::error::Error;
use std::path::Path;
use std::ops::DerefMut;
/// Errors that can be produced while turning a YAML syntax definition into a
/// `SyntaxDefinition`.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ParseSyntaxError {
/// The input could not be scanned as YAML; wraps the scanner's error.
#[error("Invalid YAML file syntax: {0}")]
InvalidYaml(#[from] ScanError),
/// The input parsed as YAML but contained no documents.
#[error("The file must contain at least one YAML document")]
EmptyFile,
/// A key that is required at this point is absent; carries the key name.
#[error("Missing mandatory key in YAML file: {0}")]
MissingMandatoryKey(&'static str),
/// A regex failed to compile; carries the regex text that was compiled and the
/// underlying error.
#[error("Error while compiling regex '{0}': {1}")]
RegexCompileError(String, #[source] Box<dyn Error + Send + Sync + 'static>),
/// A scope string was rejected by the scope repository.
#[error("Invalid scope: {0}")]
InvalidScope(ParseScopeError),
/// A `*.sublime-syntax` reference from which no file stem could be extracted.
#[error("Invalid file reference")]
BadFileRef,
/// The `contexts` map has no `main` entry.
#[error("Context 'main' is missing")]
MainMissing,
/// A YAML value was present but did not have the expected type.
#[error("Type mismatch")]
TypeMismatch,
}
/// Looks up `key` in `map` and converts the value with `f`.
///
/// # Errors
///
/// * `ParseSyntaxError::MissingMandatoryKey(key)` when `map` has no such key.
/// * `ParseSyntaxError::TypeMismatch` when `f` returns `None` for the value.
fn get_key<'a, R, F: FnOnce(&'a Yaml) -> Option<R>>(
    map: &'a Hash,
    key: &'static str,
    f: F,
) -> Result<R, ParseSyntaxError> {
    // The map is keyed by `Yaml`, so the lookup key has to be wrapped first.
    let lookup = Yaml::String(key.to_owned());
    let value = match map.get(&lookup) {
        Some(value) => value,
        None => return Err(ParseSyntaxError::MissingMandatoryKey(key)),
    };
    match f(value) {
        Some(converted) => Ok(converted),
        None => Err(ParseSyntaxError::TypeMismatch),
    }
}
/// Builds one `Scope` per whitespace-separated word of `s`, in order.
///
/// # Errors
///
/// Stops at the first word `repo` rejects and returns it wrapped in
/// `ParseSyntaxError::InvalidScope`.
fn str_to_scopes(s: &str, repo: &mut ScopeRepository) -> Result<Vec<Scope>, ParseSyntaxError> {
    let mut scopes = Vec::new();
    for word in s.split_whitespace() {
        match repo.build(word) {
            Ok(scope) => scopes.push(scope),
            Err(err) => return Err(ParseSyntaxError::InvalidScope(err)),
        }
    }
    Ok(scopes)
}
/// Mutable state threaded through the parsing functions of one syntax definition.
struct ParserState<'a> {
/// Repository used to build every `Scope` encountered while parsing.
scope_repo: &'a mut ScopeRepository,
/// Values of the top-level `variables` map, substituted for `{{name}}` in regexes.
variables: HashMap<String, String>,
/// Matches a `{{name}}` variable reference; group 1 is the variable name.
variable_regex: Regex,
/// Matches a backslash followed by a digit; used to detect back-references in
/// the regex of a `pop` pattern.
backref_regex: Regex,
/// Selects which newline rewrite is applied to each regex
/// (`regex_for_newlines` when true, `regex_for_no_newlines` otherwise).
lines_include_newline: bool,
}
/// YAML for the two synthetic contexts (`__start` and `__main`) that are added to
/// every syntax definition.
///
/// Nesting in YAML is expressed through indentation: `push: __main` must be
/// aligned with `match` so that it is part of the same list item. At column 0
/// it would become an unrelated top-level key and `__start` would never push
/// `__main`.
static START_CONTEXT: &str = "
__start:
    - meta_include_prototype: false
    - match: ''
      push: __main
__main:
    - include: main
";
impl SyntaxDefinition {
/// Parses a syntax definition from the YAML text `s`.
///
/// Only the first YAML document in `s` is used. `lines_include_newline`
/// selects how regexes are rewritten with respect to newlines, and
/// `fallback_name` is used as the syntax name when the YAML has no `name` key.
///
/// # Errors
///
/// * `ParseSyntaxError::InvalidYaml` when `s` is not valid YAML.
/// * `ParseSyntaxError::EmptyFile` when `s` contains no YAML document.
/// * Any error produced while parsing the document itself.
///
/// # Panics
///
/// Panics if locking `SCOPE_REPO` fails.
pub fn load_from_str(
    s: &str,
    lines_include_newline: bool,
    fallback_name: Option<&str>,
) -> Result<SyntaxDefinition, ParseSyntaxError> {
    let docs = YamlLoader::load_from_str(s).map_err(ParseSyntaxError::InvalidYaml)?;
    let doc = docs.first().ok_or(ParseSyntaxError::EmptyFile)?;
    let mut repo_guard = SCOPE_REPO.lock().unwrap();
    SyntaxDefinition::parse_top_level(
        doc,
        repo_guard.deref_mut(),
        lines_include_newline,
        fallback_name,
    )
}
/// Builds a `SyntaxDefinition` from the top-level YAML map `doc`.
///
/// Reads the optional `variables`, `name`, `file_extensions`,
/// `hidden_file_extensions`, `first_line_match` and `hidden` keys and the
/// mandatory `contexts` and `scope` keys. Entries of optional collections that
/// do not have the expected type are skipped rather than reported.
///
/// # Errors
///
/// * `TypeMismatch` when `doc` is not a map or a mandatory key has the wrong type.
/// * `MissingMandatoryKey` when `contexts` or `scope` is absent.
/// * `InvalidScope` when the top-level scope is rejected.
/// * `MainMissing` when no `main` context was defined.
/// * Any error produced while parsing the contexts.
fn parse_top_level(
    doc: &Yaml,
    scope_repo: &mut ScopeRepository,
    lines_include_newline: bool,
    fallback_name: Option<&str>,
) -> Result<SyntaxDefinition, ParseSyntaxError> {
    let root = doc.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;

    // Only string -> string entries of `variables` are kept.
    let mut variables = HashMap::new();
    if let Ok(var_map) = get_key(root, "variables", |x| x.as_hash()) {
        variables.extend(var_map.iter().filter_map(|(k, v)| {
            match (k.as_str(), v.as_str()) {
                (Some(var_name), Some(var_value)) => {
                    Some((var_name.to_owned(), var_value.to_owned()))
                }
                _ => None,
            }
        }));
    }

    let contexts_hash = get_key(root, "contexts", |x| x.as_hash())?;
    let scope_name = get_key(root, "scope", |x| x.as_str())?;
    let top_level_scope = scope_repo
        .build(scope_name)
        .map_err(ParseSyntaxError::InvalidScope)?;

    let mut state = ParserState {
        scope_repo,
        variables,
        variable_regex: Regex::new(r"\{\{([A-Za-z0-9_]+)\}\}".into()),
        backref_regex: Regex::new(r"\\\d".into()),
        lines_include_newline,
    };

    let mut contexts = SyntaxDefinition::parse_contexts(contexts_hash, &mut state)?;
    if !contexts.contains_key("main") {
        return Err(ParseSyntaxError::MainMissing);
    }
    SyntaxDefinition::add_initial_contexts(&mut contexts, &mut state, top_level_scope);

    // Visible and hidden extensions end up in the same list, visible ones first.
    let mut file_extensions = Vec::new();
    for &extension_key in &["file_extensions", "hidden_file_extensions"] {
        if let Ok(list) = get_key(root, extension_key, |x| x.as_vec()) {
            for entry in list.iter() {
                if let Some(extension) = entry.as_str() {
                    file_extensions.push(extension.to_owned());
                }
            }
        }
    }

    let name = match get_key(root, "name", |x| x.as_str()) {
        Ok(explicit) => explicit,
        Err(_) => fallback_name.unwrap_or("Unnamed"),
    }
    .to_owned();
    let first_line_match = match get_key(root, "first_line_match", |x| x.as_str()) {
        Ok(pattern) => Some(pattern.to_owned()),
        Err(_) => None,
    };
    let hidden = get_key(root, "hidden", |x| x.as_bool()).unwrap_or(false);

    Ok(SyntaxDefinition {
        name,
        scope: top_level_scope,
        file_extensions,
        first_line_match,
        hidden,
        variables: state.variables,
        contexts,
    })
}
/// Parses every entry of the `contexts` map into a named `Context`.
///
/// Entries whose key is not a string or whose value is not a list are
/// skipped. The context named `prototype` is parsed with the prototype flag
/// set. Anonymous contexts created along the way are stored in the returned
/// map too.
fn parse_contexts(
    map: &Hash,
    state: &mut ParserState<'_>,
) -> Result<HashMap<String, Context>, ParseSyntaxError> {
    let mut contexts = HashMap::new();
    for (key, value) in map.iter() {
        let (name, items) = match (key.as_str(), value.as_vec()) {
            (Some(name), Some(items)) => (name, items),
            _ => continue,
        };
        let mut namer = ContextNamer::new(name);
        SyntaxDefinition::parse_context(
            items,
            state,
            &mut contexts,
            name == "prototype",
            &mut namer,
        )?;
    }
    Ok(contexts)
}
/// Parses the list `vec` into a `Context`, stores it in `contexts` and
/// returns the name it was stored under.
///
/// The name is taken from `namer` before any nested context is parsed. Each
/// list item must be a map. Items carrying a `meta_scope`,
/// `meta_content_scope`, `meta_include_prototype` or `clear_scopes` setting
/// only update the context. Every other item becomes an `include` or a match
/// pattern.
///
/// # Errors
///
/// `TypeMismatch` when an item is not a map, plus any error from parsing
/// scopes, references or match patterns.
fn parse_context(
    vec: &[Yaml],
    state: &mut ParserState<'_>,
    contexts: &mut HashMap<String, Context>,
    is_prototype: bool,
    namer: &mut ContextNamer,
) -> Result<String, ParseSyntaxError> {
    let mut context = Context::new(!is_prototype);
    let name = namer.next();

    for entry in vec {
        let map = entry.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
        let mut is_meta = false;

        if let Ok(scopes) = get_key(map, "meta_scope", |x| x.as_str()) {
            context.meta_scope = str_to_scopes(scopes, state.scope_repo)?;
            is_meta = true;
        }
        if let Ok(scopes) = get_key(map, "meta_content_scope", |x| x.as_str()) {
            context.meta_content_scope = str_to_scopes(scopes, state.scope_repo)?;
            is_meta = true;
        }
        if let Ok(flag) = get_key(map, "meta_include_prototype", |x| x.as_bool()) {
            context.meta_include_prototype = flag;
            is_meta = true;
        }
        // `clear_scopes` is either `true` (clear everything) or a count.
        if let Ok(true) = get_key(map, "clear_scopes", |x| x.as_bool()) {
            context.clear_scopes = Some(ClearAmount::All);
            is_meta = true;
        }
        if let Ok(count) = get_key(map, "clear_scopes", |x| x.as_i64()) {
            context.clear_scopes = Some(ClearAmount::TopN(count as usize));
            is_meta = true;
        }

        if is_meta {
            continue;
        }

        match get_key(map, "include", Some) {
            Ok(target) => {
                let reference =
                    SyntaxDefinition::parse_reference(target, state, contexts, namer, false)?;
                context.patterns.push(Pattern::Include(reference));
            }
            Err(_) => {
                let pattern =
                    SyntaxDefinition::parse_match_pattern(map, state, contexts, namer)?;
                if pattern.has_captures {
                    context.uses_backrefs = true;
                }
                context.patterns.push(Pattern::Match(pattern));
            }
        }
    }

    contexts.insert(name.clone(), context);
    Ok(name)
}
/// Turns the YAML value `y` into a `ContextReference`.
///
/// * A list is parsed as an anonymous inline context.
/// * A string starting with `scope:` refers to another syntax by scope.
/// * A string ending in `.sublime-syntax` refers to another syntax by file stem.
/// * Any other string is the name of a context.
///
/// For strings, the text between the first and second `#` (if any) is the
/// sub-context. It is ignored for plain named references.
///
/// # Errors
///
/// `TypeMismatch` when `y` is neither a string nor a list, `InvalidScope`
/// for a bad scope, `BadFileRef` when no file stem can be extracted, plus any
/// error from parsing an inline context.
fn parse_reference(
    y: &Yaml,
    state: &mut ParserState<'_>,
    contexts: &mut HashMap<String, Context>,
    namer: &mut ContextNamer,
    with_escape: bool,
) -> Result<ContextReference, ParseSyntaxError> {
    let text = match y.as_str() {
        Some(text) => text,
        None => {
            return match y.as_vec() {
                Some(items) => {
                    let subname =
                        SyntaxDefinition::parse_context(items, state, contexts, false, namer)?;
                    Ok(ContextReference::Inline(subname))
                }
                None => Err(ParseSyntaxError::TypeMismatch),
            };
        }
    };

    // `split` always yields at least one piece, so `target` is the text before
    // the first '#'.
    let mut pieces = text.split('#');
    let target = pieces.next().unwrap_or("");
    let sub_context = pieces.next().map(|piece| piece.to_owned());

    if target.starts_with("scope:") {
        let scope = state
            .scope_repo
            .build(&target["scope:".len()..])
            .map_err(ParseSyntaxError::InvalidScope)?;
        return Ok(ContextReference::ByScope {
            scope,
            sub_context,
            with_escape,
        });
    }

    if target.ends_with(".sublime-syntax") {
        let stem = Path::new(target)
            .file_stem()
            .and_then(|x| x.to_str())
            .ok_or(ParseSyntaxError::BadFileRef)?;
        return Ok(ContextReference::File {
            name: stem.to_owned(),
            sub_context,
            with_escape,
        });
    }

    Ok(ContextReference::Named(target.to_owned()))
}
fn parse_match_pattern(map: &Hash,
state: &mut ParserState<'_>,
contexts: &mut HashMap<String, Context>,
namer: &mut ContextNamer)
-> Result<MatchPattern, ParseSyntaxError> {
let raw_regex = get_key(map, "match", |x| x.as_str())?;
let regex_str = Self::parse_regex(raw_regex, state)?;
let scope = get_key(map, "scope", |x| x.as_str())
.ok()
.map(|s| str_to_scopes(s, state.scope_repo))
.unwrap_or_else(|| Ok(vec![]))?;
let captures = if let Ok(map) = get_key(map, "captures", |x| x.as_hash()) {
Some(Self::parse_captures(map, ®ex_str, state)?)
} else {
None
};
let mut has_captures = false;
let operation = if get_key(map, "pop", Some).is_ok() {
has_captures = state.backref_regex.search(®ex_str, 0, regex_str.len(), None);
MatchOperation::Pop
} else if let Ok(y) = get_key(map, "push", Some) {
MatchOperation::Push(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
} else if let Ok(y) = get_key(map, "set", Some) {
MatchOperation::Set(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
} else if let Ok(y) = get_key(map, "embed", Some) {
let mut embed_escape_context_yaml = vec!();
let mut commands = Hash::new();
commands.insert(Yaml::String("meta_include_prototype".to_string()), Yaml::Boolean(false));
embed_escape_context_yaml.push(Yaml::Hash(commands));
if let Ok(s) = get_key(map, "embed_scope", Some) {
commands = Hash::new();
commands.insert(Yaml::String("meta_content_scope".to_string()), s.clone());
embed_escape_context_yaml.push(Yaml::Hash(commands));
}
if let Ok(v) = get_key(map, "escape", Some) {
let mut match_map = Hash::new();
match_map.insert(Yaml::String("match".to_string()), v.clone());
match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
if let Ok(y) = get_key(map, "escape_captures", Some) {
match_map.insert(Yaml::String("captures".to_string()), y.clone());
}
embed_escape_context_yaml.push(Yaml::Hash(match_map));
let escape_context = SyntaxDefinition::parse_context(
&embed_escape_context_yaml,
state,
contexts,
false,
namer,
)?;
MatchOperation::Push(vec![ContextReference::Inline(escape_context),
SyntaxDefinition::parse_reference(y, state, contexts, namer, true)?])
} else {
return Err(ParseSyntaxError::MissingMandatoryKey("escape"));
}
} else {
MatchOperation::None
};
let with_prototype = if let Ok(v) = get_key(map, "with_prototype", |x| x.as_vec()) {
let subname = Self::parse_context(v, state, contexts, true, namer)?;
Some(ContextReference::Inline(subname))
} else if let Ok(v) = get_key(map, "escape", Some) {
let subname = namer.next();
let mut context = Context::new(false);
let mut match_map = Hash::new();
match_map.insert(Yaml::String("match".to_string()), Yaml::String(format!("(?={})", v.as_str().unwrap())));
match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
let pattern = SyntaxDefinition::parse_match_pattern(&match_map, state, contexts, namer)?;
if pattern.has_captures {
context.uses_backrefs = true;
}
context.patterns.push(Pattern::Match(pattern));
contexts.insert(subname.clone(), context);
Some(ContextReference::Inline(subname))
} else {
None
};
let pattern = MatchPattern::new(
has_captures,
regex_str,
scope,
captures,
operation,
with_prototype,
);
Ok(pattern)
}
/// Parses the argument of `push` / `set` into the list of contexts to push.
///
/// A list whose first element is a string, or is itself a list starting with
/// a map, is treated as several references that are each parsed on their own.
/// Anything else (a single name, or a list of maps describing one anonymous
/// context) is parsed as one reference.
///
/// Empty lists, including an empty nested list such as `[[]]`, take the
/// single-reference path. They are then reported through the normal error
/// path instead of panicking on an out-of-bounds index.
fn parse_pushargs(
    y: &Yaml,
    state: &mut ParserState<'_>,
    contexts: &mut HashMap<String, Context>,
    namer: &mut ContextNamer,
) -> Result<Vec<ContextReference>, ParseSyntaxError> {
    let is_reference_list = |items: &[Yaml]| match items.first() {
        Some(first) => {
            first.as_str().is_some()
                || first
                    .as_vec()
                    .and_then(|inner| inner.first())
                    .map_or(false, |x| x.as_hash().is_some())
        }
        None => false,
    };

    match y.as_vec() {
        Some(items) if is_reference_list(items) => items
            .iter()
            .map(|x| SyntaxDefinition::parse_reference(x, state, contexts, namer, false))
            .collect(),
        _ => {
            let reference = SyntaxDefinition::parse_reference(y, state, contexts, namer, false)?;
            Ok(vec![reference])
        }
    }
}
/// Prepares a regex from the syntax definition for use.
///
/// In order: substitutes `{{variables}}`, replaces POSIX character class
/// names, applies the newline rewrite selected by
/// `state.lines_include_newline`, and finally checks that the result compiles.
///
/// # Errors
///
/// `RegexCompileError` when the rewritten regex does not compile.
fn parse_regex(raw_regex: &str, state: &ParserState<'_>) -> Result<String, ParseSyntaxError> {
    let regex = Self::resolve_variables(raw_regex, state);
    let regex = replace_posix_char_classes(regex);
    let regex = if state.lines_include_newline {
        regex_for_newlines(regex)
    } else {
        regex_for_no_newlines(regex)
    };
    Self::try_compile_regex(&regex)?;
    Ok(regex)
}
/// Replaces every `{{name}}` in `raw_regex` with the value of that variable.
///
/// Variable values are resolved recursively. Unknown variables are replaced
/// with the empty string. A variable that refers back to itself, directly or
/// through other variables, is also replaced with the empty string at the
/// point where the cycle closes, so that resolution always terminates.
fn resolve_variables(raw_regex: &str, state: &ParserState<'_>) -> String {
    Self::resolve_variables_guarded(raw_regex, state, &mut Vec::new())
}

/// Worker for `resolve_variables`; `in_progress` holds the names of the
/// variables currently being expanded, outermost first.
fn resolve_variables_guarded<'s>(
    raw_regex: &str,
    state: &'s ParserState<'_>,
    in_progress: &mut Vec<&'s str>,
) -> String {
    let mut result = String::new();
    let mut index = 0;
    let mut region = Region::new();
    while state
        .variable_regex
        .search(raw_regex, index, raw_regex.len(), Some(&mut region))
    {
        // The search succeeded, so the whole match and group 1 both exist.
        let (begin, end) = region.pos(0).unwrap();
        result.push_str(&raw_regex[index..begin]);

        let (name_start, name_end) = region.pos(1).unwrap();
        let var_name = &raw_regex[name_start..name_end];

        let is_cyclic = in_progress.iter().any(|active| *active == var_name);
        if !is_cyclic {
            if let Some((name, value)) = state.variables.get_key_value(var_name) {
                in_progress.push(name.as_str());
                let expanded = Self::resolve_variables_guarded(value, state, in_progress);
                in_progress.pop();
                result.push_str(&expanded);
            }
        }

        index = end;
    }
    if index < raw_regex.len() {
        result.push_str(&raw_regex[index..]);
    }
    result
}
fn try_compile_regex(regex_str: &str) -> Result<(), ParseSyntaxError> {
let regex_str = substitute_backrefs_in_regex(regex_str, |i| Some(format!("<placeholder_{}>", i)));
if let Some(error) = Regex::try_compile(®ex_str) {
Err(ParseSyntaxError::RegexCompileError(regex_str, error))
} else {
Ok(())
}
}
/// Parses a `captures` map (capture index -> scope string).
///
/// Entries whose key is not an integer or whose value is not a string are
/// skipped. So are indexes that `get_consuming_capture_indexes` does not
/// report for `regex_str`.
///
/// # Errors
///
/// `InvalidScope` when a scope string cannot be built.
fn parse_captures(
    map: &Hash,
    regex_str: &str,
    state: &mut ParserState<'_>,
) -> Result<CaptureMapping, ParseSyntaxError> {
    let valid_indexes = get_consuming_capture_indexes(regex_str);

    let mut captures = Vec::new();
    for (key, value) in map.iter() {
        let (index, scopes) = match (key.as_i64(), value.as_str()) {
            (Some(key_int), Some(val_str)) => (key_int as usize, val_str),
            _ => continue,
        };
        if !valid_indexes.contains(&index) {
            continue;
        }
        captures.push((index, str_to_scopes(scopes, state.scope_repo)?));
    }
    Ok(captures)
}
/// Adds the synthetic `__start` and `__main` contexts described by
/// `START_CONTEXT` to `contexts`.
///
/// `__start` gets `top_level_scope` as its only meta content scope. `__main`
/// takes over the meta settings of `main` as they were before this call.
/// `main` itself then gets `top_level_scope` inserted at the front of its meta
/// content scope.
///
/// # Panics
///
/// Panics if `START_CONTEXT` cannot be parsed or if `contexts` has no `main`
/// entry.
fn add_initial_contexts(
    contexts: &mut HashMap<String, Context>,
    state: &mut ParserState<'_>,
    top_level_scope: Scope,
) {
    let yaml_docs = YamlLoader::load_from_str(START_CONTEXT).unwrap();
    let yaml = &yaml_docs[0];

    let start_yaml: &[Yaml] = yaml["__start"].as_vec().unwrap();
    let mut start_namer = ContextNamer::new("__start");
    SyntaxDefinition::parse_context(start_yaml, state, contexts, false, &mut start_namer)
        .unwrap();
    if let Some(start) = contexts.get_mut("__start") {
        start.meta_content_scope = vec![top_level_scope];
    }

    let main_yaml: &[Yaml] = yaml["__main"].as_vec().unwrap();
    let mut main_namer = ContextNamer::new("__main");
    SyntaxDefinition::parse_context(main_yaml, state, contexts, false, &mut main_namer)
        .unwrap();

    // Snapshot `main`'s meta settings before `main` itself is modified below.
    let (include_prototype, main_meta_scope, main_meta_content_scope) = {
        let main = &contexts["main"];
        (
            main.meta_include_prototype,
            main.meta_scope.clone(),
            main.meta_content_scope.clone(),
        )
    };
    if let Some(outer_main) = contexts.get_mut("__main") {
        outer_main.meta_include_prototype = include_prototype;
        outer_main.meta_scope = main_meta_scope;
        outer_main.meta_content_scope = main_meta_content_scope;
    }

    if let Some(main) = contexts.get_mut("main") {
        main.meta_content_scope.insert(0, top_level_scope);
    }
}
}
/// Hands out names for a named context and the anonymous contexts nested in it.
struct ContextNamer {
    /// Name of the enclosing named context.
    name: String,
    /// Index of the next anonymous name; `None` until the plain name was handed out.
    anonymous_index: Option<usize>,
}

impl ContextNamer {
    /// Creates a namer for the context called `name`.
    fn new(name: &str) -> ContextNamer {
        ContextNamer {
            name: name.to_string(),
            anonymous_index: None,
        }
    }

    /// Returns the next name: first the plain name, then `#anon_<name>_0`,
    /// `#anon_<name>_1`, and so on.
    fn next(&mut self) -> String {
        match self.anonymous_index {
            None => {
                self.anonymous_index = Some(0);
                self.name.clone()
            }
            Some(index) => {
                self.anonymous_index = Some(index + 1);
                format!("#anon_{}_{}", self.name, index)
            }
        }
    }
}
/// Replaces the POSIX class names `[:alpha:]`, `[:alnum:]`, `[:lower:]`,
/// `[:upper:]` and `[:digit:]` in `regex` with Unicode property escapes.
fn replace_posix_char_classes(regex: String) -> String {
    // Applied in this order; no replacement introduces another class name.
    const REPLACEMENTS: [(&str, &str); 5] = [
        ("[:alpha:]", r"\p{L}"),
        ("[:alnum:]", r"\p{L}\p{N}"),
        ("[:lower:]", r"\p{Ll}"),
        ("[:upper:]", r"\p{Lu}"),
        ("[:digit:]", r"\p{Nd}"),
    ];

    let mut rewritten = regex;
    for &(posix_name, unicode_class) in REPLACEMENTS.iter() {
        rewritten = rewritten.replace(posix_name, unicode_class);
    }
    rewritten
}
/// Rewrites `regex` for input lines that include their trailing newline.
///
/// A regex without any `$` is returned unchanged. Otherwise it is passed
/// through `RegexRewriterForNewlines`.
fn regex_for_newlines(regex: String) -> String {
    if regex.contains('$') {
        let rewriter = RegexRewriterForNewlines {
            parser: Parser::new(regex.as_bytes()),
        };
        let rewritten = rewriter.rewrite();
        rewritten
    } else {
        regex
    }
}
/// Rewrites a regex so that `$` is wrapped as `(?m:$)`.
struct RegexRewriterForNewlines<'a> {
    /// Cursor over the bytes of the regex being rewritten.
    parser: Parser<'a>,
}

impl<'a> RegexRewriterForNewlines<'a> {
    /// Consumes the rewriter and returns the rewritten regex.
    ///
    /// Every `$` that is neither escaped with a backslash nor inside a
    /// character class becomes `(?m:$)`. Everything else is copied verbatim.
    fn rewrite(mut self) -> String {
        let mut out = Vec::new();
        while let Some(byte) = self.parser.peek() {
            // Character classes are copied as a unit so a `$` inside is untouched.
            if byte == b'[' {
                let (class_bytes, _) = self.parser.parse_character_class();
                out.extend(class_bytes);
                continue;
            }

            self.parser.next();
            match byte {
                b'$' => out.extend_from_slice(br"(?m:$)"),
                b'\\' => {
                    // Copy the escaped byte too so that `\$` stays as it is.
                    out.push(byte);
                    if let Some(escaped) = self.parser.peek() {
                        self.parser.next();
                        out.push(escaped);
                    }
                }
                _ => out.push(byte),
            }
        }
        String::from_utf8(out).unwrap()
    }
}
/// Rewrites `regex` for input lines that do not include their trailing newline.
///
/// A regex that does not contain the two characters `\n` is returned
/// unchanged. Otherwise `(?:\n)?` is replaced with `(?:$|)` and the result is
/// passed through `RegexRewriterForNoNewlines`.
fn regex_for_no_newlines(regex: String) -> String {
    if regex.contains(r"\n") {
        let prepared = regex.replace("(?:\\n)?", "(?:$|)");
        let rewriter = RegexRewriterForNoNewlines {
            parser: Parser::new(prepared.as_bytes()),
        };
        let rewritten = rewriter.rewrite();
        rewritten
    } else {
        regex
    }
}
/// Rewrites a regex so that places that expect a newline match the end of the
/// line instead.
struct RegexRewriterForNoNewlines<'a> {
    /// Cursor over the bytes of the regex being rewritten.
    parser: Parser<'a>,
}

impl<'a> RegexRewriterForNoNewlines<'a> {
    /// Consumes the rewriter and returns the rewritten regex.
    ///
    /// * `\n` becomes `$` unless it is directly followed by `?`, `+` or `*`.
    /// * A character class that `parse_character_class` reports as matching a
    ///   newline becomes `(?:<class>|$)` unless it is directly followed by `?`.
    /// * Everything else is copied verbatim.
    fn rewrite(mut self) -> String {
        let mut out = Vec::new();
        while let Some(byte) = self.parser.peek() {
            match byte {
                b'\\' => {
                    self.parser.next();
                    match self.parser.peek() {
                        None => out.push(byte),
                        Some(escaped) => {
                            self.parser.next();
                            let quantified = match self.parser.peek() {
                                Some(b'?') | Some(b'+') | Some(b'*') => true,
                                _ => false,
                            };
                            if escaped == b'n' && !quantified {
                                out.push(b'$');
                            } else {
                                out.push(byte);
                                out.push(escaped);
                            }
                        }
                    }
                }
                b'[' => {
                    let (class_bytes, matches_newline) = self.parser.parse_character_class();
                    let wrap = matches_newline && self.parser.peek() != Some(b'?');
                    if wrap {
                        out.extend_from_slice(b"(?:");
                    }
                    out.extend(class_bytes);
                    if wrap {
                        out.extend_from_slice(br"|$)");
                    }
                }
                _ => {
                    self.parser.next();
                    out.push(byte);
                }
            }
        }
        String::from_utf8(out).unwrap()
    }
}
/// Returns the indexes of the capture groups in `regex` that are not inside a
/// look-around, always starting with index 0.
fn get_consuming_capture_indexes(regex: &str) -> Vec<usize> {
    ConsumingCaptureIndexParser {
        parser: Parser::new(regex.as_bytes()),
    }
    .get_consuming_capture_indexes()
}
/// Scans a regex for capture groups that lie outside of look-arounds.
struct ConsumingCaptureIndexParser<'a> {
    /// Cursor over the bytes of the regex being scanned.
    parser: Parser<'a>,
}

impl<'a> ConsumingCaptureIndexParser<'a> {
    /// Consumes the scanner and returns the capture indexes, in ascending order.
    ///
    /// Index 0 is always included. A group counts as a capture when it opens
    /// with `(` not followed by `?`, or with `(?P<`. Groups opening with
    /// `(?=`, `(?!`, `(?<=` or `(?<!` start a look-around. Captures inside a
    /// look-around are numbered but not returned. Escaped bytes and character
    /// classes are skipped.
    fn get_consuming_capture_indexes(mut self) -> Vec<usize> {
        let mut group_number = 0;
        let mut in_lookaround = false;
        // One entry per open group: the look-around state to restore on `)`.
        let mut lookaround_stack = vec![in_lookaround];
        let mut consuming = vec![group_number];

        while let Some(byte) = self.parser.peek() {
            match byte {
                b'\\' => {
                    // Skip the backslash and the byte it escapes.
                    self.parser.next();
                    self.parser.next();
                }
                b'[' => {
                    self.parser.parse_character_class();
                }
                b'(' => {
                    self.parser.next();
                    lookaround_stack.push(in_lookaround);

                    let opens_capture = match self.parser.peek() {
                        None => false,
                        Some(b'?') => {
                            self.parser.next();
                            match self.parser.peek() {
                                None => false,
                                Some(kind) => {
                                    self.parser.next();
                                    match (kind, self.parser.peek()) {
                                        (b'=', _) | (b'!', _) => {
                                            in_lookaround = true;
                                            false
                                        }
                                        (b'<', Some(b'=')) | (b'<', Some(b'!')) => {
                                            self.parser.next();
                                            in_lookaround = true;
                                            false
                                        }
                                        (b'P', Some(b'<')) => true,
                                        _ => false,
                                    }
                                }
                            }
                        }
                        Some(_) => true,
                    };

                    if opens_capture {
                        group_number += 1;
                        if !in_lookaround {
                            consuming.push(group_number);
                        }
                    }
                }
                b')' => {
                    if let Some(previous) = lookaround_stack.pop() {
                        in_lookaround = previous;
                    }
                    self.parser.next();
                }
                _ => {
                    self.parser.next();
                }
            }
        }
        consuming
    }
}
/// Byte cursor over a regex, shared by the regex rewriters and scanners.
struct Parser<'a> {
    /// The regex being read.
    bytes: &'a [u8],
    /// Position of the next byte to read; may move past the end.
    index: usize,
}

impl<'a> Parser<'a> {
    /// Creates a cursor positioned at the first byte of `bytes`.
    fn new(bytes: &[u8]) -> Parser {
        Parser { bytes, index: 0 }
    }

    /// Returns the byte at the cursor without advancing, or `None` past the end.
    fn peek(&self) -> Option<u8> {
        if self.index < self.bytes.len() {
            Some(self.bytes[self.index])
        } else {
            None
        }
    }

    /// Advances the cursor by one byte.
    fn next(&mut self) {
        self.index += 1;
    }

    /// Reads a character class; the cursor must be on its opening `[`.
    ///
    /// Returns the bytes of the class (brackets included) and whether it
    /// contains `\n` while being neither negated nor inside a nested class. A
    /// `]` directly after `[` or `[^` is taken literally. On return the
    /// cursor is just past the closing `]`, or at the end of the input when the
    /// class is unterminated.
    fn parse_character_class(&mut self) -> (Vec<u8>, bool) {
        // Skip the opening bracket.
        self.next();
        let mut content = vec![b'['];

        let negated = self.peek() == Some(b'^');
        if negated {
            self.next();
            content.push(b'^');
        }

        // A leading `]` is a literal, not the end of the class.
        if self.peek() == Some(b']') {
            self.next();
            content.push(b']');
        }

        let mut depth = 0;
        let mut matches_newline = false;
        while let Some(byte) = self.peek() {
            self.next();
            content.push(byte);
            match byte {
                b'\\' => {
                    if let Some(escaped) = self.peek() {
                        self.next();
                        content.push(escaped);
                        if escaped == b'n' && !negated && depth == 0 {
                            matches_newline = true;
                        }
                    }
                }
                b'[' => depth += 1,
                b']' => {
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        (content, matches_newline)
    }
}
#[cfg(test)]
mod tests {
use crate::parsing::syntax_definition::*;
use crate::parsing::Scope;
use super::*;
// Parses a minimal definition and a fuller one, then checks the resulting
// metadata, contexts and the first pattern of `main`.
//
// YAML nesting is expressed through indentation, so the block-style fixture
// below has to keep its lists and maps indented under their parent keys.
#[test]
fn can_parse() {
    let defn: SyntaxDefinition =
        SyntaxDefinition::load_from_str("name: C\nscope: source.c\ncontexts: {main: []}",
                                        false, None)
            .unwrap();
    assert_eq!(defn.name, "C");
    assert_eq!(defn.scope, Scope::new("source.c").unwrap());
    let exts_empty: Vec<String> = Vec::new();
    assert_eq!(defn.file_extensions, exts_empty);
    assert!(!defn.hidden);
    assert!(defn.variables.is_empty());
    let defn2: SyntaxDefinition =
        SyntaxDefinition::load_from_str("
        name: C
        scope: source.c
        file_extensions: [c, h]
        hidden_file_extensions: [k, l]
        hidden: true
        variables:
          ident: '[QY]+'
        contexts:
          prototype:
            - match: lol
              scope: source.php
          main:
            - match: \\b(if|else|for|while|{{ident}})\\b
              scope: keyword.control.c keyword.looping.c
              captures:
                  1: meta.preprocessor.c++
                  2: keyword.control.include.c++
              push: [string, 'scope:source.c#main', 'CSS.sublime-syntax#rule-list-body']
              with_prototype:
                - match: wow
                  pop: true
            - match: '\"'
              push: string
          string:
            - meta_scope: string.quoted.double.c
            - meta_include_prototype: false
            - match: \\\\.
              scope: constant.character.escape.c
            - match: '\"'
              pop: true
        ",
                                        false, None)
            .unwrap();
    assert_eq!(defn2.name, "C");
    let top_level_scope = Scope::new("source.c").unwrap();
    assert_eq!(defn2.scope, top_level_scope);
    let exts: Vec<String> = vec!["c", "h", "k", "l"].into_iter().map(String::from).collect();
    assert_eq!(defn2.file_extensions, exts);
    assert!(defn2.hidden);
    assert_eq!(defn2.variables.get("ident").unwrap(), "[QY]+");

    let n: Vec<Scope> = Vec::new();
    println!("{:?}", defn2);
    let main = &defn2.contexts["main"];
    assert_eq!(main.meta_content_scope, vec![top_level_scope]);
    assert_eq!(main.meta_scope, n);
    assert!(main.meta_include_prototype);

    assert_eq!(defn2.contexts["__main"].meta_content_scope, n);
    assert_eq!(defn2.contexts["__start"].meta_content_scope, vec![top_level_scope]);

    assert_eq!(defn2.contexts["string"].meta_scope,
               vec![Scope::new("string.quoted.double.c").unwrap()]);
    let first_pattern: &Pattern = &main.patterns[0];
    match *first_pattern {
        Pattern::Match(ref match_pat) => {
            let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
            assert_eq!(&m[0], &(1,vec![Scope::new("meta.preprocessor.c++").unwrap()]));
            use crate::parsing::syntax_definition::ContextReference::*;

            // The operation is compared through its Debug output.
            let expected = MatchOperation::Push(vec![
                Named("string".to_owned()),
                ByScope {
                    scope: Scope::new("source.c").unwrap(),
                    sub_context: Some("main".to_owned()),
                    with_escape: false,
                },
                File {
                    name: "CSS".to_owned(),
                    sub_context: Some("rule-list-body".to_owned()),
                    with_escape: false,
                },
            ]);
            assert_eq!(format!("{:?}", match_pat.operation),
                       format!("{:?}", expected));

            assert_eq!(match_pat.scope,
                       vec![Scope::new("keyword.control.c").unwrap(),
                            Scope::new("keyword.looping.c").unwrap()]);

            assert!(match_pat.with_prototype.is_some());
        }
        _ => unreachable!(),
    }
}
// Checks that `embed` + `embed_scope` + `escape` produces the same `main`
// context as the equivalent hand-written `push` + `with_prototype` form, apart
// from the `with_escape` flag on the embedded reference.
//
// YAML nesting is expressed through indentation, so both fixtures keep their
// pattern keys indented under the list item they belong to.
#[test]
fn can_parse_embed_as_with_prototypes() {
    let old_def = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              push:
                - [{ meta_include_prototype: false }, { meta_content_scope: 'source.css.embedded.html' }, { match: '(?i)(?=</style)', pop: true }]
                - scope:source.css
              with_prototype:
                - match: (?=(?i)(?=</style))
                  pop: true
        "#, false, None).unwrap();

    let mut def_with_embed = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              embed: scope:source.css
              embed_scope: source.css.embedded.html
              escape: (?i)(?=</style)
        "#, false, None).unwrap();

    // `embed` sets `with_escape` on the embedded reference; clear it so that
    // the two definitions can be compared for equality.
    let def_with_embed_context = def_with_embed.contexts.get_mut("main").unwrap();
    if let Pattern::Match(ref mut match_pattern) = def_with_embed_context.patterns[0] {
        if let MatchOperation::Push(ref mut context_references) = match_pattern.operation {
            if let ContextReference::ByScope {
                ref mut with_escape,
                ..
            } = context_references[1]
            {
                *with_escape = false;
            }
        }
    }
    assert_eq!(old_def.contexts["main"], def_with_embed.contexts["main"]);
}
// `embed` without `escape` must be reported as a missing `escape` key.
//
// YAML nesting is expressed through indentation, so `embed` and `embed_scope`
// stay indented under the `- match` item they belong to.
#[test]
fn errors_on_embed_without_escape() {
    let def = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              embed: scope:source.css
              embed_scope: source.css.embedded.html
        "#, false, None);
    assert!(def.is_err());
    match def.unwrap_err() {
        ParseSyntaxError::MissingMandatoryKey(key) => assert_eq!(key, "escape"),
        _ => unreachable!("Got unexpected ParseSyntaxError"),
    }
}
// A regex that does not compile must be reported together with its text.
//
// YAML nesting is expressed through indentation, so the pattern stays indented
// under `main`, which in turn is indented under `contexts`.
#[test]
fn errors_on_regex_compile_error() {
    let def = SyntaxDefinition::load_from_str(r#"
        name: C
        scope: source.c
        file_extensions: [test]
        contexts:
          main:
            - match: '[a'
              scope: keyword.name
        "#, false, None);
    assert!(def.is_err());
    match def.unwrap_err() {
        ParseSyntaxError::RegexCompileError(ref regex, _) => assert_eq!("[a", regex),
        _ => unreachable!("Got unexpected ParseSyntaxError"),
    }
}
// Parses a definition whose `push` mixes an inline flow-style context with
// named contexts, then checks the captures of the first `main` pattern.
//
// YAML nesting is expressed through indentation, so every pattern key stays
// indented under its list item and every context under `contexts`.
#[test]
fn can_parse_ugly_yaml() {
    let defn: SyntaxDefinition =
        SyntaxDefinition::load_from_str("
        name: LaTeX
        scope: text.tex.latex
        contexts:
          main:
            - match: '((\\\\)(?:framebox|makebox))\\b'
              captures:
                1: support.function.box.latex
                2: punctuation.definition.backslash.latex
              push:
                - [{meta_scope: meta.function.box.latex}, {match: '', pop: true}]
                - argument
                - optional-arguments
          argument:
            - match: '\\{'
              scope: punctuation.definition.group.brace.begin.latex
            - match: '(?=\\S)'
              pop: true
          optional-arguments:
            - match: '(?=\\S)'
              pop: true
        ",
                                        false, None)
            .unwrap();
    assert_eq!(defn.name, "LaTeX");
    let top_level_scope = Scope::new("text.tex.latex").unwrap();
    assert_eq!(defn.scope, top_level_scope);

    let first_pattern: &Pattern = &defn.contexts["main"].patterns[0];
    match *first_pattern {
        Pattern::Match(ref match_pat) => {
            let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
            assert_eq!(&m[0], &(1,vec![Scope::new("support.function.box.latex").unwrap()]));

            assert!(match_pat.with_prototype.is_none());
        }
        _ => unreachable!(),
    }
}
// Anonymous contexts are named `#anon_<context>_<n>`, numbered in the order in
// which they are encountered inside the named context `a`.
//
// YAML nesting is expressed through indentation, so each inline `push` list
// stays indented under the pattern that pushes it.
#[test]
fn names_anonymous_contexts() {
    let def = SyntaxDefinition::load_from_str(
        r#"
        scope: source.c
        contexts:
          main:
            - match: a
              push: a
          a:
            - meta_scope: a
            - match: x
              push:
                - meta_scope: anonymous_x
                - match: anything
                  push:
                    - meta_scope: anonymous_x_2
            - match: y
              push:
                - meta_scope: anonymous_y
            - match: z
              escape: 'test'
        "#,
        false,
        None
    ).unwrap();

    assert_eq!(def.contexts["a"].meta_scope, vec![Scope::new("a").unwrap()]);
    assert_eq!(def.contexts["#anon_a_0"].meta_scope, vec![Scope::new("anonymous_x").unwrap()]);
    assert_eq!(def.contexts["#anon_a_1"].meta_scope, vec![Scope::new("anonymous_x_2").unwrap()]);
    assert_eq!(def.contexts["#anon_a_2"].meta_scope, vec![Scope::new("anonymous_y").unwrap()]);
    // The context generated for `escape`.
    assert_eq!(def.contexts["#anon_a_3"].patterns.len(), 1);
}
// Without a `name` key the fallback name passed by the caller is used.
//
// YAML nesting is expressed through indentation, so `main` stays indented
// under `contexts`.
#[test]
fn can_use_fallback_name() {
    let def = SyntaxDefinition::load_from_str(r#"
        scope: source.c
        contexts:
          main:
            - match: ''
        "#, false, Some("C"));
    assert_eq!(def.unwrap().name, "C");
}
// `regex_for_newlines` wraps bare `$` as `(?m:$)` and leaves escaped `$`,
// `$` inside character classes and everything else untouched.
#[test]
fn can_rewrite_regex_for_newlines() {
    // (input, expected rewrite)
    let cases: &[(&str, &str)] = &[
        (r"a", r"a"),
        (r"\b", r"\b"),
        (r"(a)", r"(a)"),
        (r"[a]", r"[a]"),
        (r"[^a]", r"[^a]"),
        (r"[]a]", r"[]a]"),
        (r"[[a]]", r"[[a]]"),
        (r"^", r"^"),
        (r"$", r"(?m:$)"),
        (r"^ab$", r"^ab(?m:$)"),
        (r"\^ab\$", r"\^ab\$"),
        (r"(//).*$", r"(//).*(?m:$)"),
        (r"[a$]", r"[a$]"),
    ];

    for &(input, expected) in cases {
        let rewritten = regex_for_newlines(input.to_string());
        assert_eq!(&rewritten, expected);
    }
}
// `regex_for_no_newlines` turns places that expect a newline into
// end-of-line matches and leaves optional or negated newline matches alone.
#[test]
fn can_rewrite_regex_for_no_newlines() {
    // (input, expected rewrite)
    let cases: &[(&str, &str)] = &[
        (r"a", r"a"),
        (r"\b", r"\b"),
        (r"(a)", r"(a)"),
        (r"[a]", r"[a]"),
        (r"[^a]", r"[^a]"),
        (r"[]a]", r"[]a]"),
        (r"[[a]]", r"[[a]]"),
        (r"\n", r"$"),
        (r"\[\n", r"\[$"),
        (r"a\n?", r"a\n?"),
        (r"a\n+", r"a\n+"),
        (r"a\n*", r"a\n*"),
        (r"[abc\n]", r"(?:[abc\n]|$)"),
        (r"[^\n]", r"[^\n]"),
        (r"[^]\n]", r"[^]\n]"),
        (r"[\n]?", r"[\n]?"),
        (r"[\n]", r"(?:[\n]|$)"),
        (r"[]\n]", r"(?:[]\n]|$)"),
        (r"[[a]&&[\n]]", r"[[a]&&[\n]]"),
        (r"ab(?:\n)?", r"ab(?:$|)"),
        (r"(?<!\n)ab", r"(?<!$)ab"),
        (r"(?<=\n)ab", r"(?<=$)ab"),
    ];

    for &(input, expected) in cases {
        let rewritten = regex_for_no_newlines(input.to_string());
        assert_eq!(&rewritten, expected);
    }
}
// The capture inside the look-ahead (group 2) is not reported; the named
// group counts as a regular capture.
#[test]
fn can_get_valid_captures_from_regex() {
    let regex = "hello(test)(?=(world))(foo(?P<named>bar))";
    println!("{:?}", regex);

    let valid_indexes = get_consuming_capture_indexes(regex);
    println!("{:?}", valid_indexes);

    let expected: Vec<usize> = vec![0, 1, 3, 4];
    assert_eq!(valid_indexes, expected);
}
// A `(?=` inside a character class is plain text and must not start a
// look-around.
#[test]
fn can_get_valid_captures_from_regex2() {
    let regex = "hello(test)[(?=tricked](foo(bar))";
    println!("{:?}", regex);

    let valid_indexes = get_consuming_capture_indexes(regex);
    println!("{:?}", valid_indexes);

    let expected: Vec<usize> = vec![0, 1, 2, 3];
    assert_eq!(valid_indexes, expected);
}
// Captures nested anywhere inside a look-around (groups 2 to 4) are not
// reported; numbering continues normally after the look-around closes.
#[test]
fn can_get_valid_captures_from_nested_regex() {
    let regex = "hello(test)(?=(world(?!(te(?<=(st))))))(foo(bar))";
    println!("{:?}", regex);

    let valid_indexes = get_consuming_capture_indexes(regex);
    println!("{:?}", valid_indexes);

    let expected: Vec<usize> = vec![0, 1, 5, 6];
    assert_eq!(valid_indexes, expected);
}
}