use std::collections::{BTreeMap, HashMap};
use std::hash::Hash;
use super::{scope::*, ParsingError};
use super::regex::{Regex, Region};
use regex_syntax::escape;
use serde::ser::{Serialize, Serializer};
use serde_derive::{Deserialize, Serialize};
use crate::parsing::syntax_set::SyntaxSet;
/// A list of `(usize, scopes)` pairs; this is the type stored in
/// `MatchPattern::captures`.
///
/// NOTE(review): the `usize` is presumably the regex capture-group index the
/// scopes apply to — confirm against the code that consumes this mapping.
pub type CaptureMapping = Vec<(usize, Vec<Scope>)>;
/// A small, copyable identifier for a [`Context`], made of two crate-private
/// indices.
///
/// `ContextReference::resolve` passes a `ContextId` to
/// `SyntaxSet::get_context` to obtain the context it names.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct ContextId {
// NOTE(review): presumably the index of the owning syntax inside the
// `SyntaxSet` — confirm against `SyntaxSet::get_context`.
pub(crate) syntax_index: usize,
// NOTE(review): presumably the index of the context within that syntax —
// confirm against `SyntaxSet::get_context`.
pub(crate) context_index: usize,
}
/// The data making up one syntax: identifying metadata plus its named
/// variables and contexts.
///
/// Both maps are serialized through `ordered_map`, which emits entries in
/// sorted key order, so the serialized form does not depend on `HashMap`
/// iteration order.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct SyntaxDefinition {
pub name: String,
// NOTE(review): presumably extensions without the leading dot — confirm
// against the loader.
pub file_extensions: Vec<String>,
pub scope: Scope,
// NOTE(review): presumably a regex tested against a file's first line to
// detect this syntax — confirm against the code that uses it.
pub first_line_match: Option<String>,
pub hidden: bool,
// Serialized in sorted key order via `ordered_map`.
#[serde(serialize_with = "ordered_map")]
pub variables: HashMap<String, String>,
// Contexts keyed by name; serialized in sorted key order via `ordered_map`.
#[serde(serialize_with = "ordered_map")]
pub contexts: HashMap<String, Context>,
}
/// One context of a syntax: an ordered list of patterns plus the scope and
/// prototype settings attached to it.
///
/// `Context::new` creates an empty context; `context_iter` walks the match
/// patterns reachable from a context.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct Context {
pub meta_scope: Vec<Scope>,
pub meta_content_scope: Vec<Scope>,
// The only field set from an argument by `Context::new`.
// NOTE(review): presumably controls whether the syntax's prototype context
// is applied to this context — confirm against the parser.
pub meta_include_prototype: bool,
pub clear_scopes: Option<ClearAmount>,
// `None` for a freshly constructed context.
pub prototype: Option<ContextId>,
// NOTE(review): presumably true when a pattern in this context relies on
// backreferences (see `MatchPattern::regex_with_refs`) — confirm.
pub uses_backrefs: bool,
// Patterns in order; `MatchIter` and `match_at` address them by index.
pub patterns: Vec<Pattern>,
}
impl Context {
pub fn new(meta_include_prototype: bool) -> Context {
Context {
meta_scope: Vec::new(),
meta_content_scope: Vec::new(),
meta_include_prototype,
clear_scopes: None,
uses_backrefs: false,
patterns: Vec::new(),
prototype: None,
}
}
}
/// One entry in a context's pattern list.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum Pattern {
/// A regex match pattern; these are the items `MatchIter` yields.
Match(MatchPattern),
/// A reference to another context. `MatchIter` descends into it when the
/// reference is `ContextReference::Direct` and skips it otherwise.
Include(ContextReference),
}
/// Iterator state for walking the match patterns reachable from a context.
///
/// Created by `context_iter`. `ctx_stack` and `index_stack` are parallel
/// stacks that are always pushed and popped together.
#[derive(Debug)]
pub struct MatchIter<'a> {
// Used to look up the contexts that `Include` patterns point to.
syntax_set: &'a SyntaxSet,
// Contexts currently being walked; the last entry is the innermost one.
ctx_stack: Vec<&'a Context>,
// For each entry of `ctx_stack`, the index of the next pattern to visit.
index_stack: Vec<usize>,
}
/// A single regex-based pattern together with the scopes and the operation
/// attached to it.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct MatchPattern {
// NOTE(review): presumably true when `regex` contains backreferences that
// must be filled in with `regex_with_refs` before use — confirm.
pub has_captures: bool,
// The pattern's regex; also reachable through the `regex()` accessor.
pub regex: Regex,
pub scope: Vec<Scope>,
pub captures: Option<CaptureMapping>,
pub operation: MatchOperation,
pub with_prototype: Option<ContextReference>,
}
/// A reference to a context, in one of several forms.
///
/// Only `Direct` can be turned into a context or an id: `resolve` and `id`
/// return `ParsingError::UnresolvedContextReference` for every other variant.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ContextReference {
/// Reference by name.
/// NOTE(review): presumably a context name within the same syntax — confirm.
#[non_exhaustive]
Named(String),
/// Reference by scope, optionally naming a sub-context.
/// NOTE(review): semantics of `with_escape` are not visible here — confirm.
#[non_exhaustive]
ByScope {
scope: Scope,
sub_context: Option<String>,
with_escape: bool,
},
/// Reference by a name with an optional sub-context.
/// NOTE(review): presumably `name` identifies a syntax file — confirm.
#[non_exhaustive]
File {
name: String,
sub_context: Option<String>,
with_escape: bool,
},
/// NOTE(review): presumably the name of an inline (anonymous) context —
/// confirm against the loader.
#[non_exhaustive]
Inline(String),
/// Reference that already carries the target's `ContextId`; the only
/// variant accepted by `resolve`, `id` and `MatchIter`.
#[non_exhaustive]
Direct(ContextId),
}
/// The operation attached to a `MatchPattern`.
///
/// NOTE(review): the variant meanings below are inferred from their names;
/// the code that executes them is not part of this file — confirm.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum MatchOperation {
// Presumably pushes the referenced contexts onto the parser's context stack.
Push(Vec<ContextReference>),
// Presumably replaces the current context with the referenced contexts.
Set(Vec<ContextReference>),
// Presumably pops the current context.
Pop,
// Presumably leaves the context stack untouched.
None,
}
impl<'a> Iterator for MatchIter<'a> {
    type Item = (&'a Context, usize);

    /// Yields the next `(context, pattern index)` pair whose pattern is a
    /// `Pattern::Match`, walking included contexts depth-first.
    ///
    /// `Include` patterns holding a `ContextReference::Direct` are descended
    /// into; any other kind of reference is skipped. Returns `None` once
    /// every context on the stack has been exhausted.
    ///
    /// # Panics
    ///
    /// Panics if `SyntaxSet::get_context` fails for a `Direct` reference.
    fn next(&mut self) -> Option<(&'a Context, usize)> {
        loop {
            // Innermost context still being walked; an empty stack ends iteration.
            let context = *self.ctx_stack.last()?;
            let top = self.ctx_stack.len() - 1;

            // Read this context's cursor and advance it before inspecting the pattern.
            let index = self.index_stack[top];
            self.index_stack[top] = index + 1;

            match context.patterns.get(index) {
                Some(Pattern::Match(_)) => return Some((context, index)),
                Some(Pattern::Include(ContextReference::Direct(context_id))) => {
                    let included = self.syntax_set.get_context(context_id).unwrap();
                    self.ctx_stack.push(included);
                    self.index_stack.push(0);
                }
                // Not a direct reference: skip it and move on to the next pattern.
                Some(Pattern::Include(_)) => {}
                // This context is exhausted: resume its parent.
                None => {
                    self.ctx_stack.pop();
                    self.index_stack.pop();
                }
            }
        }
    }
}
/// Builds a `MatchIter` that starts at the first pattern of `context` and
/// uses `syntax_set` to look up the contexts that `Include` patterns point to.
pub fn context_iter<'a>(syntax_set: &'a SyntaxSet, context: &'a Context) -> MatchIter<'a> {
    // The walk begins with only the root context on the stack, cursor at 0.
    let ctx_stack = vec![context];
    let index_stack = vec![0];
    MatchIter {
        syntax_set,
        ctx_stack,
        index_stack,
    }
}
impl Context {
    /// Returns the match pattern stored at `index` in this context.
    ///
    /// # Errors
    ///
    /// Returns `ParsingError::BadMatchIndex(index)` when the pattern at
    /// `index` is not a `Pattern::Match`, or when `index` is past the end of
    /// `patterns`. An out-of-range index used to panic; it is now reported
    /// through the same error.
    pub fn match_at(&self, index: usize) -> Result<&MatchPattern, ParsingError> {
        match self.patterns.get(index) {
            Some(Pattern::Match(match_pat)) => Ok(match_pat),
            _ => Err(ParsingError::BadMatchIndex(index)),
        }
    }
}
impl ContextReference {
pub fn resolve<'a>(&self, syntax_set: &'a SyntaxSet) -> Result<&'a Context, ParsingError> {
match *self {
ContextReference::Direct(ref context_id) => syntax_set.get_context(context_id),
_ => Err(ParsingError::UnresolvedContextReference(self.clone())),
}
}
pub fn id(&self) -> Result<ContextId, ParsingError> {
match *self {
ContextReference::Direct(ref context_id) => Ok(*context_id),
_ => Err(ParsingError::UnresolvedContextReference(self.clone())),
}
}
}
/// Rewrites `regex_str`, replacing every single-digit backreference (`\0` to
/// `\9`) with the text returned by `substituter` for that digit.
///
/// * A backreference for which `substituter` returns `None` is removed.
/// * Any other escape sequence (including `\\`) is copied through unchanged,
///   so an escaped backslash followed by a digit is not treated as a
///   backreference.
/// * Only one digit is consumed per backreference: `\10` is `\1` followed by
///   a literal `0`.
/// * A lone backslash at the very end of the input is kept. It used to be
///   dropped silently, which changed the meaning of the regex.
pub(crate) fn substitute_backrefs_in_regex<F>(regex_str: &str, substituter: F) -> String
where
    F: Fn(usize) -> Option<String>,
{
    let mut reg_str = String::with_capacity(regex_str.len());
    // True when the previous char was an unescaped backslash that has not
    // been written to the output yet.
    let mut last_was_escape = false;

    for c in regex_str.chars() {
        if last_was_escape {
            match c.to_digit(10) {
                // `\<digit>`: replace the whole sequence (or drop it on `None`).
                Some(digit) => {
                    if let Some(sub) = substituter(digit as usize) {
                        reg_str.push_str(&sub);
                    }
                }
                // Any other escape: emit the pending backslash and the char.
                None => {
                    reg_str.push('\\');
                    reg_str.push(c);
                }
            }
        } else if c != '\\' {
            reg_str.push(c);
        }
        // A backslash only starts an escape if it was not itself escaped.
        last_was_escape = c == '\\' && !last_was_escape;
    }

    // Input ended on a dangling backslash: keep it rather than altering the regex.
    if last_was_escape {
        reg_str.push('\\');
    }
    reg_str
}
impl MatchPattern {
    /// Builds a `MatchPattern` from its parts, wrapping `regex_str` in a
    /// `Regex`.
    pub fn new(
        has_captures: bool,
        regex_str: String,
        scope: Vec<Scope>,
        captures: Option<CaptureMapping>,
        operation: MatchOperation,
        with_prototype: Option<ContextReference>,
    ) -> MatchPattern {
        let regex = Regex::new(regex_str);
        Self {
            has_captures,
            regex,
            scope,
            captures,
            operation,
            with_prototype,
        }
    }

    /// Returns a new `Regex` in which each backreference of this pattern's
    /// regex is replaced by the escaped text of the matching group in
    /// `region`.
    ///
    /// `text` is the string that `region`'s positions index into.
    /// Backreferences to groups for which `region` has no position are
    /// removed.
    pub fn regex_with_refs(&self, region: &Region, text: &str) -> Regex {
        // Escaped so that the captured text is matched literally.
        let captured_text = |group: usize| -> Option<String> {
            region
                .pos(group)
                .map(|(start, end)| escape(&text[start..end]))
        };
        let substituted = substitute_backrefs_in_regex(self.regex.regex_str(), captured_text);
        Regex::new(substituted)
    }

    /// Borrows this pattern's regex.
    pub fn regex(&self) -> &Regex {
        &self.regex
    }
}
pub(crate) fn ordered_map<K, V, S>(map: &HashMap<K, V>, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer, K: Eq + Hash + Ord + Serialize, V: Serialize
{
let ordered: BTreeMap<_, _> = map.iter().collect();
ordered.serialize(serializer)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Backreferences in a pattern are replaced by the escaped text of the
    /// corresponding capture group; other escapes are left untouched and a
    /// backreference to a group that did not match (`\9`) is removed.
    #[test]
    fn can_compile_refs() {
        let pat = MatchPattern {
            has_captures: true,
            regex: Regex::new(r"lol \\ \2 \1 '\9' \wz".into()),
            scope: vec![],
            captures: None,
            operation: MatchOperation::None,
            with_prototype: None,
        };
        // Group 1 captures text full of regex metacharacters, so the
        // substituted regex must contain it in escaped form.
        let r = Regex::new(r"(\\\[\]\(\))(b)(c)(d)(e)".into());
        let s = r"\[]()bcde";
        let mut region = Region::new();
        let matched = r.search(s, 0, s.len(), Some(&mut region));
        assert!(matched);

        let regex_with_refs = pat.regex_with_refs(&region, s);
        assert_eq!(regex_with_refs.regex_str(), r"lol \\ b \\\[\]\(\) '' \wz");
    }
}