1use super::regex::{Regex, Region};
2use super::scope::*;
3use super::syntax_definition::*;
4use yaml_rust::{YamlLoader, Yaml, ScanError};
5use yaml_rust::yaml::Hash;
6use std::collections::HashMap;
7use std::error::Error;
8use std::path::Path;
9use std::ops::DerefMut;
10
/// Errors that can be produced while loading a syntax definition from YAML.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ParseSyntaxError {
    /// The text could not be scanned as YAML; wraps the scanner's error.
    #[error("Invalid YAML file syntax: {0}")]
    InvalidYaml(#[from] ScanError),
    /// The text was valid YAML but contained no document at all.
    #[error("The file must contain at least one YAML document")]
    EmptyFile,
    /// A required key (named by the payload) was absent from a YAML mapping.
    #[error("Missing mandatory key in YAML file: {0}")]
    MissingMandatoryKey(&'static str),
    /// A regex failed to compile; carries the regex text and the compile error.
    #[error("Error while compiling regex '{0}': {1}")]
    RegexCompileError(String, #[source] Box<dyn Error + Send + Sync + 'static>),
    /// A scope string was rejected when building a scope.
    #[error("Invalid scope: {0}")]
    InvalidScope(ParseScopeError),
    /// A `.sublime-syntax` file reference had no usable file stem.
    #[error("Invalid file reference")]
    BadFileRef,
    /// The `contexts` mapping does not define a `main` context.
    #[error("Context 'main' is missing")]
    MainMissing,
    /// A YAML value was present but did not have the expected type.
    #[error("Type mismatch")]
    TypeMismatch,
}
41
42fn get_key<'a, R, F: FnOnce(&'a Yaml) -> Option<R>>(map: &'a Hash,
43 key: &'static str,
44 f: F)
45 -> Result<R, ParseSyntaxError> {
46 map.get(&Yaml::String(key.to_owned()))
47 .ok_or(ParseSyntaxError::MissingMandatoryKey(key))
48 .and_then(|x| f(x).ok_or(ParseSyntaxError::TypeMismatch))
49}
50
51fn str_to_scopes(s: &str, repo: &mut ScopeRepository) -> Result<Vec<Scope>, ParseSyntaxError> {
52 s.split_whitespace()
53 .map(|scope| repo.build(scope).map_err(ParseSyntaxError::InvalidScope))
54 .collect()
55}
56
/// State shared by the parsing functions while one syntax definition is loaded.
struct ParserState<'a> {
    /// Repository used to turn scope strings into `Scope` values.
    scope_repo: &'a mut ScopeRepository,
    /// String entries of the top-level `variables` mapping, keyed by name.
    variables: HashMap<String, String>,
    /// Matches `{{name}}` variable references inside regex strings.
    variable_regex: Regex,
    /// Matches a backslash followed by a digit, i.e. a backreference like `\1`.
    backref_regex: Regex,
    /// Chooses which newline rewrite `parse_regex` applies to each regex.
    lines_include_newline: bool,
}
64
// YAML for the two synthetic contexts that `add_initial_contexts` parses and
// adds to every syntax: `__start` pushes `__main`, which includes `main`.
// YAML nesting is indentation-sensitive: `push` has to be indented deeper than
// the `-` of its `- match` item so that both keys end up in the same mapping.
static START_CONTEXT: &str = "
__start:
    - meta_include_prototype: false
    - match: ''
      push: __main
__main:
    - include: main
";
75
76impl SyntaxDefinition {
77 pub fn load_from_str(
85 s: &str,
86 lines_include_newline: bool,
87 fallback_name: Option<&str>,
88 ) -> Result<SyntaxDefinition, ParseSyntaxError> {
89 let docs = match YamlLoader::load_from_str(s) {
90 Ok(x) => x,
91 Err(e) => return Err(ParseSyntaxError::InvalidYaml(e)),
92 };
93 if docs.is_empty() {
94 return Err(ParseSyntaxError::EmptyFile);
95 }
96 let doc = &docs[0];
97 let mut scope_repo = SCOPE_REPO.lock().unwrap();
98 SyntaxDefinition::parse_top_level(doc, scope_repo.deref_mut(), lines_include_newline, fallback_name)
99 }
100
101 fn parse_top_level(doc: &Yaml,
102 scope_repo: &mut ScopeRepository,
103 lines_include_newline: bool,
104 fallback_name: Option<&str>)
105 -> Result<SyntaxDefinition, ParseSyntaxError> {
106 let h = doc.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
107
108 let mut variables = HashMap::new();
109 if let Ok(map) = get_key(h, "variables", |x| x.as_hash()) {
110 for (key, value) in map.iter() {
111 if let (Some(key_str), Some(val_str)) = (key.as_str(), value.as_str()) {
112 variables.insert(key_str.to_owned(), val_str.to_owned());
113 }
114 }
115 }
116 let contexts_hash = get_key(h, "contexts", |x| x.as_hash())?;
117 let top_level_scope = scope_repo.build(get_key(h, "scope", |x| x.as_str())?)
118 .map_err(ParseSyntaxError::InvalidScope)?;
119 let mut state = ParserState {
120 scope_repo,
121 variables,
122 variable_regex: Regex::new(r"\{\{([A-Za-z0-9_]+)\}\}".into()),
123 backref_regex: Regex::new(r"\\\d".into()),
124 lines_include_newline,
125 };
126
127 let mut contexts = SyntaxDefinition::parse_contexts(contexts_hash, &mut state)?;
128 if !contexts.contains_key("main") {
129 return Err(ParseSyntaxError::MainMissing);
130 }
131
132 SyntaxDefinition::add_initial_contexts(
133 &mut contexts,
134 &mut state,
135 top_level_scope,
136 );
137
138 let mut file_extensions = Vec::new();
139 for extension_key in &["file_extensions", "hidden_file_extensions"] {
140 if let Ok(v) = get_key(h, extension_key, |x| x.as_vec()) {
141 file_extensions.extend(v.iter().filter_map(|y| y.as_str().map(|s| s.to_owned())))
142 }
143 }
144
145 let defn = SyntaxDefinition {
146 name: get_key(h, "name", |x| x.as_str()).unwrap_or_else(|_| fallback_name.unwrap_or("Unnamed")).to_owned(),
147 scope: top_level_scope,
148 file_extensions,
149 first_line_match: get_key(h, "first_line_match", |x| x.as_str())
151 .ok()
152 .map(|s| s.to_owned()),
153 hidden: get_key(h, "hidden", |x| x.as_bool()).unwrap_or(false),
154
155 variables: state.variables,
156 contexts,
157 };
158 Ok(defn)
159 }
160
161 fn parse_contexts(map: &Hash,
162 state: &mut ParserState<'_>)
163 -> Result<HashMap<String, Context>, ParseSyntaxError> {
164 let mut contexts = HashMap::new();
165 for (key, value) in map.iter() {
166 if let (Some(name), Some(val_vec)) = (key.as_str(), value.as_vec()) {
167 let is_prototype = name == "prototype";
168 let mut namer = ContextNamer::new(name);
169 SyntaxDefinition::parse_context(val_vec, state, &mut contexts, is_prototype, &mut namer)?;
170 }
171 }
172
173 Ok(contexts)
174 }
175
176 fn parse_context(vec: &[Yaml],
177 state: &mut ParserState<'_>,
179 contexts: &mut HashMap<String, Context>,
180 is_prototype: bool,
181 namer: &mut ContextNamer)
182 -> Result<String, ParseSyntaxError> {
183 let mut context = Context::new(!is_prototype);
184 let name = namer.next();
185
186 for y in vec.iter() {
187 let map = y.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
188
189 let mut is_special = false;
190 if let Ok(x) = get_key(map, "meta_scope", |x| x.as_str()) {
191 context.meta_scope = str_to_scopes(x, state.scope_repo)?;
192 is_special = true;
193 }
194 if let Ok(x) = get_key(map, "meta_content_scope", |x| x.as_str()) {
195 context.meta_content_scope = str_to_scopes(x, state.scope_repo)?;
196 is_special = true;
197 }
198 if let Ok(x) = get_key(map, "meta_include_prototype", |x| x.as_bool()) {
199 context.meta_include_prototype = x;
200 is_special = true;
201 }
202 if let Ok(true) = get_key(map, "clear_scopes", |x| x.as_bool()) {
203 context.clear_scopes = Some(ClearAmount::All);
204 is_special = true;
205 }
206 if let Ok(x) = get_key(map, "clear_scopes", |x| x.as_i64()) {
207 context.clear_scopes = Some(ClearAmount::TopN(x as usize));
208 is_special = true;
209 }
210 if !is_special {
211 if let Ok(x) = get_key(map, "include", Some) {
212 let reference = SyntaxDefinition::parse_reference(
213 x, state, contexts, namer, false)?;
214 context.patterns.push(Pattern::Include(reference));
215 } else {
216 let pattern = SyntaxDefinition::parse_match_pattern(
217 map, state, contexts, namer)?;
218 if pattern.has_captures {
219 context.uses_backrefs = true;
220 }
221 context.patterns.push(Pattern::Match(pattern));
222 }
223 }
224
225 }
226
227 contexts.insert(name.clone(), context);
228 Ok(name)
229 }
230
231 fn parse_reference(y: &Yaml,
232 state: &mut ParserState<'_>,
233 contexts: &mut HashMap<String, Context>,
234 namer: &mut ContextNamer,
235 with_escape: bool)
236 -> Result<ContextReference, ParseSyntaxError> {
237 if let Some(s) = y.as_str() {
238 let parts: Vec<&str> = s.split('#').collect();
239 let sub_context = if parts.len() > 1 {
240 Some(parts[1].to_owned())
241 } else {
242 None
243 };
244 if parts[0].starts_with("scope:") {
245 Ok(ContextReference::ByScope {
246 scope: state.scope_repo
247 .build(&parts[0][6..])
248 .map_err(ParseSyntaxError::InvalidScope)?,
249 sub_context,
250 with_escape,
251 })
252 } else if parts[0].ends_with(".sublime-syntax") {
253 let stem = Path::new(parts[0])
254 .file_stem()
255 .and_then(|x| x.to_str())
256 .ok_or(ParseSyntaxError::BadFileRef)?;
257 Ok(ContextReference::File {
258 name: stem.to_owned(),
259 sub_context,
260 with_escape,
261 })
262 } else {
263 Ok(ContextReference::Named(parts[0].to_owned()))
264 }
265 } else if let Some(v) = y.as_vec() {
266 let subname = SyntaxDefinition::parse_context(v, state, contexts, false, namer)?;
267 Ok(ContextReference::Inline(subname))
268 } else {
269 Err(ParseSyntaxError::TypeMismatch)
270 }
271 }
272
273 fn parse_match_pattern(map: &Hash,
274 state: &mut ParserState<'_>,
275 contexts: &mut HashMap<String, Context>,
276 namer: &mut ContextNamer)
277 -> Result<MatchPattern, ParseSyntaxError> {
278 let raw_regex = get_key(map, "match", |x| x.as_str())?;
279 let regex_str = Self::parse_regex(raw_regex, state)?;
280 let scope = get_key(map, "scope", |x| x.as_str())
283 .ok()
284 .map(|s| str_to_scopes(s, state.scope_repo))
285 .unwrap_or_else(|| Ok(vec![]))?;
286
287 let captures = if let Ok(map) = get_key(map, "captures", |x| x.as_hash()) {
288 Some(Self::parse_captures(map, ®ex_str, state)?)
289 } else {
290 None
291 };
292
293 let mut has_captures = false;
294 let operation = if get_key(map, "pop", Some).is_ok() {
295 has_captures = state.backref_regex.search(®ex_str, 0, regex_str.len(), None);
297 MatchOperation::Pop
298 } else if let Ok(y) = get_key(map, "push", Some) {
299 MatchOperation::Push(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
300 } else if let Ok(y) = get_key(map, "set", Some) {
301 MatchOperation::Set(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
302 } else if let Ok(y) = get_key(map, "embed", Some) {
303 let mut embed_escape_context_yaml = vec!();
305 let mut commands = Hash::new();
306 commands.insert(Yaml::String("meta_include_prototype".to_string()), Yaml::Boolean(false));
307 embed_escape_context_yaml.push(Yaml::Hash(commands));
308 if let Ok(s) = get_key(map, "embed_scope", Some) {
309 commands = Hash::new();
310 commands.insert(Yaml::String("meta_content_scope".to_string()), s.clone());
311 embed_escape_context_yaml.push(Yaml::Hash(commands));
312 }
313 if let Ok(v) = get_key(map, "escape", Some) {
314 let mut match_map = Hash::new();
315 match_map.insert(Yaml::String("match".to_string()), v.clone());
316 match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
317 if let Ok(y) = get_key(map, "escape_captures", Some) {
318 match_map.insert(Yaml::String("captures".to_string()), y.clone());
319 }
320 embed_escape_context_yaml.push(Yaml::Hash(match_map));
321 let escape_context = SyntaxDefinition::parse_context(
322 &embed_escape_context_yaml,
323 state,
324 contexts,
325 false,
326 namer,
327 )?;
328 MatchOperation::Push(vec![ContextReference::Inline(escape_context),
329 SyntaxDefinition::parse_reference(y, state, contexts, namer, true)?])
330 } else {
331 return Err(ParseSyntaxError::MissingMandatoryKey("escape"));
332 }
333
334 } else {
335 MatchOperation::None
336 };
337
338 let with_prototype = if let Ok(v) = get_key(map, "with_prototype", |x| x.as_vec()) {
339 let subname = Self::parse_context(v, state, contexts, true, namer)?;
341 Some(ContextReference::Inline(subname))
342 } else if let Ok(v) = get_key(map, "escape", Some) {
343 let subname = namer.next();
344
345 let mut context = Context::new(false);
346 let mut match_map = Hash::new();
347 match_map.insert(Yaml::String("match".to_string()), Yaml::String(format!("(?={})", v.as_str().unwrap())));
348 match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
349 let pattern = SyntaxDefinition::parse_match_pattern(&match_map, state, contexts, namer)?;
350 if pattern.has_captures {
351 context.uses_backrefs = true;
352 }
353 context.patterns.push(Pattern::Match(pattern));
354
355 contexts.insert(subname.clone(), context);
356 Some(ContextReference::Inline(subname))
357 } else {
358 None
359 };
360
361 let pattern = MatchPattern::new(
362 has_captures,
363 regex_str,
364 scope,
365 captures,
366 operation,
367 with_prototype,
368 );
369
370 Ok(pattern)
371 }
372
373 fn parse_pushargs(y: &Yaml,
374 state: &mut ParserState<'_>,
375 contexts: &mut HashMap<String, Context>,
376 namer: &mut ContextNamer)
377 -> Result<Vec<ContextReference>, ParseSyntaxError> {
378 if y.as_vec().map_or(false, |v| !v.is_empty() && (v[0].as_str().is_some() || (v[0].as_vec().is_some() && v[0].as_vec().unwrap()[0].as_hash().is_some()))) {
380 y.as_vec()
382 .unwrap()
383 .iter()
384 .map(|x| SyntaxDefinition::parse_reference(x, state, contexts, namer, false))
385 .collect()
386 } else {
387 let reference = SyntaxDefinition::parse_reference(y, state, contexts, namer, false)?;
388 Ok(vec![reference])
389 }
390 }
391
392 fn parse_regex(raw_regex: &str, state: &ParserState<'_>) -> Result<String, ParseSyntaxError> {
393 let regex = Self::resolve_variables(raw_regex, state);
394 let regex = replace_posix_char_classes(regex);
395 let regex = if state.lines_include_newline {
396 regex_for_newlines(regex)
397 } else {
398 regex_for_no_newlines(regex)
402 };
403 Self::try_compile_regex(®ex)?;
404 Ok(regex)
405 }
406
407 fn resolve_variables(raw_regex: &str, state: &ParserState<'_>) -> String {
408 let mut result = String::new();
409 let mut index = 0;
410 let mut region = Region::new();
411 while state.variable_regex.search(raw_regex, index, raw_regex.len(), Some(&mut region)) {
412 let (begin, end) = region.pos(0).unwrap();
413
414 result.push_str(&raw_regex[index..begin]);
415
416 let var_pos = region.pos(1).unwrap();
417 let var_name = &raw_regex[var_pos.0..var_pos.1];
418 let var_raw = state.variables.get(var_name).map(String::as_ref).unwrap_or("");
419 let var_resolved = Self::resolve_variables(var_raw, state);
420 result.push_str(&var_resolved);
421
422 index = end;
423 }
424 if index < raw_regex.len() {
425 result.push_str(&raw_regex[index..]);
426 }
427 result
428 }
429
430 fn try_compile_regex(regex_str: &str) -> Result<(), ParseSyntaxError> {
431 let regex_str = substitute_backrefs_in_regex(regex_str, |i| Some(format!("<placeholder_{}>", i)));
433
434 if let Some(error) = Regex::try_compile(®ex_str) {
435 Err(ParseSyntaxError::RegexCompileError(regex_str, error))
436 } else {
437 Ok(())
438 }
439 }
440
441 fn parse_captures(
442 map: &Hash,
443 regex_str: &str,
444 state: &mut ParserState<'_>,
445 ) -> Result<CaptureMapping, ParseSyntaxError> {
446 let valid_indexes = get_consuming_capture_indexes(regex_str);
447 let mut captures = Vec::new();
448 for (key, value) in map.iter() {
449 if let (Some(key_int), Some(val_str)) = (key.as_i64(), value.as_str()) {
450 if valid_indexes.contains(&(key_int as usize)) {
451 captures.push((key_int as usize, str_to_scopes(val_str, state.scope_repo)?));
452 }
453 }
454 }
455 Ok(captures)
456 }
457
458 fn add_initial_contexts(
465 contexts: &mut HashMap<String, Context>,
466 state: &mut ParserState<'_>,
467 top_level_scope: Scope,
468 ) {
469 let yaml_docs = YamlLoader::load_from_str(START_CONTEXT).unwrap();
470 let yaml = &yaml_docs[0];
471
472 let start_yaml : &[Yaml] = yaml["__start"].as_vec().unwrap();
473 SyntaxDefinition::parse_context(start_yaml, state, contexts, false, &mut ContextNamer::new("__start")).unwrap();
474 if let Some(start) = contexts.get_mut("__start") {
475 start.meta_content_scope = vec![top_level_scope];
476 }
477
478 let main_yaml : &[Yaml] = yaml["__main"].as_vec().unwrap();
479 SyntaxDefinition::parse_context(main_yaml, state, contexts, false, &mut ContextNamer::new("__main")).unwrap();
480
481 let meta_include_prototype = contexts["main"].meta_include_prototype;
482 let meta_scope = contexts["main"].meta_scope.clone();
483 let meta_content_scope = contexts["main"].meta_content_scope.clone();
484
485 if let Some(outer_main) = contexts.get_mut("__main") {
486 outer_main.meta_include_prototype = meta_include_prototype;
487 outer_main.meta_scope = meta_scope;
488 outer_main.meta_content_scope = meta_content_scope;
489 }
490
491 if let Some(main) = contexts.get_mut("main") {
495 main.meta_content_scope.insert(0, top_level_scope);
496 }
497 }
498}
499
/// Hands out names for a named context and the anonymous contexts inside it.
struct ContextNamer {
    /// Name of the enclosing named context.
    name: String,
    /// `None` until the first name was handed out, then the next index to use.
    anonymous_index: Option<usize>,
}

impl ContextNamer {
    /// Creates a namer for the context called `name`.
    fn new(name: &str) -> ContextNamer {
        ContextNamer {
            name: name.to_string(),
            anonymous_index: None,
        }
    }

    /// Returns the next name: the plain name on the first call, then
    /// `#anon_<name>_0`, `#anon_<name>_1`, and so on.
    fn next(&mut self) -> String {
        match self.anonymous_index {
            None => {
                self.anonymous_index = Some(0);
                self.name.clone()
            }
            Some(index) => {
                self.anonymous_index = Some(index + 1);
                format!("#anon_{}_{}", self.name, index)
            }
        }
    }
}
524
/// Replaces the POSIX class names `[:alpha:]`, `[:alnum:]`, `[:lower:]`,
/// `[:upper:]` and `[:digit:]` with Unicode property escapes.
///
/// This is plain text substitution, applied wherever the name occurs.
fn replace_posix_char_classes(regex: String) -> String {
    // Applied one after another, in this order.
    const SUBSTITUTIONS: [(&str, &str); 5] = [
        ("[:alpha:]", r"\p{L}"),
        ("[:alnum:]", r"\p{L}\p{N}"),
        ("[:lower:]", r"\p{Ll}"),
        ("[:upper:]", r"\p{Lu}"),
        ("[:digit:]", r"\p{Nd}"),
    ];
    SUBSTITUTIONS
        .iter()
        .fold(regex, |text, &(posix, unicode)| text.replace(posix, unicode))
}
536
537
538fn regex_for_newlines(regex: String) -> String {
549 if !regex.contains('$') {
550 return regex;
551 }
552
553 let rewriter = RegexRewriterForNewlines {
554 parser: Parser::new(regex.as_bytes()),
555 };
556 rewriter.rewrite()
557}
558
559struct RegexRewriterForNewlines<'a> {
560 parser: Parser<'a>,
561}
562
563impl<'a> RegexRewriterForNewlines<'a> {
564 fn rewrite(mut self) -> String {
565 let mut result = Vec::new();
566
567 while let Some(c) = self.parser.peek() {
568 match c {
569 b'$' => {
570 self.parser.next();
571 result.extend_from_slice(br"(?m:$)");
572 }
573 b'\\' => {
574 self.parser.next();
575 result.push(c);
576 if let Some(c2) = self.parser.peek() {
577 self.parser.next();
578 result.push(c2);
579 }
580 }
581 b'[' => {
582 let (mut content, _) = self.parser.parse_character_class();
583 result.append(&mut content);
584 }
585 _ => {
586 self.parser.next();
587 result.push(c);
588 }
589 }
590 }
591 String::from_utf8(result).unwrap()
592 }
593}
594
595fn regex_for_no_newlines(regex: String) -> String {
604 if !regex.contains(r"\n") {
605 return regex;
606 }
607
608 let regex = regex.replace("(?:\\n)?", "(?:$|)");
611
612 let rewriter = RegexRewriterForNoNewlines {
613 parser: Parser::new(regex.as_bytes()),
614 };
615 rewriter.rewrite()
616}
617
618struct RegexRewriterForNoNewlines<'a> {
619 parser: Parser<'a>,
620}
621
622impl<'a> RegexRewriterForNoNewlines<'a> {
623 fn rewrite(mut self) -> String {
624 let mut result = Vec::new();
625 while let Some(c) = self.parser.peek() {
626 match c {
627 b'\\' => {
628 self.parser.next();
629 if let Some(c2) = self.parser.peek() {
630 self.parser.next();
631 let c3 = self.parser.peek();
634 if c2 == b'n' && c3 != Some(b'?') && c3 != Some(b'+') && c3 != Some(b'*') {
635 result.extend_from_slice(b"$");
636 } else {
637 result.push(c);
638 result.push(c2);
639 }
640 } else {
641 result.push(c);
642 }
643 }
644 b'[' => {
645 let (mut content, matches_newline) = self.parser.parse_character_class();
646 if matches_newline && self.parser.peek() != Some(b'?') {
647 result.extend_from_slice(b"(?:");
648 result.append(&mut content);
649 result.extend_from_slice(br"|$)");
650 } else {
651 result.append(&mut content);
652 }
653 }
654 _ => {
655 self.parser.next();
656 result.push(c);
657 }
658 }
659 }
660 String::from_utf8(result).unwrap()
661 }
662}
663
664fn get_consuming_capture_indexes(regex: &str) -> Vec<usize> {
665 let parser = ConsumingCaptureIndexParser {
666 parser: Parser::new(regex.as_bytes()),
667 };
668 parser.get_consuming_capture_indexes()
669}
670
671struct ConsumingCaptureIndexParser<'a> {
672 parser: Parser<'a>,
673}
674
675impl<'a> ConsumingCaptureIndexParser<'a> {
676 fn get_consuming_capture_indexes(mut self) -> Vec<usize> {
683 let mut result = Vec::new();
684 let mut stack = Vec::new();
685 let mut cap_num = 0;
686 let mut in_lookaround = false;
687 stack.push(in_lookaround);
688 result.push(cap_num);
689
690 while let Some(c) = self.parser.peek() {
691 match c {
692 b'\\' => {
693 self.parser.next();
694 self.parser.next();
695 }
696 b'[' => {
697 self.parser.parse_character_class();
698 }
699 b'(' => {
700 self.parser.next();
701 stack.push(in_lookaround);
703 if let Some(c2) = self.parser.peek() {
704 if c2 != b'?' {
705 cap_num += 1;
707 if !in_lookaround {
710 result.push(cap_num);
711 }
712 } else {
713 self.parser.next();
714 if let Some(c3) = self.parser.peek() {
715 self.parser.next();
716 if c3 == b'=' || c3 == b'!' {
717 in_lookaround = true;
719 } else if c3 == b'<' {
720 if let Some(c4) = self.parser.peek() {
721 if c4 == b'=' || c4 == b'!' {
722 self.parser.next();
723 in_lookaround = true;
725 }
726 }
727 } else if c3 == b'P' {
728 if let Some(c4) = self.parser.peek() {
729 if c4 == b'<' {
730 cap_num += 1;
732 if !in_lookaround {
735 result.push(cap_num);
736 }
737 }
738 }
739 }
740 }
741 }
742 }
743 }
744 b')' => {
745 if let Some(value) = stack.pop() {
746 in_lookaround = value;
747 }
748 self.parser.next();
749 }
750 _ => {
751 self.parser.next();
752 }
753 }
754 }
755 result
756 }
757}
758
/// Minimal byte cursor over a regex, shared by the rewriters in this file.
struct Parser<'a> {
    /// The regex being scanned.
    bytes: &'a [u8],
    /// Position of the next byte to read.
    index: usize,
}

impl<'a> Parser<'a> {
    /// Creates a parser positioned at the first byte of `bytes`.
    fn new(bytes: &[u8]) -> Parser<'_> {
        Parser { bytes, index: 0 }
    }

    /// Returns the byte at the current position without consuming it, or
    /// `None` at the end of the input.
    fn peek(&self) -> Option<u8> {
        self.bytes.get(self.index).copied()
    }

    /// Advances the position by one byte.
    fn next(&mut self) {
        self.index += 1;
    }

    /// Consumes a character class; the current byte is taken to be its `[`.
    ///
    /// Returns the bytes of the class, brackets included, and whether a `\n`
    /// escape was seen at the top nesting level of a class that is not
    /// negated. A `]` right after `[` or `[^` counts as a literal. If the
    /// input ends before the class is closed, everything read is returned.
    fn parse_character_class(&mut self) -> (Vec<u8>, bool) {
        let mut class_bytes = vec![b'['];
        self.next();

        let negated = self.peek() == Some(b'^');
        if negated {
            self.next();
            class_bytes.push(b'^');
        }

        // A leading `]` does not close the class.
        if self.peek() == Some(b']') {
            self.next();
            class_bytes.push(b']');
        }

        let mut depth = 0;
        let mut matches_newline = false;
        while let Some(byte) = self.peek() {
            self.next();
            class_bytes.push(byte);
            match byte {
                b'\\' => {
                    if let Some(escaped) = self.peek() {
                        self.next();
                        class_bytes.push(escaped);
                        if escaped == b'n' && !negated && depth == 0 {
                            matches_newline = true;
                        }
                    }
                }
                b'[' => depth += 1,
                b']' => {
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        (class_bytes, matches_newline)
    }
}
836
837
838#[cfg(test)]
839mod tests {
840 use crate::parsing::syntax_definition::*;
841 use crate::parsing::Scope;
842 use super::*;
843
844 #[test]
845 fn can_parse() {
846 let defn: SyntaxDefinition =
847 SyntaxDefinition::load_from_str("name: C\nscope: source.c\ncontexts: {main: []}",
848 false, None)
849 .unwrap();
850 assert_eq!(defn.name, "C");
851 assert_eq!(defn.scope, Scope::new("source.c").unwrap());
852 let exts_empty: Vec<String> = Vec::new();
853 assert_eq!(defn.file_extensions, exts_empty);
854 assert!(!defn.hidden);
855 assert!(defn.variables.is_empty());
856 let defn2: SyntaxDefinition =
857 SyntaxDefinition::load_from_str("
858 name: C
859 scope: source.c
860 file_extensions: [c, h]
861 hidden_file_extensions: [k, l]
862 hidden: true
863 variables:
864 ident: '[QY]+'
865 contexts:
866 prototype:
867 - match: lol
868 scope: source.php
869 main:
870 - match: \\b(if|else|for|while|{{ident}})\\b
871 scope: keyword.control.c keyword.looping.c
872 captures:
873 1: meta.preprocessor.c++
874 2: keyword.control.include.c++
875 push: [string, 'scope:source.c#main', 'CSS.sublime-syntax#rule-list-body']
876 with_prototype:
877 - match: wow
878 pop: true
879 - match: '\"'
880 push: string
881 string:
882 - meta_scope: string.quoted.double.c
883 - meta_include_prototype: false
884 - match: \\\\.
885 scope: constant.character.escape.c
886 - match: '\"'
887 pop: true
888 ",
889 false, None)
890 .unwrap();
891 assert_eq!(defn2.name, "C");
892 let top_level_scope = Scope::new("source.c").unwrap();
893 assert_eq!(defn2.scope, top_level_scope);
894 let exts: Vec<String> = vec!["c", "h", "k", "l"].into_iter().map(String::from).collect();
895 assert_eq!(defn2.file_extensions, exts);
896 assert!(defn2.hidden);
897 assert_eq!(defn2.variables.get("ident").unwrap(), "[QY]+");
898
899 let n: Vec<Scope> = Vec::new();
900 println!("{:?}", defn2);
901 let main = &defn2.contexts["main"];
903 assert_eq!(main.meta_content_scope, vec![top_level_scope]);
904 assert_eq!(main.meta_scope, n);
905 assert!(main.meta_include_prototype);
906
907 assert_eq!(defn2.contexts["__main"].meta_content_scope, n);
908 assert_eq!(defn2.contexts["__start"].meta_content_scope, vec![top_level_scope]);
909
910 assert_eq!(defn2.contexts["string"].meta_scope,
911 vec![Scope::new("string.quoted.double.c").unwrap()]);
912 let first_pattern: &Pattern = &main.patterns[0];
913 match *first_pattern {
914 Pattern::Match(ref match_pat) => {
915 let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
916 assert_eq!(&m[0], &(1,vec![Scope::new("meta.preprocessor.c++").unwrap()]));
917 use crate::parsing::syntax_definition::ContextReference::*;
918
919 let expected = MatchOperation::Push(vec![
921 Named("string".to_owned()),
922 ByScope {
923 scope: Scope::new("source.c").unwrap(),
924 sub_context: Some("main".to_owned()),
925 with_escape: false,
926 },
927 File {
928 name: "CSS".to_owned(),
929 sub_context: Some("rule-list-body".to_owned()),
930 with_escape: false,
931 },
932 ]);
933 assert_eq!(format!("{:?}", match_pat.operation),
934 format!("{:?}", expected));
935
936 assert_eq!(match_pat.scope,
937 vec![Scope::new("keyword.control.c").unwrap(),
938 Scope::new("keyword.looping.c").unwrap()]);
939
940 assert!(match_pat.with_prototype.is_some());
941 }
942 _ => unreachable!(),
943 }
944 }
945
946 #[test]
947 fn can_parse_embed_as_with_prototypes() {
948 let old_def = SyntaxDefinition::load_from_str(r#"
949 name: C
950 scope: source.c
951 file_extensions: [c, h]
952 variables:
953 ident: '[QY]+'
954 contexts:
955 main:
956 - match: '(>)\s*'
957 captures:
958 1: meta.tag.style.begin.html punctuation.definition.tag.end.html
959 push:
960 - [{ meta_include_prototype: false }, { meta_content_scope: 'source.css.embedded.html' }, { match: '(?i)(?=</style)', pop: true }]
961 - scope:source.css
962 with_prototype:
963 - match: (?=(?i)(?=</style))
964 pop: true
965 "#,false, None).unwrap();
966
967 let mut def_with_embed = SyntaxDefinition::load_from_str(r#"
968 name: C
969 scope: source.c
970 file_extensions: [c, h]
971 variables:
972 ident: '[QY]+'
973 contexts:
974 main:
975 - match: '(>)\s*'
976 captures:
977 1: meta.tag.style.begin.html punctuation.definition.tag.end.html
978 embed: scope:source.css
979 embed_scope: source.css.embedded.html
980 escape: (?i)(?=</style)
981 "#,false, None).unwrap();
982
983 let def_with_embed_context = def_with_embed.contexts.get_mut("main").unwrap();
988 if let Pattern::Match(ref mut match_pattern) = def_with_embed_context.patterns[0] {
989 if let MatchOperation::Push(ref mut context_references) = match_pattern.operation {
990 if let ContextReference::ByScope {
991 ref mut with_escape,
992 ..
993 } = context_references[1]
994 {
995 *with_escape = false;
996 }
997 }
998 }
999
1000 assert_eq!(old_def.contexts["main"], def_with_embed.contexts["main"]);
1001 }
1002
1003 #[test]
1004 fn errors_on_embed_without_escape() {
1005 let def = SyntaxDefinition::load_from_str(r#"
1006 name: C
1007 scope: source.c
1008 file_extensions: [c, h]
1009 variables:
1010 ident: '[QY]+'
1011 contexts:
1012 main:
1013 - match: '(>)\s*'
1014 captures:
1015 1: meta.tag.style.begin.html punctuation.definition.tag.end.html
1016 embed: scope:source.css
1017 embed_scope: source.css.embedded.html
1018 "#,false, None);
1019 assert!(def.is_err());
1020 match def.unwrap_err() {
1021 ParseSyntaxError::MissingMandatoryKey(key) => assert_eq!(key, "escape"),
1022 _ => unreachable!("Got unexpected ParseSyntaxError"),
1023 }
1024 }
1025
1026 #[test]
1027 fn errors_on_regex_compile_error() {
1028 let def = SyntaxDefinition::load_from_str(r#"
1029 name: C
1030 scope: source.c
1031 file_extensions: [test]
1032 contexts:
1033 main:
1034 - match: '[a'
1035 scope: keyword.name
1036 "#,false, None);
1037 assert!(def.is_err());
1038 match def.unwrap_err() {
1039 ParseSyntaxError::RegexCompileError(ref regex, _) => assert_eq!("[a", regex),
1040 _ => unreachable!("Got unexpected ParseSyntaxError"),
1041 }
1042 }
1043
1044 #[test]
1045 fn can_parse_ugly_yaml() {
1046 let defn: SyntaxDefinition =
1047 SyntaxDefinition::load_from_str("
1048 name: LaTeX
1049 scope: text.tex.latex
1050 contexts:
1051 main:
1052 - match: '((\\\\)(?:framebox|makebox))\\b'
1053 captures:
1054 1: support.function.box.latex
1055 2: punctuation.definition.backslash.latex
1056 push:
1057 - [{meta_scope: meta.function.box.latex}, {match: '', pop: true}]
1058 - argument
1059 - optional-arguments
1060 argument:
1061 - match: '\\{'
1062 scope: punctuation.definition.group.brace.begin.latex
1063 - match: '(?=\\S)'
1064 pop: true
1065 optional-arguments:
1066 - match: '(?=\\S)'
1067 pop: true
1068 ",
1069 false, None)
1070 .unwrap();
1071 assert_eq!(defn.name, "LaTeX");
1072 let top_level_scope = Scope::new("text.tex.latex").unwrap();
1073 assert_eq!(defn.scope, top_level_scope);
1074
1075 let first_pattern: &Pattern = &defn.contexts["main"].patterns[0];
1076 match *first_pattern {
1077 Pattern::Match(ref match_pat) => {
1078 let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
1079 assert_eq!(&m[0], &(1,vec![Scope::new("support.function.box.latex").unwrap()]));
1080
1081 assert!(match_pat.with_prototype.is_none());
1087 }
1088 _ => unreachable!(),
1089 }
1090 }
1091
1092 #[test]
1093 fn names_anonymous_contexts() {
1094 let def = SyntaxDefinition::load_from_str(
1095 r#"
1096 scope: source.c
1097 contexts:
1098 main:
1099 - match: a
1100 push: a
1101 a:
1102 - meta_scope: a
1103 - match: x
1104 push:
1105 - meta_scope: anonymous_x
1106 - match: anything
1107 push:
1108 - meta_scope: anonymous_x_2
1109 - match: y
1110 push:
1111 - meta_scope: anonymous_y
1112 - match: z
1113 escape: 'test'
1114 "#,
1115 false,
1116 None
1117 ).unwrap();
1118
1119 assert_eq!(def.contexts["a"].meta_scope, vec![Scope::new("a").unwrap()]);
1120 assert_eq!(def.contexts["#anon_a_0"].meta_scope, vec![Scope::new("anonymous_x").unwrap()]);
1121 assert_eq!(def.contexts["#anon_a_1"].meta_scope, vec![Scope::new("anonymous_x_2").unwrap()]);
1122 assert_eq!(def.contexts["#anon_a_2"].meta_scope, vec![Scope::new("anonymous_y").unwrap()]);
1123 assert_eq!(def.contexts["#anon_a_3"].patterns.len(), 1); }
1125
1126 #[test]
1127 fn can_use_fallback_name() {
1128 let def = SyntaxDefinition::load_from_str(r#"
1129 scope: source.c
1130 contexts:
1131 main:
1132 - match: ''
1133 "#,false, Some("C"));
1134 assert_eq!(def.unwrap().name, "C");
1135 }
1136
1137 #[test]
1138 fn can_rewrite_regex_for_newlines() {
1139 fn rewrite(s: &str) -> String {
1140 regex_for_newlines(s.to_string())
1141 }
1142
1143 assert_eq!(&rewrite(r"a"), r"a");
1144 assert_eq!(&rewrite(r"\b"), r"\b");
1145 assert_eq!(&rewrite(r"(a)"), r"(a)");
1146 assert_eq!(&rewrite(r"[a]"), r"[a]");
1147 assert_eq!(&rewrite(r"[^a]"), r"[^a]");
1148 assert_eq!(&rewrite(r"[]a]"), r"[]a]");
1149 assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");
1150
1151 assert_eq!(&rewrite(r"^"), r"^");
1152 assert_eq!(&rewrite(r"$"), r"(?m:$)");
1153 assert_eq!(&rewrite(r"^ab$"), r"^ab(?m:$)");
1154 assert_eq!(&rewrite(r"\^ab\$"), r"\^ab\$");
1155 assert_eq!(&rewrite(r"(//).*$"), r"(//).*(?m:$)");
1156
1157 assert_eq!(&rewrite(r"[a$]"), r"[a$]");
1159 }
1160
1161 #[test]
1162 fn can_rewrite_regex_for_no_newlines() {
1163 fn rewrite(s: &str) -> String {
1164 regex_for_no_newlines(s.to_string())
1165 }
1166
1167 assert_eq!(&rewrite(r"a"), r"a");
1168 assert_eq!(&rewrite(r"\b"), r"\b");
1169 assert_eq!(&rewrite(r"(a)"), r"(a)");
1170 assert_eq!(&rewrite(r"[a]"), r"[a]");
1171 assert_eq!(&rewrite(r"[^a]"), r"[^a]");
1172 assert_eq!(&rewrite(r"[]a]"), r"[]a]");
1173 assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");
1174
1175 assert_eq!(&rewrite(r"\n"), r"$");
1176 assert_eq!(&rewrite(r"\[\n"), r"\[$");
1177 assert_eq!(&rewrite(r"a\n?"), r"a\n?");
1178 assert_eq!(&rewrite(r"a\n+"), r"a\n+");
1179 assert_eq!(&rewrite(r"a\n*"), r"a\n*");
1180 assert_eq!(&rewrite(r"[abc\n]"), r"(?:[abc\n]|$)");
1181 assert_eq!(&rewrite(r"[^\n]"), r"[^\n]");
1182 assert_eq!(&rewrite(r"[^]\n]"), r"[^]\n]");
1183 assert_eq!(&rewrite(r"[\n]?"), r"[\n]?");
1184 assert_eq!(&rewrite(r"[\n]"), r"(?:[\n]|$)");
1186 assert_eq!(&rewrite(r"[]\n]"), r"(?:[]\n]|$)");
1187 assert_eq!(&rewrite(r"[[a]&&[\n]]"), r"[[a]&&[\n]]");
1189
1190 assert_eq!(&rewrite(r"ab(?:\n)?"), r"ab(?:$|)");
1191 assert_eq!(&rewrite(r"(?<!\n)ab"), r"(?<!$)ab");
1192 assert_eq!(&rewrite(r"(?<=\n)ab"), r"(?<=$)ab");
1193 }
1194
1195 #[test]
1196 fn can_get_valid_captures_from_regex() {
1197 let regex = "hello(test)(?=(world))(foo(?P<named>bar))";
1198 println!("{:?}", regex);
1199 let valid_indexes = get_consuming_capture_indexes(regex);
1200 println!("{:?}", valid_indexes);
1201 assert_eq!(valid_indexes, [0, 1, 3, 4]);
1202 }
1203
1204 #[test]
1205 fn can_get_valid_captures_from_regex2() {
1206 let regex = "hello(test)[(?=tricked](foo(bar))";
1207 println!("{:?}", regex);
1208 let valid_indexes = get_consuming_capture_indexes(regex);
1209 println!("{:?}", valid_indexes);
1210 assert_eq!(valid_indexes, [0, 1, 2, 3]);
1211 }
1212
1213 #[test]
1214 fn can_get_valid_captures_from_nested_regex() {
1215 let regex = "hello(test)(?=(world(?!(te(?<=(st))))))(foo(bar))";
1216 println!("{:?}", regex);
1217 let valid_indexes = get_consuming_capture_indexes(regex);
1218 println!("{:?}", valid_indexes);
1219 assert_eq!(valid_indexes, [0, 1, 5, 6]);
1220 }
1221}