1use super::regex::{Regex, Region};
2use super::scope::*;
3use super::syntax_definition::*;
4use std::collections::HashMap;
5use std::error::Error;
6use std::ops::DerefMut;
7use std::path::Path;
8use yaml_rust::yaml::Hash;
9use yaml_rust::{ScanError, Yaml, YamlLoader};
10
/// Errors that can be produced while loading a syntax definition from YAML.
///
/// The enum is `#[non_exhaustive]`, so code matching on it from outside this
/// crate needs a wildcard arm.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ParseSyntaxError {
    /// The input could not be scanned as YAML; wraps the `ScanError` reported
    /// by the YAML loader.
    #[error("Invalid YAML file syntax: {0}")]
    InvalidYaml(#[from] ScanError),
    /// The input was valid YAML but contained no documents.
    #[error("The file must contain at least one YAML document")]
    EmptyFile,
    /// A key the loader requires was absent; carries the name of that key.
    #[error("Missing mandatory key in YAML file: {0}")]
    MissingMandatoryKey(&'static str),
    /// A regex failed to compile; carries the regex text that was compiled and
    /// the underlying error.
    #[error("Error while compiling regex '{0}': {1}")]
    RegexCompileError(String, #[source] Box<dyn Error + Send + Sync + 'static>),
    /// A scope string could not be turned into a scope.
    #[error("Invalid scope: {0}")]
    InvalidScope(ParseScopeError),
    /// A `.sublime-syntax` file reference had no usable file stem.
    #[error("Invalid file reference")]
    BadFileRef,
    /// The `contexts` map did not yield a context named `main`.
    #[error("Context 'main' is missing")]
    MainMissing,
    /// A YAML node did not have the type the loader expected at that position.
    #[error("Type mismatch")]
    TypeMismatch,
}
41
42fn get_key<'a, R, F: FnOnce(&'a Yaml) -> Option<R>>(
43 map: &'a Hash,
44 key: &'static str,
45 f: F,
46) -> Result<R, ParseSyntaxError> {
47 map.get(&Yaml::String(key.to_owned()))
48 .ok_or(ParseSyntaxError::MissingMandatoryKey(key))
49 .and_then(|x| f(x).ok_or(ParseSyntaxError::TypeMismatch))
50}
51
52fn str_to_scopes(s: &str, repo: &mut ScopeRepository) -> Result<Vec<Scope>, ParseSyntaxError> {
53 s.split_whitespace()
54 .map(|scope| repo.build(scope).map_err(ParseSyntaxError::InvalidScope))
55 .collect()
56}
57
/// State shared by the parsing functions while one syntax definition is loaded.
struct ParserState<'a> {
    /// Repository used to turn scope strings into `Scope` values.
    scope_repo: &'a mut ScopeRepository,
    /// String entries of the top-level `variables` map, keyed by variable name.
    variables: HashMap<String, String>,
    /// Matches `{{name}}` variable references in regex source text.
    variable_regex: Regex,
    /// Matches a backslash followed by a digit (a backreference such as `\1`).
    backref_regex: Regex,
    /// Selects which newline rewrite is applied to every regex that is parsed.
    lines_include_newline: bool,
}
65
66static START_CONTEXT: &str = "
69__start:
70 - meta_include_prototype: false
71 - match: ''
72 push: __main
73__main:
74 - include: main
75";
76
77impl SyntaxDefinition {
78 pub fn load_from_str(
86 s: &str,
87 lines_include_newline: bool,
88 fallback_name: Option<&str>,
89 ) -> Result<SyntaxDefinition, ParseSyntaxError> {
90 let docs = match YamlLoader::load_from_str(s) {
91 Ok(x) => x,
92 Err(e) => return Err(ParseSyntaxError::InvalidYaml(e)),
93 };
94 if docs.is_empty() {
95 return Err(ParseSyntaxError::EmptyFile);
96 }
97 let doc = &docs[0];
98 let mut scope_repo = lock_global_scope_repo();
99 SyntaxDefinition::parse_top_level(
100 doc,
101 scope_repo.deref_mut(),
102 lines_include_newline,
103 fallback_name,
104 )
105 }
106
107 fn parse_top_level(
108 doc: &Yaml,
109 scope_repo: &mut ScopeRepository,
110 lines_include_newline: bool,
111 fallback_name: Option<&str>,
112 ) -> Result<SyntaxDefinition, ParseSyntaxError> {
113 let h = doc.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
114
115 let mut variables = HashMap::new();
116 if let Ok(map) = get_key(h, "variables", |x| x.as_hash()) {
117 for (key, value) in map.iter() {
118 if let (Some(key_str), Some(val_str)) = (key.as_str(), value.as_str()) {
119 variables.insert(key_str.to_owned(), val_str.to_owned());
120 }
121 }
122 }
123 let contexts_hash = get_key(h, "contexts", |x| x.as_hash())?;
124 let top_level_scope = scope_repo
125 .build(get_key(h, "scope", |x| x.as_str())?)
126 .map_err(ParseSyntaxError::InvalidScope)?;
127 let mut state = ParserState {
128 scope_repo,
129 variables,
130 variable_regex: Regex::new(r"\{\{([A-Za-z0-9_]+)\}\}".into()),
131 backref_regex: Regex::new(r"\\\d".into()),
132 lines_include_newline,
133 };
134
135 let mut contexts = SyntaxDefinition::parse_contexts(contexts_hash, &mut state)?;
136 if !contexts.contains_key("main") {
137 return Err(ParseSyntaxError::MainMissing);
138 }
139
140 SyntaxDefinition::add_initial_contexts(&mut contexts, &mut state, top_level_scope);
141
142 let mut file_extensions = Vec::new();
143 for extension_key in &["file_extensions", "hidden_file_extensions"] {
144 if let Ok(v) = get_key(h, extension_key, |x| x.as_vec()) {
145 file_extensions.extend(v.iter().filter_map(|y| y.as_str().map(|s| s.to_owned())))
146 }
147 }
148
149 let defn = SyntaxDefinition {
150 name: get_key(h, "name", |x| x.as_str())
151 .unwrap_or_else(|_| fallback_name.unwrap_or("Unnamed"))
152 .to_owned(),
153 scope: top_level_scope,
154 file_extensions,
155 first_line_match: get_key(h, "first_line_match", |x| x.as_str())
157 .ok()
158 .map(|s| s.to_owned()),
159 hidden: get_key(h, "hidden", |x| x.as_bool()).unwrap_or(false),
160
161 variables: state.variables,
162 contexts,
163 };
164 Ok(defn)
165 }
166
167 fn parse_contexts(
168 map: &Hash,
169 state: &mut ParserState<'_>,
170 ) -> Result<HashMap<String, Context>, ParseSyntaxError> {
171 let mut contexts = HashMap::new();
172 for (key, value) in map.iter() {
173 if let (Some(name), Some(val_vec)) = (key.as_str(), value.as_vec()) {
174 let is_prototype = name == "prototype";
175 let mut namer = ContextNamer::new(name);
176 SyntaxDefinition::parse_context(
177 val_vec,
178 state,
179 &mut contexts,
180 is_prototype,
181 &mut namer,
182 )?;
183 }
184 }
185
186 Ok(contexts)
187 }
188
189 fn parse_context(
190 vec: &[Yaml],
191 state: &mut ParserState<'_>,
193 contexts: &mut HashMap<String, Context>,
194 is_prototype: bool,
195 namer: &mut ContextNamer,
196 ) -> Result<String, ParseSyntaxError> {
197 let mut context = Context::new(!is_prototype);
198 let name = namer.next();
199
200 for y in vec.iter() {
201 let map = y.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
202
203 let mut is_special = false;
204 if let Ok(x) = get_key(map, "meta_scope", |x| x.as_str()) {
205 context.meta_scope = str_to_scopes(x, state.scope_repo)?;
206 is_special = true;
207 }
208 if let Ok(x) = get_key(map, "meta_content_scope", |x| x.as_str()) {
209 context.meta_content_scope = str_to_scopes(x, state.scope_repo)?;
210 is_special = true;
211 }
212 if let Ok(x) = get_key(map, "meta_include_prototype", |x| x.as_bool()) {
213 context.meta_include_prototype = x;
214 is_special = true;
215 }
216 if let Ok(true) = get_key(map, "clear_scopes", |x| x.as_bool()) {
217 context.clear_scopes = Some(ClearAmount::All);
218 is_special = true;
219 }
220 if let Ok(x) = get_key(map, "clear_scopes", |x| x.as_i64()) {
221 context.clear_scopes = Some(ClearAmount::TopN(x as usize));
222 is_special = true;
223 }
224 if !is_special {
225 if let Ok(x) = get_key(map, "include", Some) {
226 let reference =
227 SyntaxDefinition::parse_reference(x, state, contexts, namer, false)?;
228 context.patterns.push(Pattern::Include(reference));
229 } else {
230 let pattern =
231 SyntaxDefinition::parse_match_pattern(map, state, contexts, namer)?;
232 if pattern.has_captures {
233 context.uses_backrefs = true;
234 }
235 context.patterns.push(Pattern::Match(pattern));
236 }
237 }
238 }
239
240 contexts.insert(name.clone(), context);
241 Ok(name)
242 }
243
244 fn parse_reference(
245 y: &Yaml,
246 state: &mut ParserState<'_>,
247 contexts: &mut HashMap<String, Context>,
248 namer: &mut ContextNamer,
249 with_escape: bool,
250 ) -> Result<ContextReference, ParseSyntaxError> {
251 if let Some(s) = y.as_str() {
252 let parts: Vec<&str> = s.split('#').collect();
253 let sub_context = if parts.len() > 1 {
254 Some(parts[1].to_owned())
255 } else {
256 None
257 };
258 if parts[0].starts_with("scope:") {
259 Ok(ContextReference::ByScope {
260 scope: state
261 .scope_repo
262 .build(&parts[0][6..])
263 .map_err(ParseSyntaxError::InvalidScope)?,
264 sub_context,
265 with_escape,
266 })
267 } else if parts[0].ends_with(".sublime-syntax") {
268 let stem = Path::new(parts[0])
269 .file_stem()
270 .and_then(|x| x.to_str())
271 .ok_or(ParseSyntaxError::BadFileRef)?;
272 Ok(ContextReference::File {
273 name: stem.to_owned(),
274 sub_context,
275 with_escape,
276 })
277 } else {
278 Ok(ContextReference::Named(parts[0].to_owned()))
279 }
280 } else if let Some(v) = y.as_vec() {
281 let subname = SyntaxDefinition::parse_context(v, state, contexts, false, namer)?;
282 Ok(ContextReference::Inline(subname))
283 } else {
284 Err(ParseSyntaxError::TypeMismatch)
285 }
286 }
287
288 fn parse_match_pattern(
289 map: &Hash,
290 state: &mut ParserState<'_>,
291 contexts: &mut HashMap<String, Context>,
292 namer: &mut ContextNamer,
293 ) -> Result<MatchPattern, ParseSyntaxError> {
294 let raw_regex = get_key(map, "match", |x| x.as_str())?;
295 let regex_str = Self::parse_regex(raw_regex, state)?;
296 let scope = get_key(map, "scope", |x| x.as_str())
299 .ok()
300 .map(|s| str_to_scopes(s, state.scope_repo))
301 .unwrap_or_else(|| Ok(vec![]))?;
302
303 let captures = if let Ok(map) = get_key(map, "captures", |x| x.as_hash()) {
304 Some(Self::parse_captures(map, ®ex_str, state)?)
305 } else {
306 None
307 };
308
309 let mut has_captures = false;
310 let operation = if get_key(map, "pop", Some).is_ok() {
311 has_captures = state
313 .backref_regex
314 .search(®ex_str, 0, regex_str.len(), None);
315 MatchOperation::Pop
316 } else if let Ok(y) = get_key(map, "push", Some) {
317 MatchOperation::Push(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
318 } else if let Ok(y) = get_key(map, "set", Some) {
319 MatchOperation::Set(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
320 } else if let Ok(y) = get_key(map, "embed", Some) {
321 let mut embed_escape_context_yaml = vec![];
323 let mut commands = Hash::new();
324 commands.insert(
325 Yaml::String("meta_include_prototype".to_string()),
326 Yaml::Boolean(false),
327 );
328 embed_escape_context_yaml.push(Yaml::Hash(commands));
329 if let Ok(s) = get_key(map, "embed_scope", Some) {
330 commands = Hash::new();
331 commands.insert(Yaml::String("meta_content_scope".to_string()), s.clone());
332 embed_escape_context_yaml.push(Yaml::Hash(commands));
333 }
334 if let Ok(v) = get_key(map, "escape", Some) {
335 let mut match_map = Hash::new();
336 match_map.insert(Yaml::String("match".to_string()), v.clone());
337 match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
338 if let Ok(y) = get_key(map, "escape_captures", Some) {
339 match_map.insert(Yaml::String("captures".to_string()), y.clone());
340 }
341 embed_escape_context_yaml.push(Yaml::Hash(match_map));
342 let escape_context = SyntaxDefinition::parse_context(
343 &embed_escape_context_yaml,
344 state,
345 contexts,
346 false,
347 namer,
348 )?;
349 MatchOperation::Push(vec![
350 ContextReference::Inline(escape_context),
351 SyntaxDefinition::parse_reference(y, state, contexts, namer, true)?,
352 ])
353 } else {
354 return Err(ParseSyntaxError::MissingMandatoryKey("escape"));
355 }
356 } else {
357 MatchOperation::None
358 };
359
360 let with_prototype = if let Ok(v) = get_key(map, "with_prototype", |x| x.as_vec()) {
361 let subname = Self::parse_context(v, state, contexts, true, namer)?;
363 Some(ContextReference::Inline(subname))
364 } else if let Ok(v) = get_key(map, "escape", Some) {
365 let subname = namer.next();
366
367 let mut context = Context::new(false);
368 let mut match_map = Hash::new();
369 match_map.insert(
370 Yaml::String("match".to_string()),
371 Yaml::String(format!("(?={})", v.as_str().unwrap())),
372 );
373 match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
374 let pattern =
375 SyntaxDefinition::parse_match_pattern(&match_map, state, contexts, namer)?;
376 if pattern.has_captures {
377 context.uses_backrefs = true;
378 }
379 context.patterns.push(Pattern::Match(pattern));
380
381 contexts.insert(subname.clone(), context);
382 Some(ContextReference::Inline(subname))
383 } else {
384 None
385 };
386
387 let pattern = MatchPattern::new(
388 has_captures,
389 regex_str,
390 scope,
391 captures,
392 operation,
393 with_prototype,
394 );
395
396 Ok(pattern)
397 }
398
399 fn parse_pushargs(
400 y: &Yaml,
401 state: &mut ParserState<'_>,
402 contexts: &mut HashMap<String, Context>,
403 namer: &mut ContextNamer,
404 ) -> Result<Vec<ContextReference>, ParseSyntaxError> {
405 if y.as_vec().is_some_and(|v| {
407 !v.is_empty()
408 && (v[0].as_str().is_some()
409 || (v[0].as_vec().is_some() && v[0].as_vec().unwrap()[0].as_hash().is_some()))
410 }) {
411 y.as_vec()
413 .unwrap()
414 .iter()
415 .map(|x| SyntaxDefinition::parse_reference(x, state, contexts, namer, false))
416 .collect()
417 } else {
418 let reference = SyntaxDefinition::parse_reference(y, state, contexts, namer, false)?;
419 Ok(vec![reference])
420 }
421 }
422
423 fn parse_regex(raw_regex: &str, state: &ParserState<'_>) -> Result<String, ParseSyntaxError> {
424 let regex = Self::resolve_variables(raw_regex, state);
425 let regex = replace_posix_char_classes(regex);
426 let regex = if state.lines_include_newline {
427 regex_for_newlines(regex)
428 } else {
429 regex_for_no_newlines(regex)
433 };
434 Self::try_compile_regex(®ex)?;
435 Ok(regex)
436 }
437
438 fn resolve_variables(raw_regex: &str, state: &ParserState<'_>) -> String {
439 let mut result = String::new();
440 let mut index = 0;
441 let mut region = Region::new();
442 while state
443 .variable_regex
444 .search(raw_regex, index, raw_regex.len(), Some(&mut region))
445 {
446 let (begin, end) = region.pos(0).unwrap();
447
448 result.push_str(&raw_regex[index..begin]);
449
450 let var_pos = region.pos(1).unwrap();
451 let var_name = &raw_regex[var_pos.0..var_pos.1];
452 let var_raw = state
453 .variables
454 .get(var_name)
455 .map(String::as_ref)
456 .unwrap_or("");
457 let var_resolved = Self::resolve_variables(var_raw, state);
458 result.push_str(&var_resolved);
459
460 index = end;
461 }
462 if index < raw_regex.len() {
463 result.push_str(&raw_regex[index..]);
464 }
465 result
466 }
467
468 fn try_compile_regex(regex_str: &str) -> Result<(), ParseSyntaxError> {
469 let regex_str =
471 substitute_backrefs_in_regex(regex_str, |i| Some(format!("<placeholder_{}>", i)));
472
473 if let Some(error) = Regex::try_compile(®ex_str) {
474 Err(ParseSyntaxError::RegexCompileError(regex_str, error))
475 } else {
476 Ok(())
477 }
478 }
479
480 fn parse_captures(
481 map: &Hash,
482 regex_str: &str,
483 state: &mut ParserState<'_>,
484 ) -> Result<CaptureMapping, ParseSyntaxError> {
485 let valid_indexes = get_consuming_capture_indexes(regex_str);
486 let mut captures = Vec::new();
487 for (key, value) in map.iter() {
488 if let (Some(key_int), Some(val_str)) = (key.as_i64(), value.as_str()) {
489 if valid_indexes.contains(&(key_int as usize)) {
490 captures.push((key_int as usize, str_to_scopes(val_str, state.scope_repo)?));
491 }
492 }
493 }
494 Ok(captures)
495 }
496
497 fn add_initial_contexts(
504 contexts: &mut HashMap<String, Context>,
505 state: &mut ParserState<'_>,
506 top_level_scope: Scope,
507 ) {
508 let yaml_docs = YamlLoader::load_from_str(START_CONTEXT).unwrap();
509 let yaml = &yaml_docs[0];
510
511 let start_yaml: &[Yaml] = yaml["__start"].as_vec().unwrap();
512 SyntaxDefinition::parse_context(
513 start_yaml,
514 state,
515 contexts,
516 false,
517 &mut ContextNamer::new("__start"),
518 )
519 .unwrap();
520 if let Some(start) = contexts.get_mut("__start") {
521 start.meta_content_scope = vec![top_level_scope];
522 }
523
524 let main_yaml: &[Yaml] = yaml["__main"].as_vec().unwrap();
525 SyntaxDefinition::parse_context(
526 main_yaml,
527 state,
528 contexts,
529 false,
530 &mut ContextNamer::new("__main"),
531 )
532 .unwrap();
533
534 let meta_include_prototype = contexts["main"].meta_include_prototype;
535 let meta_scope = contexts["main"].meta_scope.clone();
536 let meta_content_scope = contexts["main"].meta_content_scope.clone();
537
538 if let Some(outer_main) = contexts.get_mut("__main") {
539 outer_main.meta_include_prototype = meta_include_prototype;
540 outer_main.meta_scope = meta_scope;
541 outer_main.meta_content_scope = meta_content_scope;
542 }
543
544 if let Some(main) = contexts.get_mut("main") {
548 main.meta_content_scope.insert(0, top_level_scope);
549 }
550 }
551}
552
/// Hands out names for a top-level context and the anonymous contexts nested
/// inside it.
struct ContextNamer {
    /// Name of the top-level context.
    name: String,
    /// `None` until the first name has been handed out, then the index the
    /// next anonymous name will use.
    anonymous_index: Option<usize>,
}

impl ContextNamer {
    /// Creates a namer whose first name will be `name` itself.
    fn new(name: &str) -> ContextNamer {
        ContextNamer {
            name: name.to_string(),
            anonymous_index: None,
        }
    }

    /// Returns the next name: the plain name on the first call, then
    /// `#anon_<name>_0`, `#anon_<name>_1`, and so on.
    fn next(&mut self) -> String {
        match self.anonymous_index {
            None => {
                self.anonymous_index = Some(0);
                self.name.clone()
            }
            Some(index) => {
                self.anonymous_index = Some(index + 1);
                format!("#anon_{}_{}", self.name, index)
            }
        }
    }
}
577
/// Replaces the POSIX character class names `[:alpha:]`, `[:alnum:]`,
/// `[:lower:]`, `[:upper:]` and `[:digit:]` with Unicode property escapes.
///
/// The replacement is purely textual and is applied in the order listed
/// above; other POSIX class names are left as they are.
fn replace_posix_char_classes(regex: String) -> String {
    const REPLACEMENTS: [(&str, &str); 5] = [
        ("[:alpha:]", r"\p{L}"),
        ("[:alnum:]", r"\p{L}\p{N}"),
        ("[:lower:]", r"\p{Ll}"),
        ("[:upper:]", r"\p{Lu}"),
        ("[:digit:]", r"\p{Nd}"),
    ];

    let mut rewritten = regex;
    for (posix_name, unicode_class) in REPLACEMENTS {
        rewritten = rewritten.replace(posix_name, unicode_class);
    }
    rewritten
}
590
591fn regex_for_newlines(regex: String) -> String {
602 if !regex.contains('$') {
603 return regex;
604 }
605
606 let rewriter = RegexRewriterForNewlines {
607 parser: Parser::new(regex.as_bytes()),
608 };
609 rewriter.rewrite()
610}
611
612struct RegexRewriterForNewlines<'a> {
613 parser: Parser<'a>,
614}
615
616impl RegexRewriterForNewlines<'_> {
617 fn rewrite(mut self) -> String {
618 let mut result = Vec::new();
619
620 while let Some(c) = self.parser.peek() {
621 match c {
622 b'$' => {
623 self.parser.next();
624 result.extend_from_slice(br"(?m:$)");
625 }
626 b'\\' => {
627 self.parser.next();
628 result.push(c);
629 if let Some(c2) = self.parser.peek() {
630 self.parser.next();
631 result.push(c2);
632 }
633 }
634 b'[' => {
635 let (mut content, _) = self.parser.parse_character_class();
636 result.append(&mut content);
637 }
638 _ => {
639 self.parser.next();
640 result.push(c);
641 }
642 }
643 }
644 String::from_utf8(result).unwrap()
645 }
646}
647
648fn regex_for_no_newlines(regex: String) -> String {
657 if !regex.contains(r"\n") {
658 return regex;
659 }
660
661 let regex = regex.replace("(?:\\n)?", "(?:$|)");
664
665 let rewriter = RegexRewriterForNoNewlines {
666 parser: Parser::new(regex.as_bytes()),
667 };
668 rewriter.rewrite()
669}
670
671struct RegexRewriterForNoNewlines<'a> {
672 parser: Parser<'a>,
673}
674
675impl RegexRewriterForNoNewlines<'_> {
676 fn rewrite(mut self) -> String {
677 let mut result = Vec::new();
678 while let Some(c) = self.parser.peek() {
679 match c {
680 b'\\' => {
681 self.parser.next();
682 if let Some(c2) = self.parser.peek() {
683 self.parser.next();
684 let c3 = self.parser.peek();
687 if c2 == b'n' && c3 != Some(b'?') && c3 != Some(b'+') && c3 != Some(b'*') {
688 result.extend_from_slice(b"$");
689 } else {
690 result.push(c);
691 result.push(c2);
692 }
693 } else {
694 result.push(c);
695 }
696 }
697 b'[' => {
698 let (mut content, matches_newline) = self.parser.parse_character_class();
699 if matches_newline && self.parser.peek() != Some(b'?') {
700 result.extend_from_slice(b"(?:");
701 result.append(&mut content);
702 result.extend_from_slice(br"|$)");
703 } else {
704 result.append(&mut content);
705 }
706 }
707 _ => {
708 self.parser.next();
709 result.push(c);
710 }
711 }
712 }
713 String::from_utf8(result).unwrap()
714 }
715}
716
717fn get_consuming_capture_indexes(regex: &str) -> Vec<usize> {
718 let parser = ConsumingCaptureIndexParser {
719 parser: Parser::new(regex.as_bytes()),
720 };
721 parser.get_consuming_capture_indexes()
722}
723
/// Scanner that works out which capture groups of a regex sit outside any
/// lookaround.
struct ConsumingCaptureIndexParser<'a> {
    parser: Parser<'a>,
}

impl ConsumingCaptureIndexParser<'_> {
    /// Walks the regex once and returns the numbers of all capture groups that
    /// are not inside a lookahead or lookbehind.
    ///
    /// Group 0 is always included. Groups are numbered in order of their
    /// opening parenthesis; groups inside a lookaround still advance the
    /// counter but are left out of the result.
    fn get_consuming_capture_indexes(mut self) -> Vec<usize> {
        let mut result = Vec::new();
        // One saved `in_lookaround` value per currently open group, restored
        // when the matching `)` is seen.
        let mut stack = Vec::new();
        let mut cap_num = 0;
        let mut in_lookaround = false;
        stack.push(in_lookaround);
        // Group 0 (the whole match) is always reported.
        result.push(cap_num);

        while let Some(c) = self.parser.peek() {
            match c {
                b'\\' => {
                    // Skip the backslash and the byte it escapes, so `\(` is
                    // not mistaken for a group.
                    self.parser.next();
                    self.parser.next();
                }
                b'[' => {
                    // Parentheses inside a character class are literals; skip
                    // the whole class.
                    self.parser.parse_character_class();
                }
                b'(' => {
                    self.parser.next();
                    // Remember the state that applies outside this group.
                    stack.push(in_lookaround);
                    if let Some(c2) = self.parser.peek() {
                        if c2 != b'?' {
                            // Plain `(`: a numbered capture group.
                            cap_num += 1;
                            if !in_lookaround {
                                result.push(cap_num);
                            }
                        } else {
                            self.parser.next();
                            if let Some(c3) = self.parser.peek() {
                                self.parser.next();
                                if c3 == b'=' || c3 == b'!' {
                                    // `(?=` / `(?!`: lookahead.
                                    in_lookaround = true;
                                } else if c3 == b'<' {
                                    // `(?<=` / `(?<!`: lookbehind.
                                    // NOTE(review): `(?<` followed by anything
                                    // else does not advance `cap_num` here --
                                    // confirm whether `(?<name>...)` groups
                                    // need to be counted.
                                    if let Some(c4) = self.parser.peek() {
                                        if c4 == b'=' || c4 == b'!' {
                                            self.parser.next();
                                            in_lookaround = true;
                                        }
                                    }
                                } else if c3 == b'P' {
                                    // `(?P<`: counted like a plain capture group.
                                    if let Some(c4) = self.parser.peek() {
                                        if c4 == b'<' {
                                            cap_num += 1;
                                            if !in_lookaround {
                                                result.push(cap_num);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                b')' => {
                    // Leaving a group restores the lookaround state saved at
                    // its `(`.
                    if let Some(value) = stack.pop() {
                        in_lookaround = value;
                    }
                    self.parser.next();
                }
                _ => {
                    self.parser.next();
                }
            }
        }
        result
    }
}
811
/// Minimal byte cursor over regex source text.
struct Parser<'a> {
    /// The regex being scanned.
    bytes: &'a [u8],
    /// Position of the next byte `peek` will return.
    index: usize,
}

impl Parser<'_> {
    /// Creates a cursor positioned at the first byte of `bytes`.
    fn new(bytes: &[u8]) -> Parser {
        Parser { bytes, index: 0 }
    }

    /// Returns the byte at the cursor without consuming it, or `None` at the end.
    fn peek(&self) -> Option<u8> {
        self.bytes.get(self.index).copied()
    }

    /// Advances the cursor by one byte; no bounds check is made here.
    fn next(&mut self) {
        self.index += 1;
    }

    /// Consumes a character class; the cursor must be on its opening `[`.
    ///
    /// Returns the bytes of the class (brackets included) and whether the
    /// class contains the escape `\n` at the outermost nesting level of a
    /// non-negated class. A `]` directly after `[` or `[^` is taken as a
    /// literal member. Nested `[...]` are tracked so that the class ends at
    /// its own closing bracket; an unterminated class runs to the end of input.
    fn parse_character_class(&mut self) -> (Vec<u8>, bool) {
        // Opening bracket.
        self.next();
        let mut content = vec![b'['];

        let negated = self.peek() == Some(b'^');
        if negated {
            self.next();
            content.push(b'^');
        }

        // A leading `]` does not close the class.
        if self.peek() == Some(b']') {
            self.next();
            content.push(b']');
        }

        let mut depth = 0usize;
        let mut matches_newline = false;
        while let Some(byte) = self.peek() {
            self.next();
            content.push(byte);
            match byte {
                b'\\' => {
                    if let Some(escaped) = self.peek() {
                        self.next();
                        content.push(escaped);
                        if escaped == b'n' && !negated && depth == 0 {
                            matches_newline = true;
                        }
                    }
                }
                b'[' => depth += 1,
                b']' => {
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        (content, matches_newline)
    }
}
886
887#[cfg(test)]
888mod tests {
889 use super::*;
890 use crate::parsing::Scope;
891
892 #[test]
893 fn can_parse() {
894 let defn: SyntaxDefinition = SyntaxDefinition::load_from_str(
895 "name: C\nscope: source.c\ncontexts: {main: []}",
896 false,
897 None,
898 )
899 .unwrap();
900 assert_eq!(defn.name, "C");
901 assert_eq!(defn.scope, Scope::new("source.c").unwrap());
902 let exts_empty: Vec<String> = Vec::new();
903 assert_eq!(defn.file_extensions, exts_empty);
904 assert!(!defn.hidden);
905 assert!(defn.variables.is_empty());
906 let defn2: SyntaxDefinition = SyntaxDefinition::load_from_str(
907 "
908 name: C
909 scope: source.c
910 file_extensions: [c, h]
911 hidden_file_extensions: [k, l]
912 hidden: true
913 variables:
914 ident: '[QY]+'
915 contexts:
916 prototype:
917 - match: lol
918 scope: source.php
919 main:
920 - match: \\b(if|else|for|while|{{ident}})\\b
921 scope: keyword.control.c keyword.looping.c
922 captures:
923 1: meta.preprocessor.c++
924 2: keyword.control.include.c++
925 push: [string, 'scope:source.c#main', 'CSS.sublime-syntax#rule-list-body']
926 with_prototype:
927 - match: wow
928 pop: true
929 - match: '\"'
930 push: string
931 string:
932 - meta_scope: string.quoted.double.c
933 - meta_include_prototype: false
934 - match: \\\\.
935 scope: constant.character.escape.c
936 - match: '\"'
937 pop: true
938 ",
939 false,
940 None,
941 )
942 .unwrap();
943 assert_eq!(defn2.name, "C");
944 let top_level_scope = Scope::new("source.c").unwrap();
945 assert_eq!(defn2.scope, top_level_scope);
946 let exts: Vec<String> = vec!["c", "h", "k", "l"]
947 .into_iter()
948 .map(String::from)
949 .collect();
950 assert_eq!(defn2.file_extensions, exts);
951 assert!(defn2.hidden);
952 assert_eq!(defn2.variables.get("ident").unwrap(), "[QY]+");
953
954 let n: Vec<Scope> = Vec::new();
955 println!("{:?}", defn2);
956 let main = &defn2.contexts["main"];
958 assert_eq!(main.meta_content_scope, vec![top_level_scope]);
959 assert_eq!(main.meta_scope, n);
960 assert!(main.meta_include_prototype);
961
962 assert_eq!(defn2.contexts["__main"].meta_content_scope, n);
963 assert_eq!(
964 defn2.contexts["__start"].meta_content_scope,
965 vec![top_level_scope]
966 );
967
968 assert_eq!(
969 defn2.contexts["string"].meta_scope,
970 vec![Scope::new("string.quoted.double.c").unwrap()]
971 );
972 let first_pattern: &Pattern = &main.patterns[0];
973 match *first_pattern {
974 Pattern::Match(ref match_pat) => {
975 let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
976 assert_eq!(
977 &m[0],
978 &(1, vec![Scope::new("meta.preprocessor.c++").unwrap()])
979 );
980 use crate::parsing::syntax_definition::ContextReference::*;
981
982 let expected = MatchOperation::Push(vec![
984 Named("string".to_owned()),
985 ByScope {
986 scope: Scope::new("source.c").unwrap(),
987 sub_context: Some("main".to_owned()),
988 with_escape: false,
989 },
990 File {
991 name: "CSS".to_owned(),
992 sub_context: Some("rule-list-body".to_owned()),
993 with_escape: false,
994 },
995 ]);
996 assert_eq!(
997 format!("{:?}", match_pat.operation),
998 format!("{:?}", expected)
999 );
1000
1001 assert_eq!(
1002 match_pat.scope,
1003 vec![
1004 Scope::new("keyword.control.c").unwrap(),
1005 Scope::new("keyword.looping.c").unwrap()
1006 ]
1007 );
1008
1009 assert!(match_pat.with_prototype.is_some());
1010 }
1011 _ => unreachable!(),
1012 }
1013 }
1014
1015 #[test]
1016 fn can_parse_embed_as_with_prototypes() {
1017 let old_def = SyntaxDefinition::load_from_str(r#"
1018 name: C
1019 scope: source.c
1020 file_extensions: [c, h]
1021 variables:
1022 ident: '[QY]+'
1023 contexts:
1024 main:
1025 - match: '(>)\s*'
1026 captures:
1027 1: meta.tag.style.begin.html punctuation.definition.tag.end.html
1028 push:
1029 - [{ meta_include_prototype: false }, { meta_content_scope: 'source.css.embedded.html' }, { match: '(?i)(?=</style)', pop: true }]
1030 - scope:source.css
1031 with_prototype:
1032 - match: (?=(?i)(?=</style))
1033 pop: true
1034 "#,false, None).unwrap();
1035
1036 let mut def_with_embed = SyntaxDefinition::load_from_str(
1037 r#"
1038 name: C
1039 scope: source.c
1040 file_extensions: [c, h]
1041 variables:
1042 ident: '[QY]+'
1043 contexts:
1044 main:
1045 - match: '(>)\s*'
1046 captures:
1047 1: meta.tag.style.begin.html punctuation.definition.tag.end.html
1048 embed: scope:source.css
1049 embed_scope: source.css.embedded.html
1050 escape: (?i)(?=</style)
1051 "#,
1052 false,
1053 None,
1054 )
1055 .unwrap();
1056
1057 let def_with_embed_context = def_with_embed.contexts.get_mut("main").unwrap();
1062 if let Pattern::Match(ref mut match_pattern) = def_with_embed_context.patterns[0] {
1063 if let MatchOperation::Push(ref mut context_references) = match_pattern.operation {
1064 if let ContextReference::ByScope {
1065 ref mut with_escape,
1066 ..
1067 } = context_references[1]
1068 {
1069 *with_escape = false;
1070 }
1071 }
1072 }
1073
1074 assert_eq!(old_def.contexts["main"], def_with_embed.contexts["main"]);
1075 }
1076
1077 #[test]
1078 fn errors_on_embed_without_escape() {
1079 let def = SyntaxDefinition::load_from_str(
1080 r#"
1081 name: C
1082 scope: source.c
1083 file_extensions: [c, h]
1084 variables:
1085 ident: '[QY]+'
1086 contexts:
1087 main:
1088 - match: '(>)\s*'
1089 captures:
1090 1: meta.tag.style.begin.html punctuation.definition.tag.end.html
1091 embed: scope:source.css
1092 embed_scope: source.css.embedded.html
1093 "#,
1094 false,
1095 None,
1096 );
1097 assert!(def.is_err());
1098 match def.unwrap_err() {
1099 ParseSyntaxError::MissingMandatoryKey(key) => assert_eq!(key, "escape"),
1100 _ => unreachable!("Got unexpected ParseSyntaxError"),
1101 }
1102 }
1103
1104 #[test]
1105 fn errors_on_regex_compile_error() {
1106 let def = SyntaxDefinition::load_from_str(
1107 r#"
1108 name: C
1109 scope: source.c
1110 file_extensions: [test]
1111 contexts:
1112 main:
1113 - match: '[a'
1114 scope: keyword.name
1115 "#,
1116 false,
1117 None,
1118 );
1119 assert!(def.is_err());
1120 match def.unwrap_err() {
1121 ParseSyntaxError::RegexCompileError(ref regex, _) => assert_eq!("[a", regex),
1122 _ => unreachable!("Got unexpected ParseSyntaxError"),
1123 }
1124 }
1125
1126 #[test]
1127 fn can_parse_ugly_yaml() {
1128 let defn: SyntaxDefinition = SyntaxDefinition::load_from_str(
1129 "
1130 name: LaTeX
1131 scope: text.tex.latex
1132 contexts:
1133 main:
1134 - match: '((\\\\)(?:framebox|makebox))\\b'
1135 captures:
1136 1: support.function.box.latex
1137 2: punctuation.definition.backslash.latex
1138 push:
1139 - [{meta_scope: meta.function.box.latex}, {match: '', pop: true}]
1140 - argument
1141 - optional-arguments
1142 argument:
1143 - match: '\\{'
1144 scope: punctuation.definition.group.brace.begin.latex
1145 - match: '(?=\\S)'
1146 pop: true
1147 optional-arguments:
1148 - match: '(?=\\S)'
1149 pop: true
1150 ",
1151 false,
1152 None,
1153 )
1154 .unwrap();
1155 assert_eq!(defn.name, "LaTeX");
1156 let top_level_scope = Scope::new("text.tex.latex").unwrap();
1157 assert_eq!(defn.scope, top_level_scope);
1158
1159 let first_pattern: &Pattern = &defn.contexts["main"].patterns[0];
1160 match *first_pattern {
1161 Pattern::Match(ref match_pat) => {
1162 let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
1163 assert_eq!(
1164 &m[0],
1165 &(1, vec![Scope::new("support.function.box.latex").unwrap()])
1166 );
1167
1168 assert!(match_pat.with_prototype.is_none());
1174 }
1175 _ => unreachable!(),
1176 }
1177 }
1178
1179 #[test]
1180 fn names_anonymous_contexts() {
1181 let def = SyntaxDefinition::load_from_str(
1182 r#"
1183 scope: source.c
1184 contexts:
1185 main:
1186 - match: a
1187 push: a
1188 a:
1189 - meta_scope: a
1190 - match: x
1191 push:
1192 - meta_scope: anonymous_x
1193 - match: anything
1194 push:
1195 - meta_scope: anonymous_x_2
1196 - match: y
1197 push:
1198 - meta_scope: anonymous_y
1199 - match: z
1200 escape: 'test'
1201 "#,
1202 false,
1203 None,
1204 )
1205 .unwrap();
1206
1207 assert_eq!(def.contexts["a"].meta_scope, vec![Scope::new("a").unwrap()]);
1208 assert_eq!(
1209 def.contexts["#anon_a_0"].meta_scope,
1210 vec![Scope::new("anonymous_x").unwrap()]
1211 );
1212 assert_eq!(
1213 def.contexts["#anon_a_1"].meta_scope,
1214 vec![Scope::new("anonymous_x_2").unwrap()]
1215 );
1216 assert_eq!(
1217 def.contexts["#anon_a_2"].meta_scope,
1218 vec![Scope::new("anonymous_y").unwrap()]
1219 );
1220 assert_eq!(def.contexts["#anon_a_3"].patterns.len(), 1); }
1222
1223 #[test]
1224 fn can_use_fallback_name() {
1225 let def = SyntaxDefinition::load_from_str(
1226 r#"
1227 scope: source.c
1228 contexts:
1229 main:
1230 - match: ''
1231 "#,
1232 false,
1233 Some("C"),
1234 );
1235 assert_eq!(def.unwrap().name, "C");
1236 }
1237
1238 #[test]
1239 fn can_rewrite_regex_for_newlines() {
1240 fn rewrite(s: &str) -> String {
1241 regex_for_newlines(s.to_string())
1242 }
1243
1244 assert_eq!(&rewrite(r"a"), r"a");
1245 assert_eq!(&rewrite(r"\b"), r"\b");
1246 assert_eq!(&rewrite(r"(a)"), r"(a)");
1247 assert_eq!(&rewrite(r"[a]"), r"[a]");
1248 assert_eq!(&rewrite(r"[^a]"), r"[^a]");
1249 assert_eq!(&rewrite(r"[]a]"), r"[]a]");
1250 assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");
1251
1252 assert_eq!(&rewrite(r"^"), r"^");
1253 assert_eq!(&rewrite(r"$"), r"(?m:$)");
1254 assert_eq!(&rewrite(r"^ab$"), r"^ab(?m:$)");
1255 assert_eq!(&rewrite(r"\^ab\$"), r"\^ab\$");
1256 assert_eq!(&rewrite(r"(//).*$"), r"(//).*(?m:$)");
1257
1258 assert_eq!(&rewrite(r"[a$]"), r"[a$]");
1260 }
1261
1262 #[test]
1263 fn can_rewrite_regex_for_no_newlines() {
1264 fn rewrite(s: &str) -> String {
1265 regex_for_no_newlines(s.to_string())
1266 }
1267
1268 assert_eq!(&rewrite(r"a"), r"a");
1269 assert_eq!(&rewrite(r"\b"), r"\b");
1270 assert_eq!(&rewrite(r"(a)"), r"(a)");
1271 assert_eq!(&rewrite(r"[a]"), r"[a]");
1272 assert_eq!(&rewrite(r"[^a]"), r"[^a]");
1273 assert_eq!(&rewrite(r"[]a]"), r"[]a]");
1274 assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");
1275
1276 assert_eq!(&rewrite(r"\n"), r"$");
1277 assert_eq!(&rewrite(r"\[\n"), r"\[$");
1278 assert_eq!(&rewrite(r"a\n?"), r"a\n?");
1279 assert_eq!(&rewrite(r"a\n+"), r"a\n+");
1280 assert_eq!(&rewrite(r"a\n*"), r"a\n*");
1281 assert_eq!(&rewrite(r"[abc\n]"), r"(?:[abc\n]|$)");
1282 assert_eq!(&rewrite(r"[^\n]"), r"[^\n]");
1283 assert_eq!(&rewrite(r"[^]\n]"), r"[^]\n]");
1284 assert_eq!(&rewrite(r"[\n]?"), r"[\n]?");
1285 assert_eq!(&rewrite(r"[\n]"), r"(?:[\n]|$)");
1287 assert_eq!(&rewrite(r"[]\n]"), r"(?:[]\n]|$)");
1288 assert_eq!(&rewrite(r"[[a]&&[\n]]"), r"[[a]&&[\n]]");
1290
1291 assert_eq!(&rewrite(r"ab(?:\n)?"), r"ab(?:$|)");
1292 assert_eq!(&rewrite(r"(?<!\n)ab"), r"(?<!$)ab");
1293 assert_eq!(&rewrite(r"(?<=\n)ab"), r"(?<=$)ab");
1294 }
1295
1296 #[test]
1297 fn can_get_valid_captures_from_regex() {
1298 let regex = "hello(test)(?=(world))(foo(?P<named>bar))";
1299 println!("{:?}", regex);
1300 let valid_indexes = get_consuming_capture_indexes(regex);
1301 println!("{:?}", valid_indexes);
1302 assert_eq!(valid_indexes, [0, 1, 3, 4]);
1303 }
1304
1305 #[test]
1306 fn can_get_valid_captures_from_regex2() {
1307 let regex = "hello(test)[(?=tricked](foo(bar))";
1308 println!("{:?}", regex);
1309 let valid_indexes = get_consuming_capture_indexes(regex);
1310 println!("{:?}", valid_indexes);
1311 assert_eq!(valid_indexes, [0, 1, 2, 3]);
1312 }
1313
1314 #[test]
1315 fn can_get_valid_captures_from_nested_regex() {
1316 let regex = "hello(test)(?=(world(?!(te(?<=(st))))))(foo(bar))";
1317 println!("{:?}", regex);
1318 let valid_indexes = get_consuming_capture_indexes(regex);
1319 println!("{:?}", valid_indexes);
1320 assert_eq!(valid_indexes, [0, 1, 5, 6]);
1321 }
1322
1323 #[test]
1324 fn error_loading_syntax_with_unescaped_backslash() {
1325 let load_err = SyntaxDefinition::load_from_str(
1326 r#"
1327 name: Unescaped Backslash
1328 scope: source.c
1329 file_extensions: [test]
1330 contexts:
1331 main:
1332 - match: '\'
1333 "#,
1334 false,
1335 None,
1336 )
1337 .unwrap_err();
1338 match load_err {
1339 ParseSyntaxError::RegexCompileError(bad_regex, _) => assert_eq!(bad_regex, r"\"),
1340 _ => panic!("Unexpected error: {load_err}"),
1341 }
1342 }
1343}