syntect/parsing/
parser.rs

1// Suppression of a false positive clippy lint. Upstream issue:
2//
3//   mutable_key_type false positive for raw pointers
4//   https://github.com/rust-lang/rust-clippy/issues/6745
5//
6// We use `*const MatchPattern` as key in our `SearchCache` hash map.
7// Clippy thinks this is a problem since `MatchPattern` has interior mutability
8// via `MatchPattern::regex::regex` which is an `AtomicLazyCell`.
9// But raw pointers are hashed via the pointer itself, not what is pointed to.
10// See https://github.com/rust-lang/rust/blob/1.54.0/library/core/src/hash/mod.rs#L717-L725
11#![allow(clippy::mutable_key_type)]
12
13use super::regex::Region;
14use super::scope::*;
15use super::syntax_definition::*;
16use crate::parsing::syntax_definition::ContextId;
17use crate::parsing::syntax_set::{SyntaxReference, SyntaxSet};
18use fnv::FnvHasher;
19use std::collections::HashMap;
20use std::hash::BuildHasherDefault;
21
22/// Errors that can occur while parsing.
23#[derive(Debug, thiserror::Error)]
24#[non_exhaustive]
25pub enum ParsingError {
26    #[error("Somehow main context was popped from the stack")]
27    MissingMainContext,
28    /// A context is missing. Usually caused by a syntax referencing a another
29    /// syntax that is not known to syntect. See e.g. <https://github.com/trishume/syntect/issues/421>
30    #[error("Missing context with ID '{0:?}'")]
31    MissingContext(ContextId),
32    #[error("Bad index to match_at: {0}")]
33    BadMatchIndex(usize),
34    #[error("Tried to use a ContextReference that has not bee resolved yet: {0:?}")]
35    UnresolvedContextReference(ContextReference),
36}
37
38/// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing.
39///
40/// If you are parsing an entire file you create one of these at the start and use it
41/// all the way to the end.
42///
43/// # Caching
44///
45/// One reason this is exposed is that since it implements `Clone` you can actually cache
46/// these (probably along with a [`HighlightState`]) and only re-start parsing from the point of a change.
47/// See the docs for [`HighlightState`] for more in-depth discussion of caching.
48///
49/// This state doesn't keep track of the current scope stack and parsing only returns changes to this stack
50/// so if you want to construct scope stacks you'll need to keep track of that as well.
51/// Note that [`HighlightState`] contains exactly this as a public field that you can use.
52///
53/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to do so eventually.
54/// It is not recommended that you try caching the first time you implement highlighting.
55///
56/// [`HighlightState`]: ../highlighting/struct.HighlightState.html
57#[derive(Debug, Clone, Eq, PartialEq)]
58pub struct ParseState {
59    stack: Vec<StateLevel>,
60    first_line: bool,
61    // See issue #101. Contains indices of frames pushed by `with_prototype`s.
62    // Doesn't look at `with_prototype`s below top of stack.
63    proto_starts: Vec<usize>,
64}
65
66#[derive(Debug, Clone, Eq, PartialEq)]
67struct StateLevel {
68    context: ContextId,
69    prototypes: Vec<ContextId>,
70    captures: Option<(Region, String)>,
71}
72
73#[derive(Debug)]
74struct RegexMatch<'a> {
75    regions: Region,
76    context: &'a Context,
77    pat_index: usize,
78    from_with_prototype: bool,
79    would_loop: bool,
80}
81
82/// Maps the pattern to the start index, which is -1 if not found.
83type SearchCache = HashMap<*const MatchPattern, Option<Region>, BuildHasherDefault<FnvHasher>>;
84
85// To understand the implementation of this, here's an introduction to how
86// Sublime Text syntax definitions work.
87//
88// Let's say we have the following made-up syntax definition:
89//
90//     contexts:
91//       main:
92//         - match: A
93//           scope: scope.a.first
94//           push: context-a
95//         - match: b
96//           scope: scope.b
97//         - match: \w+
98//           scope: scope.other
99//       context-a:
100//         - match: a+
101//           scope: scope.a.rest
102//         - match: (?=.)
103//           pop: true
104//
105// There are two contexts, `main` and `context-a`. Each context contains a list
106// of match rules with instructions for how to proceed.
107//
108// Let's say we have the input string " Aaaabxxx". We start at position 0 in
109// the string. We keep a stack of contexts, which at the beginning is just main.
110//
111// So we start by looking at the top of the context stack (main), and look at
112// the rules in order. The rule that wins is the first one that matches
113// "earliest" in the input string. In our example:
114//
115// 1. The first one matches "A". Note that matches are not anchored, so this
116//    matches at position 1.
117// 2. The second one matches "b", so position 5. The first rule is winning.
118// 3. The third one matches "\w+", so also position 1. But because the first
119//    rule comes first, it wins.
120//
121// So now we execute the winning rule. Whenever we matched some text, we assign
122// the scope (if there is one) to the matched text and advance our position to
123// after the matched text. The scope is "scope.a.first" and our new position is
124// after the "A", so 2. The "push" means that we should change our stack by
125// pushing `context-a` on top of it.
126//
127// In the next step, we repeat the above, but now with the rules in `context-a`.
128// The result is that we match "a+" and assign "scope.a.rest" to "aaa", and our
129// new position is now after the "aaa". Note that there was no instruction for
130// changing the stack, so we stay in that context.
131//
132// In the next step, the first rule doesn't match anymore, so we go to the next
133// rule where "(?=.)" matches. The instruction is to "pop", which means we
134// pop the top of our context stack, which means we're now back in main.
135//
136// This time in main, we match "b", and in the next step we match the rest with
137// "\w+", and we're done.
138//
139//
140// ## Preventing loops
141//
142// These are the basics of how matching works. Now, you saw that you can write
143// patterns that result in an empty match and don't change the position. These
144// are called non-consuming matches. The problem with them is that they could
145// result in infinite loops. Let's look at a syntax where that is the case:
146//
147//     contexts:
148//       main:
149//         - match: (?=.)
150//           push: test
151//       test:
152//         - match: \w+
153//           scope: word
154//         - match: (?=.)
155//           pop: true
156//
157// This is a bit silly, but it's a minimal example for explaining how matching
158// works in that case.
159//
160// Let's say we have the input string " hello". In `main`, our rule matches and
161// we go into `test` and stay at position 0. Now, the best match is the rule
162// with "pop". But if we used that rule, we'd pop back to `main` and would still
163// be at the same position we started at! So this would be an infinite loop,
164// which we don't want.
165//
166// So what Sublime Text does in case a looping rule "won":
167//
168// * If there's another rule that matches at the same position and does not
169//   result in a loop, use that instead.
170// * Otherwise, go to the next position and go through all the rules in the
171//   current context again. Note that it means that the "pop" could again be the
172//   winning rule, but that's ok as it wouldn't result in a loop anymore.
173//
174// So in our input string, we'd skip one character and try to match the rules
175// again. This time, the "\w+" wins because it comes first.
176
177impl ParseState {
178    /// Creates a state from a syntax definition, keeping its own reference-counted point to the
179    /// main context of the syntax
180    pub fn new(syntax: &SyntaxReference) -> ParseState {
181        let start_state = StateLevel {
182            context: syntax.context_ids()["__start"],
183            prototypes: Vec::new(),
184            captures: None,
185        };
186        ParseState {
187            stack: vec![start_state],
188            first_line: true,
189            proto_starts: Vec::new(),
190        }
191    }
192
193    /// Parses a single line of the file. Because of the way regex engines work you unfortunately
194    /// have to pass in a single line contiguous in memory. This can be bad for really long lines.
195    /// Sublime Text avoids this by just not highlighting lines that are too long (thousands of characters).
196    ///
197    /// For efficiency reasons this returns only the changes to the current scope at each point in the line.
198    /// You can use [`ScopeStack::apply`] on each operation in succession to get the stack for a given point.
199    /// Look at the code in `highlighter.rs` for an example of doing this for highlighting purposes.
200    ///
201    /// The returned vector is in order both by index to apply at (the `usize`) and also by order to apply them at a
202    /// given index (e.g popping old scopes before pushing new scopes).
203    ///
204    /// The [`SyntaxSet`] has to be the one that contained the syntax that was used to construct
205    /// this [`ParseState`], or an extended version of it. Otherwise the parsing would return the
206    /// wrong result or even panic. The reason for this is that contexts within the [`SyntaxSet`]
207    /// are referenced via indexes.
208    ///
209    /// [`ScopeStack::apply`]: struct.ScopeStack.html#method.apply
210    /// [`SyntaxSet`]: struct.SyntaxSet.html
211    /// [`ParseState`]: struct.ParseState.html
212    pub fn parse_line(
213        &mut self,
214        line: &str,
215        syntax_set: &SyntaxSet,
216    ) -> Result<Vec<(usize, ScopeStackOp)>, ParsingError> {
217        if self.stack.is_empty() {
218            return Err(ParsingError::MissingMainContext);
219        }
220        let mut match_start = 0;
221        let mut res = Vec::new();
222
223        if self.first_line {
224            let cur_level = &self.stack[self.stack.len() - 1];
225            let context = syntax_set.get_context(&cur_level.context)?;
226            if !context.meta_content_scope.is_empty() {
227                res.push((0, ScopeStackOp::Push(context.meta_content_scope[0])));
228            }
229            self.first_line = false;
230        }
231
232        let mut regions = Region::new();
233        let fnv = BuildHasherDefault::<FnvHasher>::default();
234        let mut search_cache: SearchCache = HashMap::with_capacity_and_hasher(128, fnv);
235        // Used for detecting loops with push/pop, see long comment above.
236        let mut non_consuming_push_at = (0, 0);
237
238        while self.parse_next_token(
239            line,
240            syntax_set,
241            &mut match_start,
242            &mut search_cache,
243            &mut regions,
244            &mut non_consuming_push_at,
245            &mut res,
246        )? {}
247
248        Ok(res)
249    }
250
251    #[allow(clippy::too_many_arguments)]
252    fn parse_next_token(
253        &mut self,
254        line: &str,
255        syntax_set: &SyntaxSet,
256        start: &mut usize,
257        search_cache: &mut SearchCache,
258        regions: &mut Region,
259        non_consuming_push_at: &mut (usize, usize),
260        ops: &mut Vec<(usize, ScopeStackOp)>,
261    ) -> Result<bool, ParsingError> {
262        let check_pop_loop = {
263            let (pos, stack_depth) = *non_consuming_push_at;
264            pos == *start && stack_depth == self.stack.len()
265        };
266
267        // Trim proto_starts that are no longer valid
268        while self
269            .proto_starts
270            .last()
271            .map(|start| *start >= self.stack.len())
272            .unwrap_or(false)
273        {
274            self.proto_starts.pop();
275        }
276
277        let best_match = self.find_best_match(
278            line,
279            *start,
280            syntax_set,
281            search_cache,
282            regions,
283            check_pop_loop,
284        )?;
285
286        if let Some(reg_match) = best_match {
287            if reg_match.would_loop {
288                // A push that doesn't consume anything (a regex that resulted
289                // in an empty match at the current position) can not be
290                // followed by a non-consuming pop. Otherwise we're back where
291                // we started and would try the same sequence of matches again,
292                // resulting in an infinite loop. In this case, Sublime Text
293                // advances one character and tries again, thus preventing the
294                // loop.
295
296                // println!("pop_would_loop for match {:?}, start {}", reg_match, *start);
297
298                // nth(1) gets the next character if there is one. Need to do
299                // this instead of just += 1 because we have byte indices and
300                // unicode characters can be more than 1 byte.
301                if let Some((i, _)) = line[*start..].char_indices().nth(1) {
302                    *start += i;
303                    return Ok(true);
304                } else {
305                    // End of line, no character to advance and no point trying
306                    // any more patterns.
307                    return Ok(false);
308                }
309            }
310
311            let match_end = reg_match.regions.pos(0).unwrap().1;
312
313            let consuming = match_end > *start;
314            if !consuming {
315                // The match doesn't consume any characters. If this is a
316                // "push", remember the position and stack size so that we can
317                // check the next "pop" for loops. Otherwise leave the state,
318                // e.g. non-consuming "set" could also result in a loop.
319                let context = reg_match.context;
320                let match_pattern = context.match_at(reg_match.pat_index)?;
321                if let MatchOperation::Push(_) = match_pattern.operation {
322                    *non_consuming_push_at = (match_end, self.stack.len() + 1);
323                }
324            }
325
326            *start = match_end;
327
328            // ignore `with_prototype`s below this if a context is pushed
329            if reg_match.from_with_prototype {
330                // use current height, since we're before the actual push
331                self.proto_starts.push(self.stack.len());
332            }
333
334            let level_context = {
335                let id = &self.stack[self.stack.len() - 1].context;
336                syntax_set.get_context(id)?
337            };
338            self.exec_pattern(line, &reg_match, level_context, syntax_set, ops)?;
339
340            Ok(true)
341        } else {
342            Ok(false)
343        }
344    }
345
346    fn find_best_match<'a>(
347        &self,
348        line: &str,
349        start: usize,
350        syntax_set: &'a SyntaxSet,
351        search_cache: &mut SearchCache,
352        regions: &mut Region,
353        check_pop_loop: bool,
354    ) -> Result<Option<RegexMatch<'a>>, ParsingError> {
355        let cur_level = &self.stack[self.stack.len() - 1];
356        let context = syntax_set.get_context(&cur_level.context)?;
357        let prototype = if let Some(ref p) = context.prototype {
358            Some(p)
359        } else {
360            None
361        };
362
363        // Build an iterator for the contexts we want to visit in order
364        let context_chain = {
365            let proto_start = self.proto_starts.last().cloned().unwrap_or(0);
366            // Sublime applies with_prototypes from bottom to top
367            let with_prototypes = self.stack[proto_start..].iter().flat_map(|lvl| {
368                lvl.prototypes
369                    .iter()
370                    .map(move |ctx| (true, ctx, lvl.captures.as_ref()))
371            });
372            let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
373            let cur_context =
374                Some((false, &cur_level.context, cur_level.captures.as_ref())).into_iter();
375            with_prototypes.chain(cur_prototype).chain(cur_context)
376        };
377
378        // println!("{:#?}", cur_level);
379        // println!("token at {} on {}", start, line.trim_right());
380
381        let mut min_start = usize::MAX;
382        let mut best_match: Option<RegexMatch<'_>> = None;
383        let mut pop_would_loop = false;
384
385        for (from_with_proto, ctx, captures) in context_chain {
386            for (pat_context, pat_index) in context_iter(syntax_set, syntax_set.get_context(ctx)?) {
387                let match_pat = pat_context.match_at(pat_index)?;
388
389                if let Some(match_region) =
390                    self.search(line, start, match_pat, captures, search_cache, regions)
391                {
392                    let (match_start, match_end) = match_region.pos(0).unwrap();
393
394                    // println!("matched pattern {:?} at start {} end {} (pop would loop: {}, min start: {}, initial start: {}, check_pop_loop: {}, stack_len: {})", match_pat, match_start, match_end, pop_would_loop, min_start, start, check_pop_loop, self.stack.len());
395
396                    if match_start < min_start || (match_start == min_start && pop_would_loop) {
397                        // New match is earlier in text than old match,
398                        // or old match was a looping pop at the same
399                        // position.
400
401                        // println!("setting as current match");
402
403                        min_start = match_start;
404
405                        let consuming = match_end > start;
406                        pop_would_loop = check_pop_loop
407                            && !consuming
408                            && matches!(match_pat.operation, MatchOperation::Pop);
409
410                        let push_too_deep = matches!(match_pat.operation, MatchOperation::Push(_))
411                            && self.stack.len() >= 100;
412
413                        if push_too_deep {
414                            return Ok(None);
415                        }
416
417                        best_match = Some(RegexMatch {
418                            regions: match_region,
419                            context: pat_context,
420                            pat_index,
421                            from_with_prototype: from_with_proto,
422                            would_loop: pop_would_loop,
423                        });
424
425                        if match_start == start && !pop_would_loop {
426                            // We're not gonna find a better match after this,
427                            // so as an optimization we can stop matching now.
428                            return Ok(best_match);
429                        }
430                    }
431                }
432            }
433        }
434        Ok(best_match)
435    }
436
437    fn search(
438        &self,
439        line: &str,
440        start: usize,
441        match_pat: &MatchPattern,
442        captures: Option<&(Region, String)>,
443        search_cache: &mut SearchCache,
444        regions: &mut Region,
445    ) -> Option<Region> {
446        // println!("{} - {:?} - {:?}", match_pat.regex_str, match_pat.has_captures, cur_level.captures.is_some());
447        let match_ptr = match_pat as *const MatchPattern;
448
449        if let Some(maybe_region) = search_cache.get(&match_ptr) {
450            if let Some(ref region) = *maybe_region {
451                let match_start = region.pos(0).unwrap().0;
452                if match_start >= start {
453                    // Cached match is valid, return it. Otherwise do another
454                    // search below.
455                    return Some(region.clone());
456                }
457            } else {
458                // Didn't find a match earlier, so no point trying to match it again
459                return None;
460            }
461        }
462
463        let (regex, can_cache) = match (match_pat.has_captures, captures) {
464            (true, Some(captures)) => {
465                let (region, s) = captures;
466                (&match_pat.regex_with_refs(region, s), false)
467            }
468            _ => (match_pat.regex(), true),
469        };
470        // print!("  executing regex: {:?} at pos {} on line {}", regex.regex_str(), start, line);
471        let matched = regex.search(line, start, line.len(), Some(regions));
472
473        if matched {
474            let (match_start, match_end) = regions.pos(0).unwrap();
475            // this is necessary to avoid infinite looping on dumb patterns
476            let does_something = match match_pat.operation {
477                MatchOperation::None => match_start != match_end,
478                MatchOperation::Push(_) => self.stack.len() < 100,
479                _ => true,
480            };
481            if can_cache && does_something {
482                search_cache.insert(match_pat, Some(regions.clone()));
483            }
484            if does_something {
485                // print!("catch {} at {} on {}", match_pat.regex_str, match_start, line);
486                return Some(regions.clone());
487            }
488        } else if can_cache {
489            search_cache.insert(match_pat, None);
490        }
491        None
492    }
493
494    /// Returns true if the stack was changed
495    fn exec_pattern<'a>(
496        &mut self,
497        line: &str,
498        reg_match: &RegexMatch<'a>,
499        level_context: &'a Context,
500        syntax_set: &'a SyntaxSet,
501        ops: &mut Vec<(usize, ScopeStackOp)>,
502    ) -> Result<bool, ParsingError> {
503        let (match_start, match_end) = reg_match.regions.pos(0).unwrap();
504        let context = reg_match.context;
505        let pat = context.match_at(reg_match.pat_index)?;
506        // println!("running pattern {:?} on '{}' at {}, operation {:?}", pat.regex_str, line, match_start, pat.operation);
507
508        self.push_meta_ops(
509            true,
510            match_start,
511            level_context,
512            &pat.operation,
513            syntax_set,
514            ops,
515        )?;
516        for s in &pat.scope {
517            // println!("pushing {:?} at {}", s, match_start);
518            ops.push((match_start, ScopeStackOp::Push(*s)));
519        }
520        if let Some(ref capture_map) = pat.captures {
521            // captures could appear in an arbitrary order, have to produce ops in right order
522            // ex: ((bob)|(hi))* could match hibob in wrong order, and outer has to push first
523            // we don't have to handle a capture matching multiple times, Sublime doesn't
524            let mut map: Vec<((usize, i32), ScopeStackOp)> = Vec::new();
525            for &(cap_index, ref scopes) in capture_map.iter() {
526                if let Some((cap_start, cap_end)) = reg_match.regions.pos(cap_index) {
527                    // marking up empty captures causes pops to be sorted wrong
528                    if cap_start == cap_end {
529                        continue;
530                    }
531                    // println!("capture {:?} at {:?}-{:?}", scopes[0], cap_start, cap_end);
532                    for scope in scopes.iter() {
533                        map.push((
534                            (cap_start, -((cap_end - cap_start) as i32)),
535                            ScopeStackOp::Push(*scope),
536                        ));
537                    }
538                    map.push(((cap_end, i32::MIN), ScopeStackOp::Pop(scopes.len())));
539                }
540            }
541            map.sort_by(|a, b| a.0.cmp(&b.0));
542            for ((index, _), op) in map.into_iter() {
543                ops.push((index, op));
544            }
545        }
546        if !pat.scope.is_empty() {
547            // println!("popping at {}", match_end);
548            ops.push((match_end, ScopeStackOp::Pop(pat.scope.len())));
549        }
550        self.push_meta_ops(
551            false,
552            match_end,
553            level_context,
554            &pat.operation,
555            syntax_set,
556            ops,
557        )?;
558
559        self.perform_op(line, &reg_match.regions, pat, syntax_set)
560    }
561
562    fn push_meta_ops(
563        &self,
564        initial: bool,
565        index: usize,
566        cur_context: &Context,
567        match_op: &MatchOperation,
568        syntax_set: &SyntaxSet,
569        ops: &mut Vec<(usize, ScopeStackOp)>,
570    ) -> Result<(), ParsingError> {
571        // println!("metas ops for {:?}, initial: {}",
572        //          match_op,
573        //          initial);
574        // println!("{:?}", cur_context.meta_scope);
575        match *match_op {
576            MatchOperation::Pop => {
577                let v = if initial {
578                    &cur_context.meta_content_scope
579                } else {
580                    &cur_context.meta_scope
581                };
582                if !v.is_empty() {
583                    ops.push((index, ScopeStackOp::Pop(v.len())));
584                }
585
586                // cleared scopes are restored after the scopes from match pattern that invoked the pop are applied
587                if !initial && cur_context.clear_scopes.is_some() {
588                    ops.push((index, ScopeStackOp::Restore))
589                }
590            }
591            // for some reason the ST3 behaviour of set is convoluted and is inconsistent with the docs and other ops
592            // - the meta_content_scope of the current context is applied to the matched thing, unlike pop
593            // - the clear_scopes are applied after the matched token, unlike push
594            // - the interaction with meta scopes means that the token has the meta scopes of both the current scope and the new scope.
595            MatchOperation::Push(ref context_refs) | MatchOperation::Set(ref context_refs) => {
596                let is_set = matches!(*match_op, MatchOperation::Set(_));
597                // a match pattern that "set"s keeps the meta_content_scope and meta_scope from the previous context
598                if initial {
599                    if is_set && cur_context.clear_scopes.is_some() {
600                        // cleared scopes from the old context are restored immediately
601                        ops.push((index, ScopeStackOp::Restore));
602                    }
603                    // add each context's meta scope
604                    for r in context_refs.iter() {
605                        let ctx = r.resolve(syntax_set)?;
606
607                        if !is_set {
608                            if let Some(clear_amount) = ctx.clear_scopes {
609                                ops.push((index, ScopeStackOp::Clear(clear_amount)));
610                            }
611                        }
612
613                        for scope in ctx.meta_scope.iter() {
614                            ops.push((index, ScopeStackOp::Push(*scope)));
615                        }
616                    }
617                } else {
618                    let repush = (is_set
619                        && (!cur_context.meta_scope.is_empty()
620                            || !cur_context.meta_content_scope.is_empty()))
621                        || context_refs.iter().any(|r| {
622                            let ctx = r.resolve(syntax_set).unwrap();
623
624                            !ctx.meta_content_scope.is_empty()
625                                || (ctx.clear_scopes.is_some() && is_set)
626                        });
627                    if repush {
628                        // remove previously pushed meta scopes, so that meta content scopes will be applied in the correct order
629                        let mut num_to_pop: usize = context_refs
630                            .iter()
631                            .map(|r| {
632                                let ctx = r.resolve(syntax_set).unwrap();
633                                ctx.meta_scope.len()
634                            })
635                            .sum();
636
637                        // also pop off the original context's meta scopes
638                        if is_set {
639                            num_to_pop +=
640                                cur_context.meta_content_scope.len() + cur_context.meta_scope.len();
641                        }
642
643                        // do all the popping as one operation
644                        if num_to_pop > 0 {
645                            ops.push((index, ScopeStackOp::Pop(num_to_pop)));
646                        }
647
648                        // now we push meta scope and meta context scope for each context pushed
649                        for r in context_refs {
650                            let ctx = r.resolve(syntax_set)?;
651
652                            // for some reason, contrary to my reading of the docs, set does this after the token
653                            if is_set {
654                                if let Some(clear_amount) = ctx.clear_scopes {
655                                    ops.push((index, ScopeStackOp::Clear(clear_amount)));
656                                }
657                            }
658
659                            for scope in ctx.meta_scope.iter() {
660                                ops.push((index, ScopeStackOp::Push(*scope)));
661                            }
662                            for scope in ctx.meta_content_scope.iter() {
663                                ops.push((index, ScopeStackOp::Push(*scope)));
664                            }
665                        }
666                    }
667                }
668            }
669            MatchOperation::None => (),
670        }
671
672        Ok(())
673    }
674
675    /// Returns true if the stack was changed
676    fn perform_op(
677        &mut self,
678        line: &str,
679        regions: &Region,
680        pat: &MatchPattern,
681        syntax_set: &SyntaxSet,
682    ) -> Result<bool, ParsingError> {
683        let (ctx_refs, old_proto_ids) = match pat.operation {
684            MatchOperation::Push(ref ctx_refs) => (ctx_refs, None),
685            MatchOperation::Set(ref ctx_refs) => {
686                // a `with_prototype` stays active when the context is `set`
687                // until the context layer in the stack (where the `with_prototype`
688                // was initially applied) is popped off.
689                (ctx_refs, self.stack.pop().map(|s| s.prototypes))
690            }
691            MatchOperation::Pop => {
692                self.stack.pop();
693                return Ok(true);
694            }
695            MatchOperation::None => return Ok(false),
696        };
697        for (i, r) in ctx_refs.iter().enumerate() {
698            let mut proto_ids = if i == 0 {
699                // it is only necessary to preserve the old prototypes
700                // at the first stack frame pushed
701                old_proto_ids.clone().unwrap_or_else(Vec::new)
702            } else {
703                Vec::new()
704            };
705            if i == ctx_refs.len() - 1 {
706                // if a with_prototype was specified, and multiple contexts were pushed,
707                // then the with_prototype applies only to the last context pushed, i.e.
708                // top most on the stack after all the contexts are pushed - this is also
709                // referred to as the "target" of the push by sublimehq - see
710                // https://forum.sublimetext.com/t/dev-build-3111/19240/17 for more info
711                if let Some(ref p) = pat.with_prototype {
712                    proto_ids.push(p.id()?);
713                }
714            }
715            let context_id = r.id()?;
716            let context = syntax_set.get_context(&context_id)?;
717            let captures = {
718                let mut uses_backrefs = context.uses_backrefs;
719                if !proto_ids.is_empty() {
720                    uses_backrefs = uses_backrefs
721                        || proto_ids
722                            .iter()
723                            .any(|id| syntax_set.get_context(id).unwrap().uses_backrefs);
724                }
725                if uses_backrefs {
726                    Some((regions.clone(), line.to_owned()))
727                } else {
728                    None
729                }
730            };
731            self.stack.push(StateLevel {
732                context: context_id,
733                prototypes: proto_ids,
734                captures,
735            });
736        }
737        Ok(true)
738    }
739}
740
741#[cfg(feature = "yaml-load")]
742#[cfg(test)]
743mod tests {
744    use super::*;
745    use crate::parsing::ScopeStackOp::{Clear, Pop, Push, Restore};
746    use crate::parsing::{Scope, ScopeStack, SyntaxSet, SyntaxSetBuilder};
747    use crate::util::debug_print_ops;
748    use crate::utils::testdata;
749
750    const TEST_SYNTAX: &str = include_str!("../../testdata/parser_tests.sublime-syntax");
751    #[test]
752    fn can_parse_simple() {
753        let ss = &*testdata::PACKAGES_SYN_SET;
754        let mut state = {
755            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
756            ParseState::new(syntax)
757        };
758
759        let ops1 = ops(&mut state, "module Bob::Wow::Troll::Five; 5; end", &ss);
760        let test_ops1 = vec![
761            (0, Push(Scope::new("source.ruby.rails").unwrap())),
762            (0, Push(Scope::new("meta.module.ruby").unwrap())),
763            (0, Push(Scope::new("keyword.control.module.ruby").unwrap())),
764            (6, Pop(2)),
765            (6, Push(Scope::new("meta.module.ruby").unwrap())),
766            (7, Pop(1)),
767            (7, Push(Scope::new("meta.module.ruby").unwrap())),
768            (7, Push(Scope::new("entity.name.module.ruby").unwrap())),
769            (7, Push(Scope::new("support.other.namespace.ruby").unwrap())),
770            (10, Pop(1)),
771            (10, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
772        ];
773        assert_eq!(&ops1[0..test_ops1.len()], &test_ops1[..]);
774
775        let ops2 = ops(&mut state, "def lol(wow = 5)", &ss);
776        let test_ops2 = vec![
777            (0, Push(Scope::new("meta.function.ruby").unwrap())),
778            (0, Push(Scope::new("keyword.control.def.ruby").unwrap())),
779            (3, Pop(2)),
780            (3, Push(Scope::new("meta.function.ruby").unwrap())),
781            (4, Push(Scope::new("entity.name.function.ruby").unwrap())),
782            (7, Pop(1)),
783        ];
784        assert_eq!(&ops2[0..test_ops2.len()], &test_ops2[..]);
785    }
786
787    #[test]
788    fn can_parse_yaml() {
789        let ps = &*testdata::PACKAGES_SYN_SET;
790        let mut state = {
791            let syntax = ps.find_syntax_by_name("YAML").unwrap();
792            ParseState::new(syntax)
793        };
794
795        assert_eq!(
796            ops(&mut state, "key: value\n", &ps),
797            vec![
798                (0, Push(Scope::new("source.yaml").unwrap())),
799                (
800                    0,
801                    Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())
802                ),
803                (0, Push(Scope::new("entity.name.tag.yaml").unwrap())),
804                (3, Pop(2)),
805                (
806                    3,
807                    Push(Scope::new("punctuation.separator.key-value.mapping.yaml").unwrap())
808                ),
809                (4, Pop(1)),
810                (
811                    5,
812                    Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())
813                ),
814                (10, Pop(1)),
815            ]
816        );
817    }
818
819    #[test]
820    fn can_parse_includes() {
821        let ss = &*testdata::PACKAGES_SYN_SET;
822        let mut state = {
823            let syntax = ss.find_syntax_by_name("HTML (Rails)").unwrap();
824            ParseState::new(syntax)
825        };
826
827        let ops = ops(&mut state, "<script>var lol = '<% def wow(", &ss);
828
829        let mut test_stack = ScopeStack::new();
830        test_stack.push(Scope::new("text.html.ruby").unwrap());
831        test_stack.push(Scope::new("text.html.basic").unwrap());
832        test_stack.push(Scope::new("source.js.embedded.html").unwrap());
833        test_stack.push(Scope::new("source.js").unwrap());
834        test_stack.push(Scope::new("string.quoted.single.js").unwrap());
835        test_stack.push(Scope::new("source.ruby.rails.embedded.html").unwrap());
836        test_stack.push(Scope::new("meta.function.parameters.ruby").unwrap());
837
838        let mut stack = ScopeStack::new();
839        for (_, op) in ops.iter() {
840            stack.apply(op).expect("#[cfg(test)]");
841        }
842        assert_eq!(stack, test_stack);
843    }
844
845    #[test]
846    fn can_parse_backrefs() {
847        let ss = &*testdata::PACKAGES_SYN_SET;
848        let mut state = {
849            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
850            ParseState::new(syntax)
851        };
852
853        // For parsing HEREDOC, the "SQL" is captured at the beginning and then used in another
854        // regex with a backref, to match the end of the HEREDOC. Note that there can be code
855        // after the marker (`.strip`) here.
856        assert_eq!(
857            ops(&mut state, "lol = <<-SQL.strip", &ss),
858            vec![
859                (0, Push(Scope::new("source.ruby.rails").unwrap())),
860                (
861                    4,
862                    Push(Scope::new("keyword.operator.assignment.ruby").unwrap())
863                ),
864                (5, Pop(1)),
865                (
866                    6,
867                    Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())
868                ),
869                (
870                    6,
871                    Push(Scope::new("punctuation.definition.string.begin.ruby").unwrap())
872                ),
873                (12, Pop(1)),
874                (12, Pop(1)),
875                (
876                    12,
877                    Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())
878                ),
879                (12, Push(Scope::new("text.sql.embedded.ruby").unwrap())),
880                (12, Clear(ClearAmount::TopN(2))),
881                (12, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
882                (13, Pop(1)),
883                (18, Restore),
884            ]
885        );
886
887        assert_eq!(ops(&mut state, "wow", &ss), vec![]);
888
889        assert_eq!(
890            ops(&mut state, "SQL", &ss),
891            vec![
892                (0, Pop(1)),
893                (
894                    0,
895                    Push(Scope::new("punctuation.definition.string.end.ruby").unwrap())
896                ),
897                (3, Pop(1)),
898                (3, Pop(1)),
899            ]
900        );
901    }
902
903    #[test]
904    fn can_parse_preprocessor_rules() {
905        let ss = &*testdata::PACKAGES_SYN_SET;
906        let mut state = {
907            let syntax = ss.find_syntax_by_name("C").unwrap();
908            ParseState::new(syntax)
909        };
910
911        assert_eq!(
912            ops(&mut state, "#ifdef FOO", &ss),
913            vec![
914                (0, Push(Scope::new("source.c").unwrap())),
915                (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
916                (0, Push(Scope::new("keyword.control.import.c").unwrap())),
917                (6, Pop(1)),
918                (10, Pop(1)),
919            ]
920        );
921        assert_eq!(
922            ops(&mut state, "{", &ss),
923            vec![
924                (0, Push(Scope::new("meta.block.c").unwrap())),
925                (
926                    0,
927                    Push(Scope::new("punctuation.section.block.begin.c").unwrap())
928                ),
929                (1, Pop(1)),
930            ]
931        );
932        assert_eq!(
933            ops(&mut state, "#else", &ss),
934            vec![
935                (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
936                (0, Push(Scope::new("keyword.control.import.c").unwrap())),
937                (5, Pop(1)),
938                (5, Pop(1)),
939            ]
940        );
941        assert_eq!(
942            ops(&mut state, "{", &ss),
943            vec![
944                (0, Push(Scope::new("meta.block.c").unwrap())),
945                (
946                    0,
947                    Push(Scope::new("punctuation.section.block.begin.c").unwrap())
948                ),
949                (1, Pop(1)),
950            ]
951        );
952        assert_eq!(
953            ops(&mut state, "#endif", &ss),
954            vec![
955                (0, Pop(1)),
956                (0, Push(Scope::new("meta.block.c").unwrap())),
957                (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
958                (0, Push(Scope::new("keyword.control.import.c").unwrap())),
959                (6, Pop(2)),
960                (6, Pop(2)),
961                (6, Push(Scope::new("meta.block.c").unwrap())),
962            ]
963        );
964        assert_eq!(
965            ops(&mut state, "    foo;", &ss),
966            vec![
967                (7, Push(Scope::new("punctuation.terminator.c").unwrap())),
968                (8, Pop(1)),
969            ]
970        );
971        assert_eq!(
972            ops(&mut state, "}", &ss),
973            vec![
974                (
975                    0,
976                    Push(Scope::new("punctuation.section.block.end.c").unwrap())
977                ),
978                (1, Pop(1)),
979                (1, Pop(1)),
980            ]
981        );
982    }
983
984    #[test]
985    fn can_parse_issue25() {
986        let ss = &*testdata::PACKAGES_SYN_SET;
987        let mut state = {
988            let syntax = ss.find_syntax_by_name("C").unwrap();
989            ParseState::new(syntax)
990        };
991
992        // test fix for issue #25
993        assert_eq!(ops(&mut state, "struct{estruct", &ss).len(), 10);
994    }
995
996    #[test]
997    fn can_compare_parse_states() {
998        let ss = &*testdata::PACKAGES_SYN_SET;
999        let syntax = ss.find_syntax_by_name("Java").unwrap();
1000        let mut state1 = ParseState::new(syntax);
1001        let mut state2 = ParseState::new(syntax);
1002
1003        assert_eq!(ops(&mut state1, "class Foo {", &ss).len(), 11);
1004        assert_eq!(ops(&mut state2, "class Fooo {", &ss).len(), 11);
1005
1006        assert_eq!(state1, state2);
1007        ops(&mut state1, "}", &ss);
1008        assert_ne!(state1, state2);
1009    }
1010
1011    #[test]
1012    fn can_parse_non_nested_clear_scopes() {
1013        let line = "'hello #simple_cleared_scopes_test world test \\n '";
1014        let expect = [
1015            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
1016            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
1017            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
1018        ];
1019        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1020    }
1021
1022    #[test]
1023    fn can_parse_non_nested_too_many_clear_scopes() {
1024        let line = "'hello #too_many_cleared_scopes_test world test \\n '";
1025        let expect = [
1026            "<example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
1027            "<example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
1028            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
1029        ];
1030        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1031    }
1032
1033    #[test]
1034    fn can_parse_nested_clear_scopes() {
1035        let line = "'hello #nested_clear_scopes_test world foo bar test \\n '";
1036        let expect = [
1037            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
1038            "<source.test>, <example.meta-scope.cleared-previous-meta-scope.example>, <foo>",
1039            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
1040            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
1041        ];
1042        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1043    }
1044
1045    #[test]
1046    fn can_parse_infinite_loop() {
1047        let line = "#infinite_loop_test 123";
1048        let expect = ["<source.test>, <constant.numeric.test>"];
1049        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1050    }
1051
1052    #[test]
1053    fn can_parse_infinite_seeming_loop() {
1054        // See https://github.com/SublimeTextIssues/Core/issues/1190 for an
1055        // explanation.
1056        let line = "#infinite_seeming_loop_test hello";
1057        let expect = [
1058            "<source.test>, <keyword.test>",
1059            "<source.test>, <test>, <string.unquoted.test>",
1060            "<source.test>, <test>, <keyword.control.test>",
1061        ];
1062        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1063    }
1064
1065    #[test]
1066    fn can_parse_prototype_that_pops_main() {
1067        let syntax = r#"
1068name: test
1069scope: source.test
1070contexts:
1071  prototype:
1072    # This causes us to pop out of the main context. Sublime Text handles that
1073    # by pushing main back automatically.
1074    - match: (?=!)
1075      pop: true
1076  main:
1077    - match: foo
1078      scope: test.good
1079"#;
1080
1081        let line = "foo!";
1082        let expect = ["<source.test>, <test.good>"];
1083        expect_scope_stacks(line, &expect, syntax);
1084    }
1085
1086    #[test]
1087    fn can_parse_syntax_with_newline_in_character_class() {
1088        let syntax = r#"
1089name: test
1090scope: source.test
1091contexts:
1092  main:
1093    - match: foo[\n]
1094      scope: foo.end
1095    - match: foo
1096      scope: foo.any
1097"#;
1098
1099        let line = "foo";
1100        let expect = ["<source.test>, <foo.end>"];
1101        expect_scope_stacks(line, &expect, syntax);
1102
1103        let line = "foofoofoo";
1104        let expect = [
1105            "<source.test>, <foo.any>",
1106            "<source.test>, <foo.any>",
1107            "<source.test>, <foo.end>",
1108        ];
1109        expect_scope_stacks(line, &expect, syntax);
1110    }
1111
1112    #[test]
1113    fn can_parse_issue120() {
1114        let syntax = SyntaxDefinition::load_from_str(
1115            include_str!("../../testdata/embed_escape_test.sublime-syntax"),
1116            false,
1117            None,
1118        )
1119        .unwrap();
1120
1121        let line1 = "\"abctest\" foobar";
1122        let expect1 = [
1123            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.begin.html>",
1124            "<meta.attribute-with-value.style.html>, <source.css>",
1125            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.end.html>",
1126            "<meta.attribute-with-value.style.html>, <source.css>, <test.embedded>",
1127            "<top-level.test>",
1128        ];
1129
1130        expect_scope_stacks_with_syntax(line1, &expect1, syntax.clone());
1131
1132        let line2 = ">abctest</style>foobar";
1133        let expect2 = [
1134            "<meta.tag.style.begin.html>, <punctuation.definition.tag.end.html>",
1135            "<source.css.embedded.html>, <test.embedded>",
1136            "<top-level.test>",
1137        ];
1138        expect_scope_stacks_with_syntax(line2, &expect2, syntax);
1139    }
1140
1141    #[test]
1142    fn can_parse_non_consuming_pop_that_would_loop() {
1143        // See https://github.com/trishume/syntect/issues/127
1144        let syntax = r#"
1145name: test
1146scope: source.test
1147contexts:
1148  main:
1149    # This makes us go into "test" without consuming any characters
1150    - match: (?=hello)
1151      push: test
1152  test:
1153    # If we used this match, we'd go back to "main" without consuming anything,
1154    # and then back into "test", infinitely looping. ST detects this at this
1155    # point and ignores this match until at least one character matched.
1156    - match: (?!world)
1157      pop: true
1158    - match: \w+
1159      scope: test.matched
1160"#;
1161
1162        let line = "hello";
1163        let expect = ["<source.test>, <test.matched>"];
1164        expect_scope_stacks(line, &expect, syntax);
1165    }
1166
1167    #[test]
1168    fn can_parse_non_consuming_set_and_pop_that_would_loop() {
1169        let syntax = r#"
1170name: test
1171scope: source.test
1172contexts:
1173  main:
1174    # This makes us go into "a" without advancing
1175    - match: (?=test)
1176      push: a
1177  a:
1178    # This makes us go into "b" without advancing
1179    - match: (?=t)
1180      set: b
1181  b:
1182    # If we used this match, we'd go back to "main" without having advanced,
1183    # which means we'd have an infinite loop like with the previous test.
1184    # So even for a "set", we have to check if we're advancing or not.
1185    - match: (?=t)
1186      pop: true
1187    - match: \w+
1188      scope: test.matched
1189"#;
1190
1191        let line = "test";
1192        let expect = ["<source.test>, <test.matched>"];
1193        expect_scope_stacks(line, &expect, syntax);
1194    }
1195
1196    #[test]
1197    fn can_parse_non_consuming_set_after_consuming_push_that_does_not_loop() {
1198        let syntax = r#"
1199name: test
1200scope: source.test
1201contexts:
1202  main:
1203    # This makes us go into "a", but we consumed a character
1204    - match: t
1205      push: a
1206    - match: \w+
1207      scope: test.matched
1208  a:
1209    # This makes us go into "b" without consuming
1210    - match: (?=e)
1211      set: b
1212  b:
1213    # This match does not result in an infinite loop because we already consumed
1214    # a character to get into "a", so it's ok to pop back into "main".
1215    - match: (?=e)
1216      pop: true
1217"#;
1218
1219        let line = "test";
1220        let expect = ["<source.test>, <test.matched>"];
1221        expect_scope_stacks(line, &expect, syntax);
1222    }
1223
1224    #[test]
1225    fn can_parse_non_consuming_set_after_consuming_set_that_does_not_loop() {
1226        let syntax = r#"
1227name: test
1228scope: source.test
1229contexts:
1230  main:
1231    - match: (?=hello)
1232      push: a
1233    - match: \w+
1234      scope: test.matched
1235  a:
1236    - match: h
1237      set: b
1238  b:
1239    - match: (?=e)
1240      set: c
1241  c:
1242    # This is not an infinite loop because "a" consumed a character, so we can
1243    # actually pop back into main and then match the rest of the input.
1244    - match: (?=e)
1245      pop: true
1246"#;
1247
1248        let line = "hello";
1249        let expect = ["<source.test>, <test.matched>"];
1250        expect_scope_stacks(line, &expect, syntax);
1251    }
1252
1253    #[test]
1254    fn can_parse_non_consuming_pop_that_would_loop_at_end_of_line() {
1255        let syntax = r#"
1256name: test
1257scope: source.test
1258contexts:
1259  main:
1260    # This makes us go into "test" without consuming, even at the end of line
1261    - match: ""
1262      push: test
1263  test:
1264    - match: ""
1265      pop: true
1266    - match: \w+
1267      scope: test.matched
1268"#;
1269
1270        let line = "hello";
1271        let expect = ["<source.test>, <test.matched>"];
1272        expect_scope_stacks(line, &expect, syntax);
1273    }
1274
1275    #[test]
1276    fn can_parse_empty_but_consuming_set_that_does_not_loop() {
1277        let syntax = r#"
1278name: test
1279scope: source.test
1280contexts:
1281  main:
1282    - match: (?=hello)
1283      push: a
1284    - match: ello
1285      scope: test.good
1286  a:
1287    # This is an empty match, but it consumed a character (the "h")
1288    - match: (?=e)
1289      set: b
1290  b:
1291    # .. so it's ok to pop back to main from here
1292    - match: ""
1293      pop: true
1294    - match: ello
1295      scope: test.bad
1296"#;
1297
1298        let line = "hello";
1299        let expect = ["<source.test>, <test.good>"];
1300        expect_scope_stacks(line, &expect, syntax);
1301    }
1302
1303    #[test]
1304    fn can_parse_non_consuming_pop_that_does_not_loop() {
1305        let syntax = r#"
1306name: test
1307scope: source.test
1308contexts:
1309  main:
1310    # This is a non-consuming push, so "b" will need to check for a
1311    # non-consuming pop
1312    - match: (?=hello)
1313      push: [b, a]
1314    - match: ello
1315      scope: test.good
1316  a:
1317    # This pop is ok, it consumed "h"
1318    - match: (?=e)
1319      pop: true
1320  b:
1321    # This is non-consuming, and we set to "c"
1322    - match: (?=e)
1323      set: c
1324  c:
1325    # It's ok to pop back to "main" here because we consumed a character in the
1326    # meantime.
1327    - match: ""
1328      pop: true
1329    - match: ello
1330      scope: test.bad
1331"#;
1332
1333        let line = "hello";
1334        let expect = ["<source.test>, <test.good>"];
1335        expect_scope_stacks(line, &expect, syntax);
1336    }
1337
1338    #[test]
1339    fn can_parse_non_consuming_pop_with_multi_push_that_does_not_loop() {
1340        let syntax = r#"
1341name: test
1342scope: source.test
1343contexts:
1344  main:
1345    - match: (?=hello)
1346      push: [b, a]
1347    - match: ello
1348      scope: test.good
1349  a:
1350    # This pop is ok, as we're not popping back to "main" yet (which would loop),
1351    # we're popping to "b"
1352    - match: ""
1353      pop: true
1354    - match: \w+
1355      scope: test.bad
1356  b:
1357    - match: \w+
1358      scope: test.good
1359"#;
1360
1361        let line = "hello";
1362        let expect = ["<source.test>, <test.good>"];
1363        expect_scope_stacks(line, &expect, syntax);
1364    }
1365
1366    #[test]
1367    fn can_parse_non_consuming_pop_of_recursive_context_that_does_not_loop() {
1368        let syntax = r#"
1369name: test
1370scope: source.test
1371contexts:
1372  main:
1373    - match: xxx
1374      scope: test.good
1375    - include: basic-identifiers
1376
1377  basic-identifiers:
1378    - match: '\w+::'
1379      scope: test.matched
1380      push: no-type-names
1381
1382  no-type-names:
1383      - include: basic-identifiers
1384      - match: \w+
1385        scope: test.matched.inside
1386      # This is a tricky one because when this is the best match,
1387      # we have two instances of "no-type-names" on the stack, so we're popping
1388      # back from "no-type-names" to another "no-type-names".
1389      - match: ''
1390        pop: true
1391"#;
1392
1393        let line = "foo::bar::* xxx";
1394        let expect = ["<source.test>, <test.good>"];
1395        expect_scope_stacks(line, &expect, syntax);
1396    }
1397
1398    #[test]
1399    fn can_parse_non_consuming_pop_order() {
1400        let syntax = r#"
1401name: test
1402scope: source.test
1403contexts:
1404  main:
1405    - match: (?=hello)
1406      push: test
1407  test:
1408    # This matches first
1409    - match: (?=e)
1410      push: good
1411    # But this (looping) match replaces it, because it's an earlier match
1412    - match: (?=h)
1413      pop: true
1414    # And this should not replace it, as it's a later match (only matches at
1415    # the same position can replace looping pops).
1416    - match: (?=o)
1417      push: bad
1418  good:
1419    - match: \w+
1420      scope: test.good
1421  bad:
1422    - match: \w+
1423      scope: test.bad
1424"#;
1425
1426        let line = "hello";
1427        let expect = ["<source.test>, <test.good>"];
1428        expect_scope_stacks(line, &expect, syntax);
1429    }
1430
1431    #[test]
1432    fn can_parse_prototype_with_embed() {
1433        let syntax = r#"
1434name: Javadoc
1435scope: text.html.javadoc
1436contexts:
1437  prototype:
1438    - match: \*
1439      scope: punctuation.definition.comment.javadoc
1440
1441  main:
1442    - meta_include_prototype: false
1443    - match: /\*\*
1444      scope: comment.block.documentation.javadoc punctuation.definition.comment.begin.javadoc
1445      embed: contents
1446      embed_scope: comment.block.documentation.javadoc text.html.javadoc
1447      escape: \*/
1448      escape_captures:
1449        0: comment.block.documentation.javadoc punctuation.definition.comment.end.javadoc
1450
1451  contents:
1452    - match: ''
1453"#;
1454
1455        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1456        expect_scope_stacks_with_syntax("/** * */", &["<comment.block.documentation.javadoc>, <punctuation.definition.comment.begin.javadoc>", "<comment.block.documentation.javadoc>, <text.html.javadoc>, <punctuation.definition.comment.javadoc>", "<comment.block.documentation.javadoc>, <punctuation.definition.comment.end.javadoc>"], syntax);
1457    }
1458
1459    #[test]
1460    fn can_parse_context_included_in_prototype_via_named_reference() {
1461        let syntax = r#"
1462scope: source.test
1463contexts:
1464  prototype:
1465    - match: a
1466      push: a
1467    - match: b
1468      scope: test.bad
1469  main:
1470    - match: unused
1471  # This context is included in the prototype (see `push: a`).
1472  # Because of that, ST doesn't apply the prototype to this context, so if
1473  # we're in here the "b" shouldn't match.
1474  a:
1475    - match: a
1476      scope: test.good
1477"#;
1478
1479        let stack_states = stack_states(parse("aa b", syntax));
1480        assert_eq!(
1481            stack_states,
1482            vec![
1483                "<source.test>",
1484                "<source.test>, <test.good>",
1485                "<source.test>",
1486            ],
1487            "Expected test.bad to not match"
1488        );
1489    }
1490
1491    #[test]
1492    fn can_parse_with_prototype_set() {
1493        let syntax = r#"%YAML 1.2
1494---
1495scope: source.test-set-with-proto
1496contexts:
1497  main:
1498    - match: a
1499      scope: a
1500      set: next1
1501      with_prototype:
1502        - match: '1'
1503          scope: '1'
1504        - match: '2'
1505          scope: '2'
1506        - match: '3'
1507          scope: '3'
1508        - match: '4'
1509          scope: '4'
1510    - match: '5'
1511      scope: '5'
1512      set: [next3, next2]
1513      with_prototype:
1514        - match: c
1515          scope: cwith
1516  next1:
1517    - match: b
1518      scope: b
1519      set: next2
1520  next2:
1521    - match: c
1522      scope: c
1523      push: next3
1524    - match: e
1525      scope: e
1526      pop: true
1527    - match: f
1528      scope: f
1529      set: [next1, next2]
1530  next3:
1531    - match: d
1532      scope: d
1533    - match: (?=e)
1534      pop: true
1535    - match: c
1536      scope: cwithout
1537"#;
1538
1539        expect_scope_stacks_with_syntax(
1540            "a1b2c3d4e5",
1541            &[
1542                "<a>", "<1>", "<b>", "<2>", "<c>", "<3>", "<d>", "<4>", "<e>", "<5>",
1543            ],
1544            SyntaxDefinition::load_from_str(syntax, true, None).unwrap(),
1545        );
1546        expect_scope_stacks_with_syntax(
1547            "5cfcecbedcdea",
1548            &[
1549                "<5>",
1550                "<cwith>",
1551                "<f>",
1552                "<e>",
1553                "<b>",
1554                "<d>",
1555                "<cwithout>",
1556                "<a>",
1557            ],
1558            SyntaxDefinition::load_from_str(syntax, true, None).unwrap(),
1559        );
1560    }
1561
1562    #[test]
1563    fn can_parse_issue176() {
1564        let syntax = r#"
1565scope: source.dummy
1566contexts:
1567  main:
1568    - match: (test)(?=(foo))(f)
1569      captures:
1570        1: test
1571        2: ignored
1572        3: f
1573      push:
1574        - match: (oo)
1575          captures:
1576            1: keyword
1577"#;
1578
1579        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1580        expect_scope_stacks_with_syntax(
1581            "testfoo",
1582            &["<test>", /*"<ignored>",*/ "<f>", "<keyword>"],
1583            syntax,
1584        );
1585    }
1586
1587    #[test]
1588    fn can_parse_two_with_prototypes_at_same_stack_level() {
1589        let syntax_yamlstr = r#"
1590%YAML 1.2
1591---
1592# See http://www.sublimetext.com/docs/3/syntax.html
1593scope: source.example-wp
1594contexts:
1595  main:
1596    - match: a
1597      scope: a
1598      push:
1599        - match: b
1600          scope: b
1601          set:
1602            - match: c
1603              scope: c
1604          with_prototype:
1605            - match: '2'
1606              scope: '2'
1607      with_prototype:
1608        - match: '1'
1609          scope: '1'
1610"#;
1611
1612        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1613        expect_scope_stacks_with_syntax("abc12", &["<1>", "<2>"], syntax);
1614    }
1615
1616    #[test]
1617    fn can_parse_two_with_prototypes_at_same_stack_level_set_multiple() {
1618        let syntax_yamlstr = r#"
1619%YAML 1.2
1620---
1621# See http://www.sublimetext.com/docs/3/syntax.html
1622scope: source.example-wp
1623contexts:
1624  main:
1625    - match: a
1626      scope: a
1627      push:
1628        - match: b
1629          scope: b
1630          set: [context1, context2, context3]
1631          with_prototype:
1632            - match: '2'
1633              scope: '2'
1634      with_prototype:
1635        - match: '1'
1636          scope: '1'
1637    - match: '1'
1638      scope: digit1
1639    - match: '2'
1640      scope: digit2
1641  context1:
1642    - match: e
1643      scope: e
1644      pop: true
1645    - match: '2'
1646      scope: digit2
1647  context2:
1648    - match: d
1649      scope: d
1650      pop: true
1651    - match: '2'
1652      scope: digit2
1653  context3:
1654    - match: c
1655      scope: c
1656      pop: true
1657"#;
1658
1659        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1660        expect_scope_stacks_with_syntax("ab12", &["<1>", "<2>"], syntax.clone());
1661        expect_scope_stacks_with_syntax("abc12", &["<1>", "<digit2>"], syntax.clone());
1662        expect_scope_stacks_with_syntax("abcd12", &["<1>", "<digit2>"], syntax.clone());
1663        expect_scope_stacks_with_syntax("abcde12", &["<digit1>", "<digit2>"], syntax);
1664    }
1665
/// Exercises a back-referencing `with_prototype` (`\1`, scope `1`) that is
/// stacked with a second prototype introduced by a later `set`.
///
/// For `aa` the back-reference is expected to match the second `a`. For
/// `abcdb` the `<1>` scope is still expected although no second `a` occurs,
/// so `\1` is presumably resolved against the capture of the later `(b)`
/// match (the "updated captures" of the test name).
#[test]
fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures() {
    let yaml = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
  main:
    - match: (a)
      scope: a
      push:
        - match: (b)
          scope: b
          set:
            - match: c
              scope: c
          with_prototype:
            - match: d
              scope: d
      with_prototype:
        - match: \1
          scope: '1'
          pop: true
"#;

    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();

    // (input line, scope stacks that must each be observed at least once)
    let cases: [(&str, &[&str]); 2] = [
        ("aa", &["<a>", "<1>"]),
        ("abcdb", &["<a>", "<b>", "<c>", "<d>", "<1>"]),
    ];
    for (line, expected) in cases.iter() {
        expect_scope_stacks_with_syntax(line, expected, definition.clone());
    }
}
1696
/// Same setup as the "updated captures" test, but the outer prototype also
/// refers to capture group `\2`, which the later `(b)` match does not define.
///
/// Only the scopes listed in the expectations are asserted; the helper does
/// not verify that other text stays unscoped.
#[test]
fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures_ignore_unexisting() {
    let yaml = r#"
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
scope: source.example-wp
contexts:
  main:
    - match: (a)(-)
      scope: a
      push:
        - match: (b)
          scope: b
          set:
            - match: c
              scope: c
          with_prototype:
            - match: d
              scope: d
      with_prototype:
        - match: \2
          scope: '2'
          pop: true
        - match: \1
          scope: '1'
          pop: true
"#;

    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();

    // Both groups exist here, so the `\2` rule is expected to fire on the second `-`.
    expect_scope_stacks_with_syntax("a--", &["<a>", "<2>"], definition.clone());

    // Sublime Text appears to react to a pop rule whose back-reference does not
    // exist by popping back to the original parent context of the
    // with_prototype, which leaves `cdb` unscoped.
    // TODO: having syntest functionality available here would make this kind of
    // test easier to write and clearer to read.
    expect_scope_stacks_with_syntax("a-bcdba-", &["<a>", "<b>"], definition);
}
1732
/// A pattern made of `$` followed by an explicit `\n` must scope `foo`.
///
/// `expect_scope_stacks` runs the check twice: against the line with a
/// trailing newline and against the bare line.
#[test]
fn can_parse_syntax_with_eol_and_newline() {
    let yaml = r#"
name: test
scope: source.test
contexts:
  main:
    - match: foo$\n
      scope: foo.newline
"#;

    expect_scope_stacks("foo", &["<source.test>, <foo.newline>"], yaml);
}
1748
/// A pattern that ends in a bare `$` anchor must scope `foo`.
///
/// `expect_scope_stacks` runs the check twice: against the line with a
/// trailing newline and against the bare line.
#[test]
fn can_parse_syntax_with_eol_only() {
    let yaml = r#"
name: test
scope: source.test
contexts:
  main:
    - match: foo$
      scope: foo.newline
"#;

    expect_scope_stacks("foo", &["<source.test>, <foo.newline>"], yaml);
}
1764
/// Feeds several lines through one `ParseState` to check where a `^`-anchored
/// pop rule may fire.
///
/// After a word is matched, the pushed context must stay active until the
/// next line: `===` is then scoped as `heading`, while a blank line pops the
/// context so that a following `====` is scoped as `other` by `main`.
#[test]
fn can_parse_syntax_with_beginning_of_line() {
    let yaml = r#"
name: test
scope: source.test
contexts:
  main:
    - match: \w+
      scope: word
      push:
        # this should not match at the end of the line
        - match: ^\s*$
          pop: true
        - match: =+
          scope: heading
          pop: true
    - match: .*
      scope: other
"#;

    let syntax_set = link(SyntaxDefinition::load_from_str(yaml, true, None).unwrap());
    let mut state = ParseState::new(&syntax_set.syntaxes()[0]);

    // Shorthand for the push operation of a named scope.
    let push_scope = |name: &str| Push(Scope::new(name).unwrap());

    // First line: the top-level scope is pushed, then the word is scoped.
    assert_eq!(
        ops(&mut state, "foo\n", &syntax_set),
        vec![
            (0, push_scope("source.test")),
            (0, push_scope("word")),
            (3, Pop(1))
        ]
    );
    // The context pushed by the word is still active, so this is a heading.
    assert_eq!(
        ops(&mut state, "===\n", &syntax_set),
        vec![(0, push_scope("heading")), (3, Pop(1))]
    );

    assert_eq!(
        ops(&mut state, "bar\n", &syntax_set),
        vec![(0, push_scope("word")), (3, Pop(1))]
    );
    // This should result in popping out of the context
    assert_eq!(ops(&mut state, "\n", &syntax_set), vec![]);
    // So now this matches other
    assert_eq!(
        ops(&mut state, "====\n", &syntax_set),
        vec![(0, push_scope("other")), (4, Pop(1))]
    );
}
1814
/// A line comment pattern ending in `$` must stop in front of the newline
/// rather than consume it.
#[test]
fn can_parse_syntax_with_comment_and_eol() {
    let yaml = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (//).*$
      scope: comment.line.double-slash
"#;

    let syntax_set = link(SyntaxDefinition::load_from_str(yaml, true, None).unwrap());
    let mut state = ParseState::new(&syntax_set.syntaxes()[0]);

    // Shorthand for the push operation of a named scope.
    let push_scope = |name: &str| Push(Scope::new(name).unwrap());

    assert_eq!(
        ops(&mut state, "// foo\n", &syntax_set),
        vec![
            (0, push_scope("source.test")),
            (0, push_scope("comment.line.double-slash")),
            // The pop offset must be 6, not 7: the pattern has to match *before*
            // the newline instead of consuming it. Whitespace-sensitive syntaxes
            // in which a newline terminates a statement (Scala, for example)
            // depend on this.
            (6, Pop(1))
        ]
    );
}
1842
/// The `x` after a multi-byte character must still be scoped when the only
/// rules that can match at that character are zero-width lookaheads that
/// push and pop a context.
///
/// One input per UTF-8 encoding length above a single byte is checked.
#[test]
fn can_parse_text_with_unicode_to_skip() {
    let yaml = r#"
name: test
scope: source.test
contexts:
  main:
    - match: (?=.)
      push: test
  test:
    - match: (?=.)
      pop: true
    - match: x
      scope: test.good
"#;

    let expected = ["<source.test>, <test.good>"];
    let lines = [
        // U+03C0 GREEK SMALL LETTER PI, 2 bytes in UTF-8
        "\u{03C0}x",
        // U+0800 SAMARITAN LETTER ALAF, 3 bytes in UTF-8
        "\u{0800}x",
        // U+1F600 GRINNING FACE, 4 bytes in UTF-8
        "\u{1F600}x",
    ];
    for line in lines.iter() {
        expect_scope_stacks(line, &expected, yaml);
    }
}
1866
/// A rule with a back-reference (`\1`) must still work when it reaches the
/// pushed context through an `include` of another context.
#[test]
fn can_include_backrefs() {
    let yaml = r#"
                name: Backref Include Test
                scope: source.backrefinc
                contexts:
                  main:
                    - match: (a)
                      scope: a
                      push: context1
                  context1:
                    - include: context2
                  context2:
                    - match: \1
                      scope: b
                      pop: true
                "#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();

    // The second `a` has to be matched by `\1` and scoped as `b`.
    expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], definition);
}
1892
/// Like `can_include_backrefs`, but the back-referencing rule is two
/// `include` hops away from the pushed context
/// (`context1` -> `context3` -> `context2`).
#[test]
fn can_include_nested_backrefs() {
    let yaml = r#"
                name: Backref Include Test
                scope: source.backrefinc
                contexts:
                  main:
                    - match: (a)
                      scope: a
                      push: context1
                  context1:
                    - include: context3
                  context3:
                    - include: context2
                  context2:
                    - match: \1
                      scope: b
                      pop: true
                "#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();

    // The second `a` has to be matched by `\1` and scoped as `b`.
    expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], definition);
}
1920
/// `context1` contains an empty match that pushes `context1` again, followed
/// by an empty match that pops. Judging by the test name, the parser is
/// expected not to grow the stack without bound on this input; the test
/// requires that both lines parse and that the `a` and `b` scopes are
/// produced.
#[test]
fn can_avoid_infinite_stack_depth() {
    let yaml = r#"
                name: Stack Depth Test
                scope: source.stack_depth
                contexts:
                  main:
                    - match: (a)
                      scope: a
                      push: context1

                    
                  context1:
                    - match: b
                      scope: b
                    - match: ''
                      push: context1
                    - match: ''
                      pop: 1
                    - match: c
                      scope: c
                "#;
    let definition = SyntaxDefinition::load_from_str(yaml, true, None).unwrap();

    let syntax_set = link(definition);
    // One state for both lines, so the second line starts from the stack the
    // first line left behind.
    let mut state = ParseState::new(&syntax_set.syntaxes()[0]);

    // (input line, scope stack that must be observed while parsing it)
    let steps = [("a bc\n", "<a>"), ("bc\n", "<b>")];
    for (line, expected) in steps.iter() {
        expect_scope_stacks_for_ops(ops(&mut state, line, &syntax_set), &[*expected]);
    }
}
1954
1955    fn expect_scope_stacks(line_without_newline: &str, expect: &[&str], syntax: &str) {
1956        println!("Parsing with newlines");
1957        let line_with_newline = format!("{}\n", line_without_newline);
1958        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1959        expect_scope_stacks_with_syntax(&line_with_newline, expect, syntax_newlines);
1960
1961        println!("Parsing without newlines");
1962        let syntax_nonewlines = SyntaxDefinition::load_from_str(syntax, false, None).unwrap();
1963        expect_scope_stacks_with_syntax(line_without_newline, expect, syntax_nonewlines);
1964    }
1965
1966    fn expect_scope_stacks_with_syntax(line: &str, expect: &[&str], syntax: SyntaxDefinition) {
1967        // check that each expected scope stack appears at least once while parsing the given test line
1968
1969        let syntax_set = link(syntax);
1970        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
1971        let ops = ops(&mut state, line, &syntax_set);
1972        expect_scope_stacks_for_ops(ops, expect);
1973    }
1974
1975    fn expect_scope_stacks_for_ops(ops: Vec<(usize, ScopeStackOp)>, expect: &[&str]) {
1976        let mut criteria_met = Vec::new();
1977        for stack_str in stack_states(ops) {
1978            println!("{}", stack_str);
1979            for expectation in expect.iter() {
1980                if stack_str.contains(expectation) {
1981                    criteria_met.push(expectation);
1982                }
1983            }
1984        }
1985        if let Some(missing) = expect.iter().find(|e| !criteria_met.contains(e)) {
1986            panic!("expected scope stack '{}' missing", missing);
1987        }
1988    }
1989
1990    fn parse(line: &str, syntax: &str) -> Vec<(usize, ScopeStackOp)> {
1991        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1992        let syntax_set = link(syntax);
1993
1994        let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
1995        ops(&mut state, line, &syntax_set)
1996    }
1997
1998    fn link(syntax: SyntaxDefinition) -> SyntaxSet {
1999        let mut builder = SyntaxSetBuilder::new();
2000        builder.add(syntax);
2001        builder.build()
2002    }
2003
2004    fn ops(
2005        state: &mut ParseState,
2006        line: &str,
2007        syntax_set: &SyntaxSet,
2008    ) -> Vec<(usize, ScopeStackOp)> {
2009        let ops = state.parse_line(line, syntax_set).expect("#[cfg(test)]");
2010        debug_print_ops(line, &ops);
2011        ops
2012    }
2013
2014    fn stack_states(ops: Vec<(usize, ScopeStackOp)>) -> Vec<String> {
2015        let mut states = Vec::new();
2016        let mut stack = ScopeStack::new();
2017        for (_, op) in ops.iter() {
2018            stack.apply(op).expect("#[cfg(test)]");
2019            let scopes: Vec<String> = stack
2020                .as_slice()
2021                .iter()
2022                .map(|s| format!("{:?}", s))
2023                .collect();
2024            let stack_str = scopes.join(", ");
2025            states.push(stack_str);
2026        }
2027        states
2028    }
2029}
syntect/parsing/parser.rs

syntect/parsing/
parser.rs