syntect/parsing/
syntax_set.rs

1use super::scope::*;
2use super::syntax_definition::*;
3use super::ParsingError;
4
5#[cfg(feature = "metadata")]
6use super::metadata::{LoadMetadata, Metadata, RawMetadataEntry};
7
8#[cfg(feature = "yaml-load")]
9use super::super::LoadingError;
10
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fs::File;
13use std::io::{self, BufRead, BufReader};
14use std::mem;
15use std::path::Path;
16
17use super::regex::Regex;
18use crate::parsing::syntax_definition::ContextId;
19use once_cell::sync::OnceCell;
20use serde_derive::{Deserialize, Serialize};
21
22/// A syntax set holds multiple syntaxes that have been linked together.
23///
24/// Use a [`SyntaxSetBuilder`] to load syntax definitions and build a syntax set.
25///
26/// After building, the syntax set is immutable and can no longer be modified, but you can convert
27/// it back into a builder by using the [`into_builder`] method.
28///
29/// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
30/// [`into_builder`]: #method.into_builder
31#[derive(Debug, Serialize, Deserialize)]
32pub struct SyntaxSet {
33    syntaxes: Vec<SyntaxReference>,
34    /// Stores the syntax index for every path that was loaded
35    path_syntaxes: Vec<(String, usize)>,
36
37    #[serde(skip_serializing, skip_deserializing, default = "OnceCell::new")]
38    first_line_cache: OnceCell<FirstLineCache>,
39    /// Metadata, e.g. indent and commenting information.
40    ///
41    /// NOTE: if serializing, you should handle metadata manually; that is, you should serialize and
42    /// deserialize it separately. See `examples/gendata.rs` for an example.
43    #[cfg(feature = "metadata")]
44    #[serde(skip, default)]
45    pub(crate) metadata: Metadata,
46}
47
48/// A linked version of a [`SyntaxDefinition`] that is only useful as part of the
49/// [`SyntaxSet`] that contains it. See docs for [`SyntaxSetBuilder::build`] for
50/// more info.
51#[derive(Clone, Debug, Serialize, Deserialize)]
52pub struct SyntaxReference {
53    pub name: String,
54    pub file_extensions: Vec<String>,
55    pub scope: Scope,
56    pub first_line_match: Option<String>,
57    pub hidden: bool,
58    #[serde(serialize_with = "ordered_map")]
59    pub variables: HashMap<String, String>,
60    #[serde(skip)]
61    pub(crate) lazy_contexts: OnceCell<LazyContexts>,
62    pub(crate) serialized_lazy_contexts: Vec<u8>,
63}
64
65/// The lazy-loaded parts of a [`SyntaxReference`].
66#[derive(Clone, Debug, Serialize, Deserialize)]
67pub(crate) struct LazyContexts {
68    #[serde(serialize_with = "ordered_map")]
69    pub(crate) context_ids: HashMap<String, ContextId>,
70    pub(crate) contexts: Vec<Context>,
71}
72
73/// A syntax set builder is used for loading syntax definitions from the file
74/// system or by adding [`SyntaxDefinition`] objects.
75///
76/// Once all the syntaxes have been added, call [`build`] to turn the builder into
77/// a [`SyntaxSet`] that can be used for parsing or highlighting.
78///
79/// [`SyntaxDefinition`]: syntax_definition/struct.SyntaxDefinition.html
80/// [`build`]: #method.build
81/// [`SyntaxSet`]: struct.SyntaxSet.html
82#[derive(Clone, Default)]
83pub struct SyntaxSetBuilder {
84    syntaxes: Vec<SyntaxDefinition>,
85    path_syntaxes: Vec<(String, usize)>,
86    #[cfg(feature = "metadata")]
87    raw_metadata: LoadMetadata,
88
89    /// If this `SyntaxSetBuilder` is created with `SyntaxSet::into_builder`
90    /// from a `SyntaxSet` that already had metadata, we keep that metadata,
91    /// merging it with newly loaded metadata.
92    #[cfg(feature = "metadata")]
93    existing_metadata: Option<Metadata>,
94}
95
/// Reads the file at `p` and parses its contents as a syntax definition.
///
/// `lines_include_newline` is forwarded unchanged to the parser. The file stem of `p` (when it
/// is valid UTF-8) is passed along as the parser's third argument.
///
/// # Errors
///
/// Propagates the I/O error if the file cannot be read, and returns
/// `LoadingError::ParseSyntax` (carrying the displayed path) if parsing fails.
#[cfg(feature = "yaml-load")]
fn load_syntax_file(
    p: &Path,
    lines_include_newline: bool,
) -> Result<SyntaxDefinition, LoadingError> {
    let source = std::fs::read_to_string(p)?;
    let stem = p.file_stem().and_then(|stem| stem.to_str());

    match SyntaxDefinition::load_from_str(&source, lines_include_newline, stem) {
        Ok(definition) => Ok(definition),
        Err(err) => Err(LoadingError::ParseSyntax(err, p.display().to_string())),
    }
}
110
111impl Clone for SyntaxSet {
112    fn clone(&self) -> SyntaxSet {
113        SyntaxSet {
114            syntaxes: self.syntaxes.clone(),
115            path_syntaxes: self.path_syntaxes.clone(),
116            // Will need to be re-initialized
117            first_line_cache: OnceCell::new(),
118            #[cfg(feature = "metadata")]
119            metadata: self.metadata.clone(),
120        }
121    }
122}
123
124impl Default for SyntaxSet {
125    fn default() -> Self {
126        SyntaxSet {
127            syntaxes: Vec::new(),
128            path_syntaxes: Vec::new(),
129            first_line_cache: OnceCell::new(),
130            #[cfg(feature = "metadata")]
131            metadata: Metadata::default(),
132        }
133    }
134}
135
136impl SyntaxSet {
137    pub fn new() -> SyntaxSet {
138        SyntaxSet::default()
139    }
140
141    /// Convenience constructor for creating a builder, then loading syntax
142    /// definitions from a folder and then building the syntax set.
143    ///
144    /// Note that this uses `lines_include_newline` set to `false`, see the
145    /// [`add_from_folder`] method docs on [`SyntaxSetBuilder`] for an explanation
146    /// as to why this might not be the best.
147    ///
148    /// [`add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder
149    /// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
150    #[cfg(feature = "yaml-load")]
151    pub fn load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError> {
152        let mut builder = SyntaxSetBuilder::new();
153        builder.add_from_folder(folder, false)?;
154        Ok(builder.build())
155    }
156
157    /// The list of syntaxes in the set
158    pub fn syntaxes(&self) -> &[SyntaxReference] {
159        &self.syntaxes[..]
160    }
161
162    #[cfg(feature = "metadata")]
163    pub fn set_metadata(&mut self, metadata: Metadata) {
164        self.metadata = metadata;
165    }
166
167    /// The loaded metadata for this set.
168    #[cfg(feature = "metadata")]
169    pub fn metadata(&self) -> &Metadata {
170        &self.metadata
171    }
172
173    /// Finds a syntax by its default scope, for example `source.regexp` finds the regex syntax.
174    ///
175    /// This and all similar methods below do a linear search of syntaxes, this should be fast
176    /// because there aren't many syntaxes, but don't think you can call it a bajillion times per
177    /// second.
178    pub fn find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference> {
179        self.syntaxes.iter().rev().find(|&s| s.scope == scope)
180    }
181
182    pub fn find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference> {
183        self.syntaxes.iter().rev().find(|&s| name == s.name)
184    }
185
186    pub fn find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference> {
187        self.syntaxes.iter().rev().find(|&s| {
188            s.file_extensions
189                .iter()
190                .any(|e| e.eq_ignore_ascii_case(extension))
191        })
192    }
193
194    /// Searches for a syntax first by extension and then by case-insensitive name
195    ///
196    /// This is useful for things like Github-flavoured-markdown code block highlighting where all
197    /// you have to go on is a short token given by the user
198    pub fn find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
199        {
200            let ext_res = self.find_syntax_by_extension(s);
201            if ext_res.is_some() {
202                return ext_res;
203            }
204        }
205        self.syntaxes
206            .iter()
207            .rev()
208            .find(|&syntax| syntax.name.eq_ignore_ascii_case(s))
209    }
210
211    /// Try to find the syntax for a file based on its first line
212    ///
213    /// This uses regexes that come with some sublime syntax grammars for matching things like
214    /// shebangs and mode lines like `-*- Mode: C -*-`
215    pub fn find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
216        let s = s.strip_prefix("\u{feff}").unwrap_or(s); // Strip UTF-8 BOM
217        let cache = self.first_line_cache();
218        for &(ref reg, i) in cache.regexes.iter().rev() {
219            if reg.search(s, 0, s.len(), None) {
220                return Some(&self.syntaxes[i]);
221            }
222        }
223        None
224    }
225
226    /// Searches for a syntax by it's original file path when it was first loaded from disk
227    ///
228    /// This is primarily useful for syntax tests. Some may specify a
229    /// `Packages/PackageName/SyntaxName.sublime-syntax` path, and others may just have
230    /// `SyntaxName.sublime-syntax`. This caters for these by matching the end of the path of the
231    /// loaded syntax definition files
232    // however, if a syntax name is provided without a folder, make sure we don't accidentally match the end of a different syntax definition's name - by checking a / comes before it or it is the full path
233    pub fn find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference> {
234        let mut slash_path = "/".to_string();
235        slash_path.push_str(path);
236        self.path_syntaxes
237            .iter()
238            .rev()
239            .find(|t| t.0.ends_with(&slash_path) || t.0 == path)
240            .map(|&(_, i)| &self.syntaxes[i])
241    }
242
243    /// Convenience method that tries to find the syntax for a file path, first by extension/name
244    /// and then by first line of the file if that doesn't work.
245    ///
246    /// May IO Error because it sometimes tries to read the first line of the file.
247    ///
248    /// # Examples
249    ///
250    /// When determining how to highlight a file, use this in combination with a fallback to plain
251    /// text:
252    ///
253    /// ```
254    /// use syntect::parsing::SyntaxSet;
255    /// let ss = SyntaxSet::load_defaults_newlines();
256    /// let syntax = ss.find_syntax_for_file("testdata/highlight_test.erb")
257    ///     .unwrap() // for IO errors, you may want to use try!() or another plain text fallback
258    ///     .unwrap_or_else(|| ss.find_syntax_plain_text());
259    /// assert_eq!(syntax.name, "HTML (Rails)");
260    /// ```
261    pub fn find_syntax_for_file<P: AsRef<Path>>(
262        &self,
263        path_obj: P,
264    ) -> io::Result<Option<&SyntaxReference>> {
265        let path: &Path = path_obj.as_ref();
266        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
267        let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
268        let ext_syntax = self
269            .find_syntax_by_extension(file_name)
270            .or_else(|| self.find_syntax_by_extension(extension));
271        let line_syntax = if ext_syntax.is_none() {
272            let mut line = String::new();
273            let f = File::open(path)?;
274            let mut line_reader = BufReader::new(&f);
275            line_reader.read_line(&mut line)?;
276            self.find_syntax_by_first_line(&line)
277        } else {
278            None
279        };
280        let syntax = ext_syntax.or(line_syntax);
281        Ok(syntax)
282    }
283
284    /// Finds a syntax for plain text, which usually has no highlighting rules.
285    ///
286    /// This is good as a fallback when you can't find another syntax but you still want to use the
287    /// same highlighting pipeline code.
288    ///
289    /// This syntax should always be present, if not this method will panic. If the way you load
290    /// syntaxes doesn't create one, use [`add_plain_text_syntax`].
291    ///
292    /// # Examples
293    /// ```
294    /// use syntect::parsing::SyntaxSetBuilder;
295    /// let mut builder = SyntaxSetBuilder::new();
296    /// builder.add_plain_text_syntax();
297    /// let ss = builder.build();
298    /// let syntax = ss.find_syntax_by_token("rs").unwrap_or_else(|| ss.find_syntax_plain_text());
299    /// assert_eq!(syntax.name, "Plain Text");
300    /// ```
301    ///
302    /// [`add_plain_text_syntax`]: struct.SyntaxSetBuilder.html#method.add_plain_text_syntax
303    pub fn find_syntax_plain_text(&self) -> &SyntaxReference {
304        self.find_syntax_by_name("Plain Text")
305            .expect("All syntax sets ought to have a plain text syntax")
306    }
307
308    /// Converts this syntax set into a builder so that more syntaxes can be
309    /// added to it.
310    ///
311    /// Note that newly added syntaxes can have references to existing syntaxes
312    /// in the set, but not the other way around.
313    pub fn into_builder(self) -> SyntaxSetBuilder {
314        #[cfg(feature = "metadata")]
315        let SyntaxSet {
316            syntaxes,
317            path_syntaxes,
318            metadata,
319            ..
320        } = self;
321        #[cfg(not(feature = "metadata"))]
322        let SyntaxSet {
323            syntaxes,
324            path_syntaxes,
325            ..
326        } = self;
327
328        let mut context_map = HashMap::new();
329        for (syntax_index, syntax) in syntaxes.iter().enumerate() {
330            for (context_index, context) in syntax.contexts().iter().enumerate() {
331                context_map.insert(
332                    ContextId {
333                        syntax_index,
334                        context_index,
335                    },
336                    context.clone(),
337                );
338            }
339        }
340
341        let mut builder_syntaxes = Vec::with_capacity(syntaxes.len());
342
343        for syntax in syntaxes {
344            let SyntaxReference {
345                name,
346                file_extensions,
347                scope,
348                first_line_match,
349                hidden,
350                variables,
351                serialized_lazy_contexts,
352                ..
353            } = syntax;
354
355            let lazy_contexts = LazyContexts::deserialize(&serialized_lazy_contexts[..]);
356            let mut builder_contexts = HashMap::with_capacity(lazy_contexts.context_ids.len());
357            for (name, context_id) in lazy_contexts.context_ids {
358                if let Some(context) = context_map.remove(&context_id) {
359                    builder_contexts.insert(name, context);
360                }
361            }
362
363            let syntax_definition = SyntaxDefinition {
364                name,
365                file_extensions,
366                scope,
367                first_line_match,
368                hidden,
369                variables,
370                contexts: builder_contexts,
371            };
372            builder_syntaxes.push(syntax_definition);
373        }
374
375        SyntaxSetBuilder {
376            syntaxes: builder_syntaxes,
377            path_syntaxes,
378            #[cfg(feature = "metadata")]
379            existing_metadata: Some(metadata),
380            #[cfg(feature = "metadata")]
381            raw_metadata: LoadMetadata::default(),
382        }
383    }
384
385    #[inline(always)]
386    pub(crate) fn get_context(&self, context_id: &ContextId) -> Result<&Context, ParsingError> {
387        let syntax = &self
388            .syntaxes
389            .get(context_id.syntax_index)
390            .ok_or(ParsingError::MissingContext(*context_id))?;
391        syntax
392            .contexts()
393            .get(context_id.context_index)
394            .ok_or(ParsingError::MissingContext(*context_id))
395    }
396
397    fn first_line_cache(&self) -> &FirstLineCache {
398        self.first_line_cache
399            .get_or_init(|| FirstLineCache::new(self.syntaxes()))
400    }
401
402    pub fn find_unlinked_contexts(&self) -> BTreeSet<String> {
403        let SyntaxSet { syntaxes, .. } = self;
404
405        let mut unlinked_contexts = BTreeSet::new();
406
407        for syntax in syntaxes {
408            let SyntaxReference { name, scope, .. } = syntax;
409
410            for context in syntax.contexts() {
411                Self::find_unlinked_contexts_in_context(
412                    name,
413                    scope,
414                    context,
415                    &mut unlinked_contexts,
416                );
417            }
418        }
419        unlinked_contexts
420    }
421
422    fn find_unlinked_contexts_in_context(
423        name: &str,
424        scope: &Scope,
425        context: &Context,
426        unlinked_contexts: &mut BTreeSet<String>,
427    ) {
428        for pattern in context.patterns.iter() {
429            let maybe_refs_to_check = match pattern {
430                Pattern::Match(match_pat) => match &match_pat.operation {
431                    MatchOperation::Push(context_refs) => Some(context_refs),
432                    MatchOperation::Set(context_refs) => Some(context_refs),
433                    _ => None,
434                },
435                _ => None,
436            };
437            for context_ref in maybe_refs_to_check.into_iter().flatten() {
438                match context_ref {
439                    ContextReference::Direct(_) => {}
440                    _ => {
441                        unlinked_contexts.insert(format!(
442                            "Syntax '{}' with scope '{}' has unresolved context reference {:?}",
443                            name, scope, &context_ref
444                        ));
445                    }
446                }
447            }
448        }
449    }
450}
451
452impl SyntaxReference {
453    pub(crate) fn context_ids(&self) -> &HashMap<String, ContextId> {
454        &self.lazy_contexts().context_ids
455    }
456
457    fn contexts(&self) -> &[Context] {
458        &self.lazy_contexts().contexts
459    }
460
461    fn lazy_contexts(&self) -> &LazyContexts {
462        self.lazy_contexts
463            .get_or_init(|| LazyContexts::deserialize(&self.serialized_lazy_contexts[..]))
464    }
465}
466
467impl LazyContexts {
468    fn deserialize(data: &[u8]) -> LazyContexts {
469        crate::dumps::from_reader(data).expect("data is not corrupt or out of sync with the code")
470    }
471}
472
473impl SyntaxSetBuilder {
474    pub fn new() -> SyntaxSetBuilder {
475        SyntaxSetBuilder::default()
476    }
477
478    /// Add a syntax to the set.
479    pub fn add(&mut self, syntax: SyntaxDefinition) {
480        self.syntaxes.push(syntax);
481    }
482
483    /// The list of syntaxes added so far.
484    pub fn syntaxes(&self) -> &[SyntaxDefinition] {
485        &self.syntaxes[..]
486    }
487
488    /// A rarely useful method that loads in a syntax with no highlighting rules for plain text
489    ///
490    /// Exists mainly for adding the plain text syntax to syntax set dumps, because for some reason
491    /// the default Sublime plain text syntax is still in `.tmLanguage` format.
492    #[cfg(feature = "yaml-load")]
493    pub fn add_plain_text_syntax(&mut self) {
494        let s = "---\nname: Plain Text\nfile_extensions: [txt]\nscope: text.plain\ncontexts: \
495                 {main: []}";
496        let syn = SyntaxDefinition::load_from_str(s, false, None).unwrap();
497        self.syntaxes.push(syn);
498    }
499
500    /// Loads all the `.sublime-syntax` files in a folder into this builder.
501    ///
502    /// The `lines_include_newline` parameter is used to work around the fact that Sublime Text
503    /// normally passes line strings including newline characters (`\n`) to its regex engine. This
504    /// results in many syntaxes having regexes matching `\n`, which doesn't work if you don't pass
505    /// in newlines. It is recommended that if you can you pass in lines with newlines if you can
506    /// and pass `true` for this parameter. If that is inconvenient pass `false` and the loader
507    /// will do some hacky find and replaces on the match regexes that seem to work for the default
508    /// syntax set, but may not work for any other syntaxes.
509    ///
510    /// In the future I might include a "slow mode" that copies the lines passed in and appends a
511    /// newline if there isn't one, but in the interest of performance currently this hacky fix will
512    /// have to do.
513    #[cfg(feature = "yaml-load")]
514    pub fn add_from_folder<P: AsRef<Path>>(
515        &mut self,
516        folder: P,
517        lines_include_newline: bool,
518    ) -> Result<(), LoadingError> {
519        for entry in crate::utils::walk_dir(folder).sort_by(|a, b| a.file_name().cmp(b.file_name()))
520        {
521            let entry = entry.map_err(LoadingError::WalkDir)?;
522            if entry
523                .path()
524                .extension()
525                .is_some_and(|e| e == "sublime-syntax")
526            {
527                let syntax = load_syntax_file(entry.path(), lines_include_newline)?;
528                if let Some(path_str) = entry.path().to_str() {
529                    // Split the path up and rejoin with slashes so that syntaxes loaded on Windows
530                    // can still be loaded the same way.
531                    let path = Path::new(path_str);
532                    let path_parts: Vec<_> = path.iter().map(|c| c.to_str().unwrap()).collect();
533                    self.path_syntaxes
534                        .push((path_parts.join("/").to_string(), self.syntaxes.len()));
535                }
536                self.syntaxes.push(syntax);
537            }
538
539            #[cfg(feature = "metadata")]
540            {
541                if entry.path().extension() == Some("tmPreferences".as_ref()) {
542                    match RawMetadataEntry::load(entry.path()) {
543                        Ok(meta) => self.raw_metadata.add_raw(meta),
544                        Err(_err) => (),
545                    }
546                }
547            }
548        }
549
550        Ok(())
551    }
552
553    /// Build a [`SyntaxSet`] from the syntaxes that have been added to this
554    /// builder.
555    ///
556    /// ### Linking
557    ///
558    /// The contexts in syntaxes can reference other contexts in the same syntax
559    /// or even other syntaxes. For example, a HTML syntax can reference a CSS
560    /// syntax so that CSS blocks in HTML work as expected.
561    ///
562    /// Those references work in various ways and involve one or two lookups.
563    /// To avoid having to do these lookups during parsing/highlighting, the
564    /// references are changed to directly reference contexts via index. That's
565    /// called linking.
566    ///
567    /// Linking is done in this build step. So in order to get the best
568    /// performance, you should try to avoid calling this too much. Ideally,
569    /// create a [`SyntaxSet`] once and then use it many times. If you can,
570    /// serialize a [`SyntaxSet`] for your program and when you run the program,
571    /// directly load the [`SyntaxSet`].
572    ///
573    /// [`SyntaxSet`]: struct.SyntaxSet.html
574    pub fn build(self) -> SyntaxSet {
575        #[cfg(not(feature = "metadata"))]
576        let SyntaxSetBuilder {
577            syntaxes: syntax_definitions,
578            path_syntaxes,
579        } = self;
580        #[cfg(feature = "metadata")]
581        let SyntaxSetBuilder {
582            syntaxes: syntax_definitions,
583            path_syntaxes,
584            raw_metadata,
585            existing_metadata,
586        } = self;
587
588        let mut syntaxes = Vec::with_capacity(syntax_definitions.len());
589        let mut all_context_ids = Vec::new();
590        let mut all_contexts = vec![Vec::new(); syntax_definitions.len()];
591
592        for (syntax_index, syntax_definition) in syntax_definitions.into_iter().enumerate() {
593            let SyntaxDefinition {
594                name,
595                file_extensions,
596                scope,
597                first_line_match,
598                hidden,
599                variables,
600                contexts,
601            } = syntax_definition;
602
603            let mut context_ids = HashMap::new();
604
605            let mut contexts: Vec<(String, Context)> = contexts.into_iter().collect();
606            // Sort the values of the HashMap so that the contexts in the
607            // resulting SyntaxSet have a deterministic order for serializing.
608            // Because we're sorting by the keys which are unique, we can use
609            // an unstable sort.
610            contexts.sort_unstable_by(|(name_a, _), (name_b, _)| name_a.cmp(name_b));
611            for (name, context) in contexts {
612                let context_index = all_contexts[syntax_index].len();
613                context_ids.insert(
614                    name,
615                    ContextId {
616                        syntax_index,
617                        context_index,
618                    },
619                );
620                all_contexts[syntax_index].push(context);
621            }
622
623            let syntax = SyntaxReference {
624                name,
625                file_extensions,
626                scope,
627                first_line_match,
628                hidden,
629                variables,
630                lazy_contexts: OnceCell::new(),
631                serialized_lazy_contexts: Vec::new(), // initialized in the last step
632            };
633            syntaxes.push(syntax);
634            all_context_ids.push(context_ids);
635        }
636
637        let mut found_more_backref_includes = true;
638        for (syntax_index, _syntax) in syntaxes.iter().enumerate() {
639            let mut no_prototype = HashSet::new();
640            let prototype = all_context_ids[syntax_index].get("prototype");
641            if let Some(prototype_id) = prototype {
642                // TODO: We could do this after parsing YAML, instead of here?
643                Self::recursively_mark_no_prototype(
644                    prototype_id,
645                    &all_context_ids[syntax_index],
646                    &all_contexts,
647                    &mut no_prototype,
648                );
649            }
650
651            for context_id in all_context_ids[syntax_index].values() {
652                let context = &mut all_contexts[context_id.syntax_index][context_id.context_index];
653                if let Some(prototype_id) = prototype {
654                    if context.meta_include_prototype && !no_prototype.contains(context_id) {
655                        context.prototype = Some(*prototype_id);
656                    }
657                }
658                Self::link_context(context, syntax_index, &all_context_ids, &syntaxes);
659
660                if context.uses_backrefs {
661                    found_more_backref_includes = true;
662                }
663            }
664        }
665
666        // We need to recursively mark contexts that include contexts which
667        // use backreferences as using backreferences. In theory we could use
668        // a more efficient method here like doing a toposort or constructing
669        // a representation with reversed edges and then tracing in the
670        // opposite direction, but I benchmarked this and it adds <2% to link
671        // time on the default syntax set, and linking doesn't even happen
672        // when loading from a binary dump.
673        while found_more_backref_includes {
674            found_more_backref_includes = false;
675            // find any contexts which include a context which uses backrefs
676            // and mark those as using backrefs - to support nested includes
677            for syntax_index in 0..syntaxes.len() {
678                for context_index in 0..all_contexts[syntax_index].len() {
679                    let context = &all_contexts[syntax_index][context_index];
680                    if !context.uses_backrefs && context.patterns.iter().any(|pattern| {
681                        matches!(pattern, Pattern::Include(ContextReference::Direct(id)) if all_contexts[id.syntax_index][id.context_index].uses_backrefs)
682                    }) {
683                        let context = &mut all_contexts[syntax_index][context_index];
684                        context.uses_backrefs = true;
685                        // look for contexts including this context
686                        found_more_backref_includes = true;
687                    }
688                }
689            }
690        }
691
692        #[cfg(feature = "metadata")]
693        let metadata = match existing_metadata {
694            Some(existing) => existing.merged_with_raw(raw_metadata),
695            None => raw_metadata.into(),
696        };
697
698        // The combination of
699        //  * the algorithms above
700        //  * the borrow checker
701        // makes it necessary to set these up as the last step.
702        for syntax in &mut syntaxes {
703            let lazy_contexts = LazyContexts {
704                context_ids: all_context_ids.remove(0),
705                contexts: all_contexts.remove(0),
706            };
707
708            syntax.serialized_lazy_contexts = crate::dumps::dump_binary(&lazy_contexts);
709        }
710
711        SyntaxSet {
712            syntaxes,
713            path_syntaxes,
714            first_line_cache: OnceCell::new(),
715            #[cfg(feature = "metadata")]
716            metadata,
717        }
718    }
719
720    /// Anything recursively included by the prototype shouldn't include the prototype.
721    /// This marks them as such.
722    fn recursively_mark_no_prototype(
723        context_id: &ContextId,
724        syntax_context_ids: &HashMap<String, ContextId>,
725        all_contexts: &[Vec<Context>],
726        no_prototype: &mut HashSet<ContextId>,
727    ) {
728        let first_time = no_prototype.insert(*context_id);
729        if !first_time {
730            return;
731        }
732
733        for pattern in &all_contexts[context_id.syntax_index][context_id.context_index].patterns {
734            match *pattern {
735                // Apparently inline blocks also don't include the prototype when within the prototype.
736                // This is really weird, but necessary to run the YAML syntax.
737                Pattern::Match(ref match_pat) => {
738                    let maybe_context_refs = match match_pat.operation {
739                        MatchOperation::Push(ref context_refs)
740                        | MatchOperation::Set(ref context_refs) => Some(context_refs),
741                        MatchOperation::Pop | MatchOperation::None => None,
742                    };
743                    if let Some(context_refs) = maybe_context_refs {
744                        for context_ref in context_refs.iter() {
745                            match context_ref {
746                                ContextReference::Inline(ref s)
747                                | ContextReference::Named(ref s) => {
748                                    if let Some(i) = syntax_context_ids.get(s) {
749                                        Self::recursively_mark_no_prototype(
750                                            i,
751                                            syntax_context_ids,
752                                            all_contexts,
753                                            no_prototype,
754                                        );
755                                    }
756                                }
757                                ContextReference::Direct(ref id) => {
758                                    Self::recursively_mark_no_prototype(
759                                        id,
760                                        syntax_context_ids,
761                                        all_contexts,
762                                        no_prototype,
763                                    );
764                                }
765                                _ => (),
766                            }
767                        }
768                    }
769                }
770                Pattern::Include(ref reference) => match reference {
771                    ContextReference::Named(ref s) => {
772                        if let Some(id) = syntax_context_ids.get(s) {
773                            Self::recursively_mark_no_prototype(
774                                id,
775                                syntax_context_ids,
776                                all_contexts,
777                                no_prototype,
778                            );
779                        }
780                    }
781                    ContextReference::Direct(ref id) => {
782                        Self::recursively_mark_no_prototype(
783                            id,
784                            syntax_context_ids,
785                            all_contexts,
786                            no_prototype,
787                        );
788                    }
789                    _ => (),
790                },
791            }
792        }
793    }
794
795    fn link_context(
796        context: &mut Context,
797        syntax_index: usize,
798        all_context_ids: &[HashMap<String, ContextId>],
799        syntaxes: &[SyntaxReference],
800    ) {
801        for pattern in &mut context.patterns {
802            match *pattern {
803                Pattern::Match(ref mut match_pat) => {
804                    Self::link_match_pat(match_pat, syntax_index, all_context_ids, syntaxes)
805                }
806                Pattern::Include(ref mut context_ref) => {
807                    Self::link_ref(context_ref, syntax_index, all_context_ids, syntaxes)
808                }
809            }
810        }
811    }
812
813    fn link_ref(
814        context_ref: &mut ContextReference,
815        syntax_index: usize,
816        all_context_ids: &[HashMap<String, ContextId>],
817        syntaxes: &[SyntaxReference],
818    ) {
819        // println!("{:?}", context_ref);
820        use super::syntax_definition::ContextReference::*;
821        let linked_context_id = match *context_ref {
822            Named(ref s) | Inline(ref s) => {
823                // This isn't actually correct, but it is better than nothing/crashing.
824                // This is being phased out anyhow, see https://github.com/sublimehq/Packages/issues/73
825                // Fixes issue #30
826                if s == "$top_level_main" {
827                    all_context_ids[syntax_index].get("main")
828                } else {
829                    all_context_ids[syntax_index].get(s)
830                }
831            }
832            ByScope {
833                scope,
834                ref sub_context,
835                with_escape,
836            } => Self::with_plain_text_fallback(
837                all_context_ids,
838                syntaxes,
839                with_escape,
840                Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| {
841                    index_and_syntax.1.scope == scope
842                }),
843            ),
844            File {
845                ref name,
846                ref sub_context,
847                with_escape,
848            } => Self::with_plain_text_fallback(
849                all_context_ids,
850                syntaxes,
851                with_escape,
852                Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| {
853                    &index_and_syntax.1.name == name
854                }),
855            ),
856            Direct(_) => None,
857        };
858        if let Some(context_id) = linked_context_id {
859            let mut new_ref = Direct(*context_id);
860            mem::swap(context_ref, &mut new_ref);
861        }
862    }
863
864    fn with_plain_text_fallback<'a>(
865        all_context_ids: &'a [HashMap<String, ContextId>],
866        syntaxes: &'a [SyntaxReference],
867        with_escape: bool,
868        context_id: Option<&'a ContextId>,
869    ) -> Option<&'a ContextId> {
870        context_id.or_else(|| {
871            if with_escape {
872                // If we keep this reference unresolved, syntect will crash
873                // when it encounters the reference. Rather than crashing,
874                // we instead fall back to "Plain Text". This seems to be
875                // how Sublime Text behaves. It should be a safe thing to do
876                // since `embed`s always includes an `escape` to get out of
877                // the `embed`.
878                Self::find_id(&None, all_context_ids, syntaxes, |index_and_syntax| {
879                    index_and_syntax.1.name == "Plain Text"
880                })
881            } else {
882                None
883            }
884        })
885    }
886
887    fn find_id<'a>(
888        sub_context: &Option<String>,
889        all_context_ids: &'a [HashMap<String, ContextId>],
890        syntaxes: &'a [SyntaxReference],
891        predicate: impl FnMut(&(usize, &SyntaxReference)) -> bool,
892    ) -> Option<&'a ContextId> {
893        let context_name = sub_context.as_ref().map_or("main", |x| &**x);
894        syntaxes
895            .iter()
896            .enumerate()
897            .rev()
898            .find(predicate)
899            .and_then(|index_and_syntax| all_context_ids[index_and_syntax.0].get(context_name))
900    }
901
902    fn link_match_pat(
903        match_pat: &mut MatchPattern,
904        syntax_index: usize,
905        all_context_ids: &[HashMap<String, ContextId>],
906        syntaxes: &[SyntaxReference],
907    ) {
908        let maybe_context_refs = match match_pat.operation {
909            MatchOperation::Push(ref mut context_refs)
910            | MatchOperation::Set(ref mut context_refs) => Some(context_refs),
911            MatchOperation::Pop | MatchOperation::None => None,
912        };
913        if let Some(context_refs) = maybe_context_refs {
914            for context_ref in context_refs.iter_mut() {
915                Self::link_ref(context_ref, syntax_index, all_context_ids, syntaxes);
916            }
917        }
918        if let Some(ref mut context_ref) = match_pat.with_prototype {
919            Self::link_ref(context_ref, syntax_index, all_context_ids, syntaxes);
920        }
921    }
922}
923
924#[derive(Debug)]
925struct FirstLineCache {
926    /// (first line regex, syntax index) pairs for all syntaxes with a first line regex
927    regexes: Vec<(Regex, usize)>,
928}
929
930impl FirstLineCache {
931    fn new(syntaxes: &[SyntaxReference]) -> FirstLineCache {
932        let mut regexes = Vec::new();
933        for (i, syntax) in syntaxes.iter().enumerate() {
934            if let Some(ref reg_str) = syntax.first_line_match {
935                let reg = Regex::new(reg_str.into());
936                regexes.push((reg, i));
937            }
938        }
939        FirstLineCache { regexes }
940    }
941}
942
943#[cfg(feature = "yaml-load")]
944#[cfg(test)]
945mod tests {
946    use super::*;
947    use crate::{
948        parsing::{syntax_definition, ParseState, Scope},
949        utils::testdata,
950    };
951    use std::collections::HashMap;
952
953    #[test]
954    fn can_load() {
955        let mut builder = testdata::PACKAGES_SYN_SET.to_owned().into_builder();
956
957        let cmake_dummy_syntax = SyntaxDefinition {
958            name: "CMake".to_string(),
959            file_extensions: vec!["CMakeLists.txt".to_string(), "cmake".to_string()],
960            scope: Scope::new("source.cmake").unwrap(),
961            first_line_match: None,
962            hidden: false,
963            variables: HashMap::new(),
964            contexts: HashMap::new(),
965        };
966
967        builder.add(cmake_dummy_syntax);
968        builder.add_plain_text_syntax();
969
970        let ps = builder.build();
971
972        assert_eq!(
973            &ps.find_syntax_by_first_line("#!/usr/bin/env node")
974                .unwrap()
975                .name,
976            "JavaScript"
977        );
978        let rails_scope = Scope::new("source.ruby.rails").unwrap();
979        let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
980        ps.find_syntax_plain_text();
981        assert_eq!(&ps.find_syntax_by_extension("rake").unwrap().name, "Ruby");
982        assert_eq!(&ps.find_syntax_by_extension("RAKE").unwrap().name, "Ruby");
983        assert_eq!(&ps.find_syntax_by_token("ruby").unwrap().name, "Ruby");
984        assert_eq!(
985            &ps.find_syntax_by_first_line("lol -*- Mode: C -*- such line")
986                .unwrap()
987                .name,
988            "C"
989        );
990        assert_eq!(
991            &ps.find_syntax_for_file("testdata/parser.rs")
992                .unwrap()
993                .unwrap()
994                .name,
995            "Rust"
996        );
997        assert_eq!(
998            &ps.find_syntax_for_file("testdata/test_first_line.test")
999                .expect("Error finding syntax for file")
1000                .expect("No syntax found for file")
1001                .name,
1002            "Ruby"
1003        );
1004        assert_eq!(
1005            &ps.find_syntax_for_file(".bashrc").unwrap().unwrap().name,
1006            "Bourne Again Shell (bash)"
1007        );
1008        assert_eq!(
1009            &ps.find_syntax_for_file("CMakeLists.txt")
1010                .unwrap()
1011                .unwrap()
1012                .name,
1013            "CMake"
1014        );
1015        assert_eq!(
1016            &ps.find_syntax_for_file("test.cmake").unwrap().unwrap().name,
1017            "CMake"
1018        );
1019        assert_eq!(
1020            &ps.find_syntax_for_file("Rakefile").unwrap().unwrap().name,
1021            "Ruby"
1022        );
1023        assert!(&ps.find_syntax_by_first_line("derp derp hi lol").is_none());
1024        assert_eq!(
1025            &ps.find_syntax_by_path("Packages/Rust/Rust.sublime-syntax")
1026                .unwrap()
1027                .name,
1028            "Rust"
1029        );
1030        // println!("{:#?}", syntax);
1031        assert_eq!(syntax.scope, rails_scope);
1032        // unreachable!();
1033        let main_context = ps
1034            .get_context(&syntax.context_ids()["main"])
1035            .expect("#[cfg(test)]");
1036        let count = syntax_definition::context_iter(&ps, main_context).count();
1037        assert_eq!(count, 109);
1038    }
1039
1040    #[test]
1041    fn can_clone() {
1042        let cloned_syntax_set = {
1043            let mut builder = SyntaxSetBuilder::new();
1044            builder.add(syntax_a());
1045            builder.add(syntax_b());
1046
1047            let syntax_set_original = builder.build();
1048            #[allow(clippy::redundant_clone)] // We want to test .clone()
1049            syntax_set_original.clone()
1050            // Note: The original syntax set is dropped
1051        };
1052
1053        let syntax = cloned_syntax_set.find_syntax_by_extension("a").unwrap();
1054        let mut parse_state = ParseState::new(syntax);
1055        let ops = parse_state
1056            .parse_line("a go_b b", &cloned_syntax_set)
1057            .expect("#[cfg(test)]");
1058        let expected = (7, ScopeStackOp::Push(Scope::new("b").unwrap()));
1059        assert_ops_contain(&ops, &expected);
1060    }
1061
1062    #[test]
1063    fn can_list_added_syntaxes() {
1064        let mut builder = SyntaxSetBuilder::new();
1065        builder.add(syntax_a());
1066        builder.add(syntax_b());
1067        let syntaxes = builder.syntaxes();
1068
1069        assert_eq!(syntaxes.len(), 2);
1070        assert_eq!(syntaxes[0].name, "A");
1071        assert_eq!(syntaxes[1].name, "B");
1072    }
1073
1074    #[test]
1075    fn can_add_more_syntaxes_with_builder() {
1076        let syntax_set_original = {
1077            let mut builder = SyntaxSetBuilder::new();
1078            builder.add(syntax_a());
1079            builder.add(syntax_b());
1080            builder.build()
1081        };
1082
1083        let mut builder = syntax_set_original.into_builder();
1084
1085        let syntax_c = SyntaxDefinition::load_from_str(
1086            r#"
1087        name: C
1088        scope: source.c
1089        file_extensions: [c]
1090        contexts:
1091          main:
1092            - match: 'c'
1093              scope: c
1094            - match: 'go_a'
1095              push: scope:source.a#main
1096        "#,
1097            true,
1098            None,
1099        )
1100        .unwrap();
1101
1102        builder.add(syntax_c);
1103
1104        let syntax_set = builder.build();
1105
1106        let syntax = syntax_set.find_syntax_by_extension("c").unwrap();
1107        let mut parse_state = ParseState::new(syntax);
1108        let ops = parse_state
1109            .parse_line("c go_a a go_b b", &syntax_set)
1110            .expect("#[cfg(test)]");
1111        let expected = (14, ScopeStackOp::Push(Scope::new("b").unwrap()));
1112        assert_ops_contain(&ops, &expected);
1113    }
1114
1115    #[test]
1116    fn falls_back_to_plain_text_when_embedded_scope_is_missing() {
1117        test_plain_text_fallback(
1118            r#"
1119        name: Z
1120        scope: source.z
1121        file_extensions: [z]
1122        contexts:
1123          main:
1124            - match: 'z'
1125              scope: z
1126            - match: 'go_x'
1127              embed: scope:does.not.exist
1128              escape: 'leave_x'
1129        "#,
1130        );
1131    }
1132
1133    #[test]
1134    fn falls_back_to_plain_text_when_embedded_file_is_missing() {
1135        test_plain_text_fallback(
1136            r#"
1137        name: Z
1138        scope: source.z
1139        file_extensions: [z]
1140        contexts:
1141          main:
1142            - match: 'z'
1143              scope: z
1144            - match: 'go_x'
1145              embed: DoesNotExist.sublime-syntax
1146              escape: 'leave_x'
1147        "#,
1148        );
1149    }
1150
1151    fn test_plain_text_fallback(syntax_definition: &str) {
1152        let syntax = SyntaxDefinition::load_from_str(syntax_definition, true, None).unwrap();
1153
1154        let mut builder = SyntaxSetBuilder::new();
1155        builder.add_plain_text_syntax();
1156        builder.add(syntax);
1157        let syntax_set = builder.build();
1158
1159        let syntax = syntax_set.find_syntax_by_extension("z").unwrap();
1160        let mut parse_state = ParseState::new(syntax);
1161        let ops = parse_state
1162            .parse_line("z go_x x leave_x z", &syntax_set)
1163            .unwrap();
1164        let expected_ops = vec![
1165            (0, ScopeStackOp::Push(Scope::new("source.z").unwrap())),
1166            (0, ScopeStackOp::Push(Scope::new("z").unwrap())),
1167            (1, ScopeStackOp::Pop(1)),
1168            (6, ScopeStackOp::Push(Scope::new("text.plain").unwrap())),
1169            (9, ScopeStackOp::Pop(1)),
1170            (17, ScopeStackOp::Push(Scope::new("z").unwrap())),
1171            (18, ScopeStackOp::Pop(1)),
1172        ];
1173        assert_eq!(ops, expected_ops);
1174    }
1175
1176    #[test]
1177    fn can_find_unlinked_contexts() {
1178        let syntax_set = {
1179            let mut builder = SyntaxSetBuilder::new();
1180            builder.add(syntax_a());
1181            builder.add(syntax_b());
1182            builder.build()
1183        };
1184
1185        let unlinked_contexts = syntax_set.find_unlinked_contexts();
1186        assert_eq!(unlinked_contexts.len(), 0);
1187
1188        let syntax_set = {
1189            let mut builder = SyntaxSetBuilder::new();
1190            builder.add(syntax_a());
1191            builder.build()
1192        };
1193
1194        let unlinked_contexts: Vec<String> =
1195            syntax_set.find_unlinked_contexts().into_iter().collect();
1196        assert_eq!(unlinked_contexts.len(), 1);
1197        assert_eq!(unlinked_contexts[0], "Syntax 'A' with scope 'source.a' has unresolved context reference ByScope { scope: <source.b>, sub_context: Some(\"main\"), with_escape: false }");
1198    }
1199
1200    #[test]
1201    fn can_use_in_multiple_threads() {
1202        use rayon::prelude::*;
1203
1204        let syntax_set = {
1205            let mut builder = SyntaxSetBuilder::new();
1206            builder.add(syntax_a());
1207            builder.add(syntax_b());
1208            builder.build()
1209        };
1210
1211        let lines = vec!["a a a", "a go_b b", "go_b b", "go_b b  b"];
1212
1213        let results: Vec<Vec<(usize, ScopeStackOp)>> = lines
1214            .par_iter()
1215            .map(|line| {
1216                let syntax = syntax_set.find_syntax_by_extension("a").unwrap();
1217                let mut parse_state = ParseState::new(syntax);
1218                parse_state
1219                    .parse_line(line, &syntax_set)
1220                    .expect("#[cfg(test)]")
1221            })
1222            .collect();
1223
1224        assert_ops_contain(
1225            &results[0],
1226            &(4, ScopeStackOp::Push(Scope::new("a").unwrap())),
1227        );
1228        assert_ops_contain(
1229            &results[1],
1230            &(7, ScopeStackOp::Push(Scope::new("b").unwrap())),
1231        );
1232        assert_ops_contain(
1233            &results[2],
1234            &(5, ScopeStackOp::Push(Scope::new("b").unwrap())),
1235        );
1236        assert_ops_contain(
1237            &results[3],
1238            &(8, ScopeStackOp::Push(Scope::new("b").unwrap())),
1239        );
1240    }
1241
1242    #[test]
1243    fn is_sync() {
1244        check_sync::<SyntaxSet>();
1245    }
1246
1247    #[test]
1248    fn is_send() {
1249        check_send::<SyntaxSet>();
1250    }
1251
1252    #[test]
1253    fn can_override_syntaxes() {
1254        let syntax_set = {
1255            let mut builder = SyntaxSetBuilder::new();
1256            builder.add(syntax_a());
1257            builder.add(syntax_b());
1258
1259            let syntax_a2 = SyntaxDefinition::load_from_str(
1260                r#"
1261                name: A improved
1262                scope: source.a
1263                file_extensions: [a]
1264                first_line_match: syntax\s+a
1265                contexts:
1266                  main:
1267                    - match: a
1268                      scope: a2
1269                    - match: go_b
1270                      push: scope:source.b#main
1271                "#,
1272                true,
1273                None,
1274            )
1275            .unwrap();
1276
1277            builder.add(syntax_a2);
1278
1279            let syntax_c = SyntaxDefinition::load_from_str(
1280                r#"
1281                name: C
1282                scope: source.c
1283                file_extensions: [c]
1284                first_line_match: syntax\s+.*
1285                contexts:
1286                  main:
1287                    - match: c
1288                      scope: c
1289                    - match: go_a
1290                      push: scope:source.a#main
1291                "#,
1292                true,
1293                None,
1294            )
1295            .unwrap();
1296
1297            builder.add(syntax_c);
1298
1299            builder.build()
1300        };
1301
1302        let mut syntax = syntax_set.find_syntax_by_extension("a").unwrap();
1303        assert_eq!(syntax.name, "A improved");
1304        syntax = syntax_set
1305            .find_syntax_by_scope(Scope::new("source.a").unwrap())
1306            .unwrap();
1307        assert_eq!(syntax.name, "A improved");
1308        syntax = syntax_set.find_syntax_by_first_line("syntax a").unwrap();
1309        assert_eq!(syntax.name, "C");
1310
1311        let mut parse_state = ParseState::new(syntax);
1312        let ops = parse_state
1313            .parse_line("c go_a a", &syntax_set)
1314            .expect("msg");
1315        let expected = (7, ScopeStackOp::Push(Scope::new("a2").unwrap()));
1316        assert_ops_contain(&ops, &expected);
1317    }
1318
1319    #[test]
1320    fn can_parse_issue219() {
1321        // Go to builder and back after loading so that build() gets Direct references instead of
1322        // Named ones. The bug was that Direct references were not handled when marking as
1323        // "no prototype", so prototype contexts accidentally had the prototype set, which made
1324        // the parser loop forever.
1325        let syntax_set = SyntaxSet::load_defaults_newlines().into_builder().build();
1326        let syntax = syntax_set.find_syntax_by_extension("yaml").unwrap();
1327
1328        let mut parse_state = ParseState::new(syntax);
1329        let ops = parse_state
1330            .parse_line("# test\n", &syntax_set)
1331            .expect("#[cfg(test)]");
1332        let expected = (
1333            0,
1334            ScopeStackOp::Push(Scope::new("comment.line.number-sign.yaml").unwrap()),
1335        );
1336        assert_ops_contain(&ops, &expected);
1337    }
1338
1339    #[test]
1340    fn no_prototype_for_contexts_included_from_prototype() {
1341        let mut builder = SyntaxSetBuilder::new();
1342        let syntax = SyntaxDefinition::load_from_str(
1343            r#"
1344                name: Test Prototype
1345                scope: source.test
1346                file_extensions: [test]
1347                contexts:
1348                  prototype:
1349                    - include: included_from_prototype
1350                  main:
1351                    - match: main
1352                    - match: other
1353                      push: other
1354                  other:
1355                    - match: o
1356                  included_from_prototype:
1357                    - match: p
1358                      scope: p
1359                "#,
1360            true,
1361            None,
1362        )
1363        .unwrap();
1364        builder.add(syntax);
1365        let ss = builder.build();
1366
1367        // "main" and "other" should have context set, "prototype" and "included_from_prototype"
1368        // must not have a prototype set.
1369        assert_prototype_only_on(&["main", "other"], &ss, &ss.syntaxes()[0]);
1370
1371        // Building again should have the same result. The difference is that after the first
1372        // build(), the references have been replaced with Direct references, so the code needs to
1373        // handle that correctly.
1374        let rebuilt = ss.into_builder().build();
1375        assert_prototype_only_on(&["main", "other"], &rebuilt, &rebuilt.syntaxes()[0]);
1376    }
1377
1378    #[test]
1379    fn no_prototype_for_contexts_inline_in_prototype() {
1380        let mut builder = SyntaxSetBuilder::new();
1381        let syntax = SyntaxDefinition::load_from_str(
1382            r#"
1383                name: Test Prototype
1384                scope: source.test
1385                file_extensions: [test]
1386                contexts:
1387                  prototype:
1388                    - match: p
1389                      push:
1390                        - match: p2
1391                  main:
1392                    - match: main
1393                "#,
1394            true,
1395            None,
1396        )
1397        .unwrap();
1398        builder.add(syntax);
1399        let ss = builder.build();
1400
1401        assert_prototype_only_on(&["main"], &ss, &ss.syntaxes()[0]);
1402
1403        let rebuilt = ss.into_builder().build();
1404        assert_prototype_only_on(&["main"], &rebuilt, &rebuilt.syntaxes()[0]);
1405    }
1406
1407    #[test]
1408    fn find_syntax_set_from_line_with_bom() {
1409        // Regression test for #529
1410        let syntax_set = SyntaxSet::load_defaults_newlines();
1411        let syntax_ref = syntax_set
1412            .find_syntax_by_first_line("\u{feff}<?xml version=\"1.0\"?>")
1413            .unwrap();
1414        assert_eq!(syntax_ref.name, "XML");
1415    }
1416
1417    fn assert_ops_contain(ops: &[(usize, ScopeStackOp)], expected: &(usize, ScopeStackOp)) {
1418        assert!(
1419            ops.contains(expected),
1420            "expected operations to contain {:?}: {:?}",
1421            expected,
1422            ops
1423        );
1424    }
1425
1426    fn assert_prototype_only_on(
1427        expected: &[&str],
1428        syntax_set: &SyntaxSet,
1429        syntax: &SyntaxReference,
1430    ) {
1431        for (name, id) in syntax.context_ids() {
1432            if name == "__main" || name == "__start" {
1433                // Skip special contexts
1434                continue;
1435            }
1436            let context = syntax_set.get_context(id).expect("#[cfg(test)]");
1437            if expected.contains(&name.as_str()) {
1438                assert!(
1439                    context.prototype.is_some(),
1440                    "Expected context {} to have prototype",
1441                    name
1442                );
1443            } else {
1444                assert!(
1445                    context.prototype.is_none(),
1446                    "Expected context {} to not have prototype",
1447                    name
1448                );
1449            }
1450        }
1451    }
1452
1453    fn check_send<T: Send>() {}
1454
1455    fn check_sync<T: Sync>() {}
1456
1457    fn syntax_a() -> SyntaxDefinition {
1458        SyntaxDefinition::load_from_str(
1459            r#"
1460            name: A
1461            scope: source.a
1462            file_extensions: [a]
1463            contexts:
1464              main:
1465                - match: 'a'
1466                  scope: a
1467                - match: 'go_b'
1468                  push: scope:source.b#main
1469            "#,
1470            true,
1471            None,
1472        )
1473        .unwrap()
1474    }
1475
1476    fn syntax_b() -> SyntaxDefinition {
1477        SyntaxDefinition::load_from_str(
1478            r#"
1479            name: B
1480            scope: source.b
1481            file_extensions: [b]
1482            contexts:
1483              main:
1484                - match: 'b'
1485                  scope: b
1486            "#,
1487            true,
1488            None,
1489        )
1490        .unwrap()
1491    }
1492}