syntect/highlighting/
highlighter.rs

1//! Iterators and data structures for transforming parsing information into styled text.
2
3// Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/highlighter.rs
4// released under the MIT license by @defuz
5
6use std::iter::Iterator;
7use std::ops::Range;
8
9use super::selector::ScopeSelector;
10use super::style::{Color, FontStyle, Style, StyleModifier};
11use super::theme::{Theme, ThemeItem};
12use crate::parsing::{
13    BasicScopeStackOp, MatchPower, Scope, ScopeStack, ScopeStackOp, ATOM_LEN_BITS,
14};
15
/// Basically a wrapper around a [`Theme`] preparing it to be used for highlighting.
///
/// This is part of the API to preserve the possibility of caching matches of the
/// selectors of the theme on various scope paths or setting up some kind of
/// accelerator structure.
///
/// So for now this does very little but eventually if you keep it around between
/// highlighting runs it will preserve its cache.
///
/// [`Theme`]: struct.Theme.html
#[derive(Debug)]
pub struct Highlighter<'a> {
    /// The theme whose settings and scope rules drive the highlighting.
    theme: &'a Theme,
    /// Cache of the selectors in the theme that consist of only one scope.
    /// In most themes this is the majority, hence the usefulness.
    single_selectors: Vec<(Scope, StyleModifier)>,
    /// The remaining selectors of the theme, i.e. those that could not be reduced
    /// to a single scope, each paired with the style modifier of its theme item.
    multi_selectors: Vec<(ScopeSelector, StyleModifier)>,
    // TODO single_cache: HashMap<Scope, StyleModifier, BuildHasherDefault<FnvHasher>>,
}
35
/// Keeps a stack of scopes and styles as state between highlighting different lines.
///
/// If you are highlighting an entire file you create one of these at the start and use it
/// all the way to the end.
///
/// # Caching
///
/// One reason this is exposed is that since it implements `Clone` you can actually cache these
/// (probably along with a [`ParseState`]) and only re-start highlighting from the point of a
/// change. You could also do something fancy like only highlight a bit past the end of a user's
/// screen and resume highlighting when they scroll down on large files.
///
/// Alternatively you can save space by caching only the `path` field of this struct then re-create
/// the `HighlightState` when needed by passing that stack as the `initial_stack` parameter to the
/// [`new`] method. This takes less space but a small amount of time to re-create the style stack.
///
/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to
/// do so eventually. It is not recommended that you try caching the first time you implement
/// highlighting.
///
/// [`ParseState`]: ../parsing/struct.ParseState.html
/// [`new`]: #method.new
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HighlightState {
    /// Stack of fully resolved styles. The bottom entry is the highlighter's default style,
    /// followed by one entry per pushed scope; the top entry styles the next piece of text.
    styles: Vec<Style>,
    /// Scored styles built from the single-scope selectors only, pushed and popped in
    /// lockstep with `styles` so a push can start from the previous level's result.
    single_caches: Vec<ScoredStyle>,
    /// The scope stack the styles above were computed for.
    pub path: ScopeStack,
}
64
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// Yields the [`Style`], the text as well as the `Range` of the text in the source string.
///
/// It splits a line of text into different pieces each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct RangedHighlightIterator<'a, 'b> {
    /// Index into `changes` of the next operation to apply.
    index: usize,
    /// Byte offset into `text` up to which pieces have already been produced.
    pos: usize,
    /// The scope stack operations for this line, each paired with the byte offset
    /// in `text` at which it takes effect.
    changes: &'a [(usize, ScopeStackOp)],
    /// The line being highlighted; yielded pieces are slices of it.
    text: &'b str,
    /// Resolves scope stacks to styles.
    highlighter: &'a Highlighter<'a>,
    /// Scope and style stacks, updated in place as operations are applied.
    state: &'a mut HighlightState,
}
82
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// This is a backwards compatible shim on top of the [`RangedHighlightIterator`] which only
/// yields the [`Style`] and the text of the token, not the range.
///
/// It splits a line of text into different pieces each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`RangedHighlightIterator`]: struct.RangedHighlightIterator.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct HighlightIterator<'a, 'b> {
    /// The iterator doing the actual work; its ranges are dropped before yielding.
    ranged_iterator: RangedHighlightIterator<'a, 'b>,
}
97
98impl HighlightState {
99    /// Note that the [`Highlighter`] is not stored; it is used to construct the initial stack
100    /// of styles.
101    ///
102    /// Most of the time you'll want to pass an empty stack as `initial_stack`, but see the docs for
103    /// [`HighlightState`] for a discussion of advanced caching use cases.
104    ///
105    /// [`Highlighter`]: struct.Highlighter.html
106    /// [`HighlightState`]: struct.HighlightState.html
107    pub fn new(highlighter: &Highlighter<'_>, initial_stack: ScopeStack) -> HighlightState {
108        let mut styles = vec![highlighter.get_default()];
109        let mut single_caches = vec![ScoredStyle::from_style(styles[0])];
110        for i in 0..initial_stack.len() {
111            let prefix = initial_stack.bottom_n(i + 1);
112            let new_cache = highlighter.update_single_cache_for_push(&single_caches[i], prefix);
113            styles.push(highlighter.finalize_style_with_multis(&new_cache, prefix));
114            single_caches.push(new_cache);
115        }
116
117        HighlightState {
118            styles,
119            single_caches,
120            path: initial_stack,
121        }
122    }
123}
124
125impl<'a, 'b> RangedHighlightIterator<'a, 'b> {
126    pub fn new(
127        state: &'a mut HighlightState,
128        changes: &'a [(usize, ScopeStackOp)],
129        text: &'b str,
130        highlighter: &'a Highlighter<'_>,
131    ) -> RangedHighlightIterator<'a, 'b> {
132        RangedHighlightIterator {
133            index: 0,
134            pos: 0,
135            changes,
136            text,
137            highlighter,
138            state,
139        }
140    }
141}
142
143impl<'b> Iterator for RangedHighlightIterator<'_, 'b> {
144    type Item = (Style, &'b str, Range<usize>);
145
146    /// Yields the next token of text and the associated `Style` to render that text with.
147    /// the concatenation of the strings in each token will make the original string.
148    fn next(&mut self) -> Option<(Style, &'b str, Range<usize>)> {
149        if self.pos == self.text.len() && self.index >= self.changes.len() {
150            return None;
151        }
152        let (end, command) = if self.index < self.changes.len() {
153            self.changes[self.index].clone()
154        } else {
155            (self.text.len(), ScopeStackOp::Noop)
156        };
157        // println!("{} - {:?}   {}:{}", self.index, self.pos, self.state.path.len(), self.state.styles.len());
158        let style = *self.state.styles.last().unwrap_or(&Style::default());
159        let text = &self.text[self.pos..end];
160        let range = Range {
161            start: self.pos,
162            end,
163        };
164        {
165            // closures mess with the borrow checker's ability to see different struct fields
166            let m_path = &mut self.state.path;
167            let m_styles = &mut self.state.styles;
168            let m_caches = &mut self.state.single_caches;
169            let highlighter = &self.highlighter;
170            m_path
171                .apply_with_hook(&command, |op, cur_stack| {
172                    // println!("{:?} - {:?}", op, cur_stack);
173                    match op {
174                        BasicScopeStackOp::Push(_) => {
175                            // we can push multiple times so this might have changed
176                            let new_cache = {
177                                if let Some(prev_cache) = m_caches.last() {
178                                    highlighter.update_single_cache_for_push(prev_cache, cur_stack)
179                                } else {
180                                    highlighter.update_single_cache_for_push(
181                                        &ScoredStyle::from_style(highlighter.get_default()),
182                                        cur_stack,
183                                    )
184                                }
185                            };
186                            m_styles.push(
187                                highlighter.finalize_style_with_multis(&new_cache, cur_stack),
188                            );
189                            m_caches.push(new_cache);
190                        }
191                        BasicScopeStackOp::Pop => {
192                            m_styles.pop();
193                            m_caches.pop();
194                        }
195                    }
196                })
197                .ok()?;
198        }
199        self.pos = end;
200        self.index += 1;
201        if text.is_empty() {
202            self.next()
203        } else {
204            Some((style, text, range))
205        }
206    }
207}
208impl<'a, 'b> HighlightIterator<'a, 'b> {
209    pub fn new(
210        state: &'a mut HighlightState,
211        changes: &'a [(usize, ScopeStackOp)],
212        text: &'b str,
213        highlighter: &'a Highlighter<'_>,
214    ) -> HighlightIterator<'a, 'b> {
215        HighlightIterator {
216            ranged_iterator: RangedHighlightIterator {
217                index: 0,
218                pos: 0,
219                changes,
220                text,
221                highlighter,
222                state,
223            },
224        }
225    }
226}
227
228impl<'b> Iterator for HighlightIterator<'_, 'b> {
229    type Item = (Style, &'b str);
230
231    /// Yields the next token of text and the associated `Style` to render that text with.
232    /// the concatenation of the strings in each token will make the original string.
233    fn next(&mut self) -> Option<(Style, &'b str)> {
234        self.ranged_iterator.next().map(|e| (e.0, e.1))
235    }
236}
237
/// A style in which every attribute is paired with the `MatchPower` of the rule
/// that last set it.
///
/// When a further rule is applied, an attribute is only replaced if that rule's
/// score is greater than the one stored alongside the attribute.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScoredStyle {
    /// Foreground colour and the score of the rule it came from.
    pub foreground: (MatchPower, Color),
    /// Background colour and the score of the rule it came from.
    pub background: (MatchPower, Color),
    /// Font style and the score of the rule it came from.
    pub font_style: (MatchPower, FontStyle),
}
244
245#[inline]
246fn update_scored<T: Clone>(scored: &mut (MatchPower, T), update: &Option<T>, score: MatchPower) {
247    if score > scored.0 {
248        if let Some(u) = update {
249            scored.0 = score;
250            scored.1 = u.clone();
251        }
252    }
253}
254
255impl ScoredStyle {
256    fn apply(&mut self, other: &StyleModifier, score: MatchPower) {
257        update_scored(&mut self.foreground, &other.foreground, score);
258        update_scored(&mut self.background, &other.background, score);
259        update_scored(&mut self.font_style, &other.font_style, score);
260    }
261
262    fn to_style(&self) -> Style {
263        Style {
264            foreground: self.foreground.1,
265            background: self.background.1,
266            font_style: self.font_style.1,
267        }
268    }
269
270    fn from_style(style: Style) -> ScoredStyle {
271        ScoredStyle {
272            foreground: (MatchPower(-1.0), style.foreground),
273            background: (MatchPower(-1.0), style.background),
274            font_style: (MatchPower(-1.0), style.font_style),
275        }
276    }
277}
278
279impl<'a> Highlighter<'a> {
280    pub fn new(theme: &'a Theme) -> Highlighter<'a> {
281        let mut single_selectors = Vec::new();
282        let mut multi_selectors = Vec::new();
283        for item in &theme.scopes {
284            for sel in &item.scope.selectors {
285                if let Some(scope) = sel.extract_single_scope() {
286                    single_selectors.push((scope, item.style));
287                } else {
288                    multi_selectors.push((sel.clone(), item.style));
289                }
290            }
291        }
292        // So that deeper matching selectors get checked first
293        single_selectors.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
294
295        Highlighter {
296            theme,
297            single_selectors,
298            multi_selectors,
299        }
300    }
301
302    /// The default style in the absence of any matched rules.
303    /// Basically what plain text gets highlighted as.
304    pub fn get_default(&self) -> Style {
305        Style {
306            foreground: self.theme.settings.foreground.unwrap_or(Color::BLACK),
307            background: self.theme.settings.background.unwrap_or(Color::WHITE),
308            font_style: FontStyle::empty(),
309        }
310    }
311
312    fn update_single_cache_for_push(&self, cur: &ScoredStyle, path: &[Scope]) -> ScoredStyle {
313        let mut new_style = cur.clone();
314
315        let last_scope = path[path.len() - 1];
316        for &(scope, ref modif) in self
317            .single_selectors
318            .iter()
319            .filter(|a| a.0.is_prefix_of(last_scope))
320        {
321            let single_score = f64::from(scope.len())
322                * f64::from(ATOM_LEN_BITS * ((path.len() - 1) as u16)).exp2();
323            new_style.apply(modif, MatchPower(single_score));
324        }
325
326        new_style
327    }
328
329    fn finalize_style_with_multis(&self, cur: &ScoredStyle, path: &[Scope]) -> Style {
330        let mut new_style = cur.clone();
331
332        let mult_iter = self
333            .multi_selectors
334            .iter()
335            .filter_map(|(sel, style)| sel.does_match(path).map(|score| (score, style)));
336        for (score, modif) in mult_iter {
337            new_style.apply(modif, score);
338        }
339
340        new_style.to_style()
341    }
342
343    /// Returns the fully resolved style for the given stack.
344    ///
345    /// This operation is convenient but expensive. For reasonable performance,
346    /// the caller should be caching results.
347    pub fn style_for_stack(&self, stack: &[Scope]) -> Style {
348        let mut single_cache = ScoredStyle::from_style(self.get_default());
349        for i in 0..stack.len() {
350            single_cache = self.update_single_cache_for_push(&single_cache, &stack[0..i + 1]);
351        }
352        self.finalize_style_with_multis(&single_cache, stack)
353    }
354
355    /// Returns a [`StyleModifier`] which, if applied to the default style,
356    /// would generate the fully resolved style for this stack.
357    ///
358    /// This is made available to applications that are using syntect styles
359    /// in combination with style information from other sources.
360    ///
361    /// This operation is convenient but expensive. For reasonable performance,
362    /// the caller should be caching results. It's likely slower than [`style_for_stack`].
363    ///
364    /// [`StyleModifier`]: struct.StyleModifier.html
365    /// [`style_for_stack`]: #method.style_for_stack
366    pub fn style_mod_for_stack(&self, path: &[Scope]) -> StyleModifier {
367        let mut matching_items: Vec<(MatchPower, &ThemeItem)> = self
368            .theme
369            .scopes
370            .iter()
371            .filter_map(|item| item.scope.does_match(path).map(|score| (score, item)))
372            .collect();
373        matching_items.sort_by_key(|&(score, _)| score);
374        let sorted = matching_items.iter().map(|(_, item)| item);
375
376        let mut modifier = StyleModifier {
377            background: None,
378            foreground: None,
379            font_style: None,
380        };
381        for item in sorted {
382            modifier = modifier.apply(item.style);
383        }
384        modifier
385    }
386}
387
#[cfg(all(feature = "default-syntaxes", feature = "default-themes"))]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::highlighting::{Color, FontStyle, Style, ThemeSet};
    use crate::parsing::{ParseState, ScopeStack};
    use crate::utils::testdata;

    /// Builds a fully opaque colour from its RGB components.
    fn opaque(r: u8, g: u8, b: u8) -> Color {
        Color { r, g, b, a: 0xFF }
    }

    /// An undecorated style with the given foreground on the background colour the
    /// tests expect from the `base16-ocean.dark` theme.
    fn on_ocean_background(foreground: Color) -> Style {
        Style {
            foreground,
            background: opaque(43, 48, 59),
            font_style: FontStyle::empty(),
        }
    }

    /// Highlights one line of Ruby and checks the style of the numeric literal.
    #[test]
    fn can_parse() {
        let ps = &*testdata::PACKAGES_SYN_SET;
        let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
        let mut state = ParseState::new(syntax);
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = "module Bob::Wow::Troll::Five; 5; end";
        let ops = state.parse_line(line, ps).expect("#[cfg(test)]");
        let regions: Vec<(Style, &str)> =
            HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter).collect();
        // println!("{:#?}", regions);
        assert_eq!(
            regions[11],
            (on_ocean_background(opaque(208, 135, 112)), "5")
        );
    }

    /// Rebuilds a `HighlightState` from a saved scope path and checks that highlighting
    /// continues as if the state had never been dropped.
    #[test]
    fn can_parse_with_highlight_state_from_cache() {
        let ps = &*testdata::PACKAGES_SYN_SET;
        let syntax = ps
            .find_syntax_by_scope(Scope::new("source.python").unwrap())
            .unwrap();
        let mut state = ParseState::new(syntax);
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        // We start by parsing a python multiline-comment: """
        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = r#"""""#;
        let ops = state.parse_line(line, ps).expect("#[cfg(test)]");
        let piece_count =
            HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter).count();
        assert_eq!(1, piece_count);
        let path = highlight_state.path;

        // We then parse the next line with a highlight state built from the previous state
        let mut highlight_state = HighlightState::new(&highlighter, path);
        let line = "multiline comment";
        let ops = state.parse_line(line, ps).expect("#[cfg(test)]");
        let regions: Vec<(Style, &str)> =
            HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter).collect();

        // We expect the line to be styled as a comment. (Comment: #65737E)
        assert_eq!(
            regions[0],
            (
                on_ocean_background(opaque(101, 115, 126)),
                "multiline comment"
            )
        );
    }

    // see issues #133 and #203, this test tests the fixes for those issues
    #[test]
    fn tricky_cases() {
        use crate::highlighting::{ScopeSelectors, ThemeSettings};
        use std::str::FromStr;

        let c1 = opaque(1, 1, 1);
        let c2 = opaque(2, 2, 2);
        let def_bg = opaque(255, 255, 255);

        // One theme rule: a selector string plus the attributes it sets.
        let rule = |selector: &str,
                    foreground: Option<Color>,
                    background: Option<Color>,
                    font_style: Option<FontStyle>| ThemeItem {
            scope: ScopeSelectors::from_str(selector).unwrap(),
            style: StyleModifier {
                foreground,
                background,
                font_style,
            },
        };
        let test_color_scheme = Theme {
            name: None,
            author: None,
            settings: ThemeSettings::default(),
            scopes: vec![
                rule("comment.line", Some(c1), None, None),
                rule("comment", Some(c2), None, Some(FontStyle::ITALIC)),
                rule("comment.line.rs - keyword", None, Some(c1), None),
                rule("no.match", None, Some(c2), Some(FontStyle::UNDERLINE)),
            ],
        };
        let highlighter = Highlighter::new(&test_color_scheme);

        let ops = [
            // three rules apply at once here, two singles and one multi
            (
                0,
                ScopeStackOp::Push(Scope::new("comment.line.rs").unwrap()),
            ),
            // multi un-applies
            (
                1,
                ScopeStackOp::Push(Scope::new("keyword.control.rs").unwrap()),
            ),
            (2, ScopeStackOp::Pop(1)),
        ];

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let regions: Vec<Style> =
            HighlightIterator::new(&mut highlight_state, &ops[..], "abcdef", &highlighter)
                .map(|(style, _)| style)
                .collect();

        let italic = |foreground: Color, background: Color| Style {
            foreground,
            background,
            font_style: FontStyle::ITALIC,
        };

        // println!("{:#?}", regions);
        assert_eq!(
            regions,
            vec![italic(c1, c1), italic(c1, def_bg), italic(c1, c1)]
        );

        let full_stack = ScopeStack::from_str("comment.line.rs keyword.control.rs").unwrap();
        let full_style = highlighter.style_for_stack(full_stack.as_slice());
        assert_eq!(full_style, italic(c1, def_bg));

        let full_mod = highlighter.style_mod_for_stack(full_stack.as_slice());
        assert_eq!(
            full_mod,
            StyleModifier {
                foreground: Some(c1),
                background: None,
                font_style: Some(FontStyle::ITALIC)
            }
        );
    }

    /// Same line as `can_parse`, additionally checking the reported byte range.
    #[test]
    fn test_ranges() {
        let ps = &*testdata::PACKAGES_SYN_SET;
        let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
        let mut state = ParseState::new(syntax);
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = "module Bob::Wow::Troll::Five; 5; end";
        let ops = state.parse_line(line, ps).expect("#[cfg(test)]");
        let regions: Vec<(Style, &str, Range<usize>)> =
            RangedHighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter)
                .collect();
        // println!("{:#?}", regions);
        assert_eq!(
            regions[11],
            (on_ocean_background(opaque(208, 135, 112)), "5", 30..31)
        );
    }
}
647}