syntect/highlighting/
highlighter.rs

1//! Iterators and data structures for transforming parsing information into styled text.
2
3// Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/highlighter.rs
4// released under the MIT license by @defuz
5
6use std::iter::Iterator;
7use std::ops::Range;
8
9use crate::parsing::{Scope, ScopeStack, BasicScopeStackOp, ScopeStackOp, MatchPower, ATOM_LEN_BITS};
10use super::selector::ScopeSelector;
11use super::theme::{Theme, ThemeItem};
12use super::style::{Color, FontStyle, Style, StyleModifier};
13
/// Basically a wrapper around a [`Theme`] preparing it to be used for highlighting.
///
/// This is part of the API to preserve the possibility of caching matches of the
/// selectors of the theme on various scope paths or setting up some kind of
/// accelerator structure.
///
/// So for now this does very little but eventually if you keep it around between
/// highlighting runs it will preserve its cache.
///
/// [`Theme`]: struct.Theme.html
#[derive(Debug)]
pub struct Highlighter<'a> {
    /// The theme this highlighter was built from; it supplies the default
    /// foreground/background colors and the full list of theme items.
    theme: &'a Theme,
    /// Cache of the selectors in the theme that are only one scope.
    /// In most themes this is the majority, hence the usefulness.
    /// Sorted at construction so that longer scopes come first.
    single_selectors: Vec<(Scope, StyleModifier)>,
    /// Every selector that could not be reduced to a single scope, paired with
    /// the style modifier of the theme item it belongs to.
    multi_selectors: Vec<(ScopeSelector, StyleModifier)>,
    // TODO single_cache: HashMap<Scope, StyleModifier, BuildHasherDefault<FnvHasher>>,
}
33
/// Keeps a stack of scopes and styles as state between highlighting different lines.
///
/// If you are highlighting an entire file you create one of these at the start and use it
/// all the way to the end.
///
/// # Caching
///
/// One reason this is exposed is that since it implements `Clone` you can actually cache these
/// (probably along with a [`ParseState`]) and only re-start highlighting from the point of a
/// change. You could also do something fancy like only highlight a bit past the end of a user's
/// screen and resume highlighting when they scroll down on large files.
///
/// Alternatively you can save space by caching only the `path` field of this struct then re-create
/// the `HighlightState` when needed by passing that stack as the `initial_stack` parameter to the
/// [`new`] method. This takes less space but a small amount of time to re-create the style stack.
///
/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to
/// do so eventually. It is not recommended that you try caching the first time you implement
/// highlighting.
///
/// [`ParseState`]: ../parsing/struct.ParseState.html
/// [`new`]: #method.new
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HighlightState {
    /// Stack of fully resolved styles. The bottom entry is the highlighter's
    /// default style; one entry is pushed per scope push and popped per scope pop.
    styles: Vec<Style>,
    /// Stack of single-selector results kept in step with `styles`, so a push
    /// only has to score the single selectors against the newly pushed scope.
    single_caches: Vec<ScoredStyle>,
    /// The scope stack that the styles above were computed for.
    pub path: ScopeStack,
}
62
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// Yields the [`Style`], the text as well as the `Range` of the text in the source string.
///
/// It splits a line of text into different pieces each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct RangedHighlightIterator<'a, 'b> {
    /// Index into `changes` of the next operation to apply.
    index: usize,
    /// Byte offset into `text` at which the next token starts.
    pos: usize,
    /// Scope stack operations for this line, each paired with the byte offset
    /// at which it takes effect.
    changes: &'a [(usize, ScopeStackOp)],
    /// The line being highlighted.
    text: &'b str,
    /// Source of the styles for each scope stack.
    highlighter: &'a Highlighter<'a>,
    /// Scope and style stacks, updated in place as the iterator advances.
    state: &'a mut HighlightState,
}
80
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// This is a backwards compatible shim on top of the [`RangedHighlightIterator`] which only
/// yields the [`Style`] and the text of the token, not the range.
///
/// It splits a line of text into different pieces each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`RangedHighlightIterator`]: struct.RangedHighlightIterator.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct HighlightIterator<'a, 'b> {
    /// The iterator that does the actual work; its ranges are discarded.
    ranged_iterator: RangedHighlightIterator<'a, 'b>
}
95
96impl HighlightState {
97    /// Note that the [`Highlighter`] is not stored; it is used to construct the initial stack
98    /// of styles.
99    ///
100    /// Most of the time you'll want to pass an empty stack as `initial_stack`, but see the docs for
101    /// [`HighlightState`] for a discussion of advanced caching use cases.
102    ///
103    /// [`Highlighter`]: struct.Highlighter.html
104    /// [`HighlightState`]: struct.HighlightState.html
105    pub fn new(highlighter: &Highlighter<'_>, initial_stack: ScopeStack) -> HighlightState {
106        let mut styles = vec![highlighter.get_default()];
107        let mut single_caches = vec![ScoredStyle::from_style(styles[0])];
108        for i in 0..initial_stack.len() {
109            let prefix = initial_stack.bottom_n(i + 1);
110            let new_cache = highlighter.update_single_cache_for_push(&single_caches[i], prefix);
111            styles.push(highlighter.finalize_style_with_multis(&new_cache, prefix));
112            single_caches.push(new_cache);
113        }
114
115        HighlightState {
116            styles,
117            single_caches,
118            path: initial_stack,
119        }
120    }
121}
122
123impl<'a, 'b> RangedHighlightIterator<'a, 'b> {
124    pub fn new(state: &'a mut HighlightState,
125               changes: &'a [(usize, ScopeStackOp)],
126               text: &'b str,
127               highlighter: &'a Highlighter<'_>)
128               -> RangedHighlightIterator<'a, 'b> {
129        RangedHighlightIterator {
130            index: 0,
131            pos: 0,
132            changes,
133            text,
134            highlighter,
135            state,
136        }
137    }
138}
139
140impl<'a, 'b> Iterator for RangedHighlightIterator<'a, 'b> {
141    type Item = (Style, &'b str, Range<usize>);
142
143    /// Yields the next token of text and the associated `Style` to render that text with.
144    /// the concatenation of the strings in each token will make the original string.
145    fn next(&mut self) -> Option<(Style, &'b str, Range<usize>)> {
146        if self.pos == self.text.len() && self.index >= self.changes.len() {
147            return None;
148        }
149        let (end, command) = if self.index < self.changes.len() {
150            self.changes[self.index].clone()
151        } else {
152            (self.text.len(), ScopeStackOp::Noop)
153        };
154        // println!("{} - {:?}   {}:{}", self.index, self.pos, self.state.path.len(), self.state.styles.len());
155        let style = *self.state.styles.last().unwrap_or(&Style::default());
156        let text = &self.text[self.pos..end];
157        let range = Range { start: self.pos, end };
158        {
159            // closures mess with the borrow checker's ability to see different struct fields
160            let m_path = &mut self.state.path;
161            let m_styles = &mut self.state.styles;
162            let m_caches = &mut self.state.single_caches;
163            let highlighter = &self.highlighter;
164            m_path.apply_with_hook(&command, |op, cur_stack| {
165                // println!("{:?} - {:?}", op, cur_stack);
166                match op {
167                    BasicScopeStackOp::Push(_) => {
168                        // we can push multiple times so this might have changed
169                        let new_cache = {
170                            if let Some(prev_cache) = m_caches.last() {
171                                highlighter.update_single_cache_for_push(prev_cache, cur_stack)
172                            } else {
173                                highlighter.update_single_cache_for_push(&ScoredStyle::from_style(highlighter.get_default()), cur_stack)
174                            }
175                        };
176                        m_styles.push(highlighter.finalize_style_with_multis(&new_cache, cur_stack));
177                        m_caches.push(new_cache);
178                    }
179                    BasicScopeStackOp::Pop => {
180                        m_styles.pop();
181                        m_caches.pop();
182                    }
183                }
184            }).ok()?;
185        }
186        self.pos = end;
187        self.index += 1;
188        if text.is_empty() {
189            self.next()
190        } else {
191            Some((style, text, range))
192        }
193    }
194}
195impl<'a, 'b> HighlightIterator<'a, 'b> {
196    pub fn new(state: &'a mut HighlightState,
197               changes: &'a [(usize, ScopeStackOp)],
198               text: &'b str,
199               highlighter: &'a Highlighter<'_>)
200        -> HighlightIterator<'a, 'b> {
201            HighlightIterator {
202                ranged_iterator: RangedHighlightIterator {
203                    index: 0,
204                    pos: 0,
205                    changes,
206                    text,
207                    highlighter,
208                    state
209                }
210            }
211    }
212}
213
214impl<'a, 'b> Iterator for HighlightIterator<'a, 'b> {
215    type Item = (Style, &'b str);
216
217    /// Yields the next token of text and the associated `Style` to render that text with.
218    /// the concatenation of the strings in each token will make the original string.
219    fn next(&mut self) -> Option<(Style, &'b str)> {
220        self.ranged_iterator.next().map(|e| (e.0, e.1))
221    }
222}
223
/// A style in which every component is stored together with a `MatchPower` score.
///
/// The score is what a later rule has to exceed in order to replace that component.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScoredStyle {
    /// Foreground color and the score it was set with.
    pub foreground: (MatchPower, Color),
    /// Background color and the score it was set with.
    pub background: (MatchPower, Color),
    /// Font style and the score it was set with.
    pub font_style: (MatchPower, FontStyle),
}
230
231#[inline]
232fn update_scored<T: Clone>(scored: &mut (MatchPower, T), update: &Option<T>, score: MatchPower) {
233    if score > scored.0 {
234        if let Some(u) = update {
235            scored.0 = score;
236            scored.1 = u.clone();
237        }
238    }
239}
240
241impl ScoredStyle {
242    fn apply(&mut self, other: &StyleModifier, score: MatchPower) {
243        update_scored(&mut self.foreground, &other.foreground, score);
244        update_scored(&mut self.background, &other.background, score);
245        update_scored(&mut self.font_style, &other.font_style, score);
246    }
247
248    fn to_style(&self) -> Style {
249        Style {
250            foreground: self.foreground.1,
251            background: self.background.1,
252            font_style: self.font_style.1,
253        }
254    }
255
256    fn from_style(style: Style) -> ScoredStyle {
257        ScoredStyle {
258            foreground: (MatchPower(-1.0), style.foreground),
259            background: (MatchPower(-1.0), style.background),
260            font_style: (MatchPower(-1.0), style.font_style),
261        }
262    }
263}
264
265impl<'a> Highlighter<'a> {
266    pub fn new(theme: &'a Theme) -> Highlighter<'a> {
267        let mut single_selectors = Vec::new();
268        let mut multi_selectors = Vec::new();
269        for item in &theme.scopes {
270            for sel in &item.scope.selectors {
271                if let Some(scope) = sel.extract_single_scope() {
272                    single_selectors.push((scope, item.style));
273                } else {
274                    multi_selectors.push((sel.clone(), item.style));
275                }
276            }
277        }
278        // So that deeper matching selectors get checked first
279        single_selectors.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
280
281        Highlighter {
282            theme,
283            single_selectors,
284            multi_selectors,
285        }
286    }
287
288    /// The default style in the absence of any matched rules.
289    /// Basically what plain text gets highlighted as.
290    pub fn get_default(&self) -> Style {
291        Style {
292            foreground: self.theme.settings.foreground.unwrap_or(Color::BLACK),
293            background: self.theme.settings.background.unwrap_or(Color::WHITE),
294            font_style: FontStyle::empty(),
295        }
296    }
297
298    fn update_single_cache_for_push(&self, cur: &ScoredStyle, path: &[Scope]) -> ScoredStyle {
299        let mut new_style = cur.clone();
300
301        let last_scope = path[path.len() - 1];
302        for &(scope, ref modif) in self.single_selectors.iter().filter(|a| a.0.is_prefix_of(last_scope)) {
303            let single_score = f64::from(scope.len()) *
304                               f64::from(ATOM_LEN_BITS * ((path.len() - 1) as u16)).exp2();
305            new_style.apply(modif, MatchPower(single_score));
306        }
307
308        new_style
309    }
310
311    fn finalize_style_with_multis(&self, cur: &ScoredStyle, path: &[Scope]) -> Style {
312        let mut new_style = cur.clone();
313
314        let mult_iter = self.multi_selectors
315            .iter()
316            .filter_map(|(sel, style)| sel.does_match(path).map(|score| (score, style)));
317        for (score, modif) in mult_iter {
318            new_style.apply(modif, score);
319        }
320
321        new_style.to_style()
322    }
323
324    /// Returns the fully resolved style for the given stack.
325    ///
326    /// This operation is convenient but expensive. For reasonable performance,
327    /// the caller should be caching results.
328    pub fn style_for_stack(&self, stack: &[Scope]) -> Style {
329        let mut single_cache = ScoredStyle::from_style(self.get_default());
330        for i in 0..stack.len() {
331            single_cache = self.update_single_cache_for_push(&single_cache, &stack[0..i+1]);
332        }
333        self.finalize_style_with_multis(&single_cache, stack)
334    }
335
336    /// Returns a [`StyleModifier`] which, if applied to the default style,
337    /// would generate the fully resolved style for this stack.
338    ///
339    /// This is made available to applications that are using syntect styles
340    /// in combination with style information from other sources.
341    ///
342    /// This operation is convenient but expensive. For reasonable performance,
343    /// the caller should be caching results. It's likely slower than [`style_for_stack`].
344    ///
345    /// [`StyleModifier`]: struct.StyleModifier.html
346    /// [`style_for_stack`]: #method.style_for_stack
347    pub fn style_mod_for_stack(&self, path: &[Scope]) -> StyleModifier {
348        let mut matching_items : Vec<(MatchPower, &ThemeItem)> = self.theme
349            .scopes
350            .iter()
351            .filter_map(|item| {
352                item.scope
353                    .does_match(path)
354                    .map(|score| (score, item))
355            })
356            .collect();
357        matching_items.sort_by_key(|&(score, _)| score);
358        let sorted = matching_items.iter()
359            .map(|(_, item)| item);
360
361        let mut modifier = StyleModifier {
362            background: None,
363            foreground: None,
364            font_style: None,
365        };
366        for item in sorted {
367            modifier = modifier.apply(item.style);
368        }
369        modifier
370    }
371}
372
#[cfg(all(feature = "default-syntaxes", feature = "default-themes"))]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::highlighting::{ThemeSet, Style, Color, FontStyle};
    use crate::parsing::{ SyntaxSet, ScopeStack, ParseState};

    /// Folder the syntax definitions are loaded from.
    const PACKAGES: &str = "testdata/Packages";
    /// Name of the default theme the fixture-based tests highlight with.
    const THEME_NAME: &str = "base16-ocean.dark";
    /// Ruby line whose twelfth token is inspected by two of the tests.
    const RUBY_LINE: &str = "module Bob::Wow::Troll::Five; 5; end";

    /// A style without font attributes on the background color (#2B303B) that
    /// the fixture-based tests expect from the theme.
    fn on_theme_background(foreground: Color) -> Style {
        Style {
            foreground,
            background: Color { r: 43, g: 48, b: 59, a: 0xFF },
            font_style: FontStyle::empty(),
        }
    }

    /// Foreground expected for the numeric literal in `RUBY_LINE`.
    fn number_foreground() -> Color {
        Color { r: 208, g: 135, b: 112, a: 0xFF }
    }

    #[test]
    fn can_parse() {
        let ps = SyntaxSet::load_from_folder(PACKAGES).unwrap();
        let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
        let mut state = ParseState::new(syntax);
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes[THEME_NAME]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let ops = state.parse_line(RUBY_LINE, &ps).expect("#[cfg(test)]");
        let regions: Vec<(Style, &str)> =
            HighlightIterator::new(&mut highlight_state, &ops[..], RUBY_LINE, &highlighter)
                .collect();

        let expected = (on_theme_background(number_foreground()), "5");
        assert_eq!(regions[11], expected);
    }

    #[test]
    fn can_parse_with_highlight_state_from_cache() {
        let ps = SyntaxSet::load_from_folder(PACKAGES).unwrap();
        let python = Scope::new("source.python").unwrap();
        let syntax = ps.find_syntax_by_scope(python).unwrap();
        let mut state = ParseState::new(syntax);
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes[THEME_NAME]);

        // We start by parsing a python multiline-comment: """
        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let opening = r#"""""#;
        let ops = state.parse_line(opening, &ps).expect("#[cfg(test)]");
        let token_count =
            HighlightIterator::new(&mut highlight_state, &ops[..], opening, &highlighter).count();
        assert_eq!(1, token_count);
        let path = highlight_state.path;

        // We then parse the next line with a highlight state built from the previous state
        let mut highlight_state = HighlightState::new(&highlighter, path);
        let body = "multiline comment";
        let ops = state.parse_line(body, &ps).expect("#[cfg(test)]");
        let regions: Vec<(Style, &str)> =
            HighlightIterator::new(&mut highlight_state, &ops[..], body, &highlighter).collect();

        // We expect the line to be styled as a comment. (Comment: #65737E)
        let comment_foreground = Color { r: 101, g: 115, b: 126, a: 0xFF };
        let expected = (on_theme_background(comment_foreground), "multiline comment");
        assert_eq!(regions[0], expected);
    }

    // see issues #133 and #203, this test tests the fixes for those issues
    #[test]
    fn tricky_cases() {
        use std::str::FromStr;
        use crate::highlighting::{ThemeSettings, ScopeSelectors};

        let selectors = |source: &str| ScopeSelectors::from_str(source).unwrap();
        let scope = |name: &str| Scope::new(name).unwrap();

        let c1 = Color { r: 1, g: 1, b: 1, a: 255 };
        let c2 = Color { r: 2, g: 2, b: 2, a: 255 };
        let def_bg = Color { r: 255, g: 255, b: 255, a: 255 };

        let rules = vec![
            ThemeItem {
                scope: selectors("comment.line"),
                style: StyleModifier {
                    foreground: Some(c1),
                    background: None,
                    font_style: None,
                },
            },
            ThemeItem {
                scope: selectors("comment"),
                style: StyleModifier {
                    foreground: Some(c2),
                    background: None,
                    font_style: Some(FontStyle::ITALIC),
                },
            },
            ThemeItem {
                scope: selectors("comment.line.rs - keyword"),
                style: StyleModifier {
                    foreground: None,
                    background: Some(c1),
                    font_style: None,
                },
            },
            ThemeItem {
                scope: selectors("no.match"),
                style: StyleModifier {
                    foreground: None,
                    background: Some(c2),
                    font_style: Some(FontStyle::UNDERLINE),
                },
            },
        ];
        let test_color_scheme = Theme {
            name: None,
            author: None,
            settings: ThemeSettings::default(),
            scopes: rules,
        };
        let highlighter = Highlighter::new(&test_color_scheme);

        let ops = [
            // three rules apply at once here, two singles and one multi
            (0, ScopeStackOp::Push(scope("comment.line.rs"))),
            // multi un-applies
            (1, ScopeStackOp::Push(scope("keyword.control.rs"))),
            (2, ScopeStackOp::Pop(1)),
        ];

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let regions: Vec<Style> =
            HighlightIterator::new(&mut highlight_state, &ops[..], "abcdef", &highlighter)
                .map(|(style, _)| style)
                .collect();

        let inside_comment = Style { foreground: c1, background: c1, font_style: FontStyle::ITALIC };
        let inside_keyword = Style { foreground: c1, background: def_bg, font_style: FontStyle::ITALIC };
        assert_eq!(regions, vec![inside_comment, inside_keyword, inside_comment]);

        let full_stack = ScopeStack::from_str("comment.line.rs keyword.control.rs").unwrap();
        let full_style = highlighter.style_for_stack(full_stack.as_slice());
        assert_eq!(full_style, inside_keyword);

        let full_mod = highlighter.style_mod_for_stack(full_stack.as_slice());
        let expected_mod = StyleModifier {
            foreground: Some(c1),
            background: None,
            font_style: Some(FontStyle::ITALIC),
        };
        assert_eq!(full_mod, expected_mod);
    }

    #[test]
    fn test_ranges() {
        let ps = SyntaxSet::load_from_folder(PACKAGES).unwrap();
        let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
        let mut state = ParseState::new(syntax);
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes[THEME_NAME]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let ops = state.parse_line(RUBY_LINE, &ps).expect("#[cfg(test)]");
        let regions: Vec<(Style, &str, Range<usize>)> =
            RangedHighlightIterator::new(&mut highlight_state, &ops[..], RUBY_LINE, &highlighter)
                .collect();

        let expected = (on_theme_background(number_foreground()), "5", 30..31);
        assert_eq!(regions[11], expected);
    }
}