syntect/
dumps.rs

1//! Methods for dumping serializable structs to a compressed binary format,
2//! used to allow fast startup times
3//!
4//! Currently syntect serializes [`SyntaxSet`] structs with [`dump_to_uncompressed_file`]
5//! into `.packdump` files and likewise [`ThemeSet`] structs to `.themedump` files with [`dump_to_file`].
6//!
//! You can use these methods to manage your own caching of compiled syntaxes and
//! themes, and even of your own `serde::Serialize` structures if you want to
//! be consistent with your format.
10//!
11//! [`SyntaxSet`]: ../parsing/struct.SyntaxSet.html
12//! [`dump_to_uncompressed_file`]: fn.dump_to_uncompressed_file.html
13//! [`ThemeSet`]: ../highlighting/struct.ThemeSet.html
14//! [`dump_to_file`]: fn.dump_to_file.html
15use bincode::Result;
16#[cfg(feature = "dump-load")]
17use bincode::deserialize_from;
18#[cfg(feature = "dump-create")]
19use bincode::serialize_into;
20use std::fs::File;
21#[cfg(feature = "dump-load")]
22use std::io::BufRead;
23#[cfg(feature = "dump-create")]
24use std::io::{BufWriter, Write};
25#[cfg(feature = "default-syntaxes")]
26use crate::parsing::SyntaxSet;
27#[cfg(feature = "default-themes")]
28use crate::highlighting::ThemeSet;
29use std::path::Path;
30#[cfg(feature = "dump-create")]
31use flate2::write::ZlibEncoder;
32#[cfg(feature = "dump-load")]
33use flate2::bufread::ZlibDecoder;
34#[cfg(feature = "dump-create")]
35use flate2::Compression;
36#[cfg(feature = "dump-create")]
37use serde::ser::Serialize;
38#[cfg(feature = "dump-load")]
39use serde::de::DeserializeOwned;
40
/// Serializes `to_dump` into `output` using the compressed binary dump format
///
/// The value is encoded with the `bincode` crate and the encoded bytes are
/// compressed with `flate2` on their way into the writer.
///
/// # Errors
///
/// Returns whatever error the underlying serialization helper reports.
#[cfg(feature = "dump-create")]
pub fn dump_to_writer<T, W>(to_dump: &T, output: W) -> Result<()>
where
    T: Serialize,
    W: Write,
{
    // This entry point always produces the compressed flavour of the format.
    const COMPRESS: bool = true;
    serialize_to_writer_impl(to_dump, output, COMPRESS)
}
48
/// Serializes an object into an in-memory byte vector, in the same format as [`dump_to_writer`]
///
/// # Panics
///
/// Panics if [`dump_to_writer`] returns an error.
///
/// [`dump_to_writer`]: fn.dump_to_writer.html
#[cfg(feature = "dump-create")]
pub fn dump_binary<T: Serialize>(o: &T) -> Vec<u8> {
    let mut buffer = Vec::new();
    // Writing goes through a mutable borrow so the vector can be returned afterwards.
    dump_to_writer(o, &mut buffer).unwrap();
    buffer
}
58
/// Writes a serializable object to the file at `path`, in the same format as [`dump_to_writer`]
///
/// The file is created if it does not exist and overwritten if it does. Its contents are encoded
/// with the `bincode` crate and then compressed with the `flate2` crate.
///
/// # Errors
///
/// Returns an error if the file cannot be created or if [`dump_to_writer`] fails.
///
/// [`dump_to_writer`]: fn.dump_to_writer.html
#[cfg(feature = "dump-create")]
pub fn dump_to_file<T, P>(o: &T, path: P) -> Result<()>
where
    T: Serialize,
    P: AsRef<Path>,
{
    let file = File::create(path)?;
    // Buffer the writes so the serializer does not hit the file for every small chunk.
    dump_to_writer(o, BufWriter::new(file))
}
70
/// A helper function that decompresses and decodes an object from a reader
///
/// # Errors
///
/// Returns whatever error the underlying deserialization helper reports.
#[cfg(feature = "dump-load")]
pub fn from_reader<T, R>(input: R) -> Result<T>
where
    T: DeserializeOwned,
    R: BufRead,
{
    // Data handled by this entry point is always treated as compressed.
    const COMPRESSED: bool = true;
    deserialize_from_reader_impl(input, COMPRESSED)
}
76
/// Returns a fully loaded object decoded from an in-memory binary dump.
///
/// # Panics
///
/// This function panics if the dump is invalid.
#[cfg(feature = "dump-load")]
pub fn from_binary<T: DeserializeOwned>(v: &[u8]) -> T {
    let loaded: Result<T> = from_reader(v);
    loaded.unwrap()
}
84
/// Returns a fully loaded object from a binary dump file.
///
/// The whole file is read into memory and then decoded with [`from_reader`].
///
/// # Errors
///
/// Returns an error if the file cannot be read or if decoding fails.
///
/// [`from_reader`]: fn.from_reader.html
#[cfg(feature = "dump-load")]
pub fn from_dump_file<T, P>(path: P) -> Result<T>
where
    T: DeserializeOwned,
    P: AsRef<Path>,
{
    let bytes = std::fs::read(path)?;
    from_reader(bytes.as_slice())
}
91
/// Writes an object to the file at `path` without compressing it.
///
/// To be used when serializing a [`SyntaxSet`] to a file. A [`SyntaxSet`]
/// itself shall not be compressed, because the data for its lazy-loaded
/// syntaxes are already compressed. Compressing another time just results in
/// bad performance.
///
/// # Errors
///
/// Returns an error if the file cannot be created or if serialization fails.
#[cfg(feature = "dump-create")]
pub fn dump_to_uncompressed_file<T, P>(o: &T, path: P) -> Result<()>
where
    T: Serialize,
    P: AsRef<Path>,
{
    let file = File::create(path)?;
    let buffered = BufWriter::new(file);
    // `false`: write the encoded bytes as-is, with no compression layer.
    serialize_to_writer_impl(o, buffered, false)
}
101
/// Loads an object from an uncompressed dump file.
///
/// To be used when deserializing a [`SyntaxSet`] that was previously written to
/// file using [`dump_to_uncompressed_file`].
///
/// # Errors
///
/// Returns an error if the file cannot be read or if decoding fails.
///
/// [`dump_to_uncompressed_file`]: fn.dump_to_uncompressed_file.html
#[cfg(feature = "dump-load")]
pub fn from_uncompressed_dump_file<T, P>(path: P) -> Result<T>
where
    T: DeserializeOwned,
    P: AsRef<Path>,
{
    let bytes = std::fs::read(path)?;
    // `false`: the bytes are decoded directly, with no decompression step.
    deserialize_from_reader_impl(bytes.as_slice(), false)
}
109
/// Loads an object from uncompressed dump data held in memory.
///
/// To be used when deserializing a [`SyntaxSet`] from raw data, for example
/// data that has been embedded in your own binary with the [`include_bytes!`]
/// macro.
///
/// # Errors
///
/// Returns an error if the data cannot be decoded.
#[cfg(feature = "dump-load")]
pub fn from_uncompressed_data<T>(v: &[u8]) -> Result<T>
where
    T: DeserializeOwned,
{
    // The data is decoded directly; no decompression step is applied.
    const COMPRESSED: bool = false;
    deserialize_from_reader_impl(v, COMPRESSED)
}
117
/// Private low level helper function used to implement the public API.
///
/// Encodes `to_dump` with `bincode` into `output`. When `use_compression` is
/// `true` the encoded bytes pass through a zlib encoder (best compression
/// level) first; otherwise they are written as-is.
///
/// The compressed stream is finished and the writer is flushed explicitly, so
/// I/O errors in those final steps are returned to the caller instead of being
/// discarded when the encoder or a buffered writer is dropped.
#[cfg(feature = "dump-create")]
fn serialize_to_writer_impl<T: Serialize, W: Write>(to_dump: &T, output: W, use_compression: bool) -> Result<()> {
    let mut output = if use_compression {
        let mut encoder = ZlibEncoder::new(output, Compression::best());
        serialize_into(&mut encoder, to_dump)?;
        // `finish` completes the compressed stream and hands back the inner
        // writer, reporting any error hit while doing so.
        encoder.finish()?
    } else {
        let mut output = output;
        serialize_into(&mut output, to_dump)?;
        output
    };
    // Flush here so a failing final write surfaces as an error.
    output.flush()?;
    Ok(())
}
128
/// Private low level helper function used to implement the public API.
///
/// Decodes a `bincode`-encoded object from `input`. When `use_compression` is
/// `true` the input is first run through a zlib decoder; otherwise it is
/// decoded directly.
#[cfg(feature = "dump-load")]
fn deserialize_from_reader_impl<T, R>(input: R, use_compression: bool) -> Result<T>
where
    T: DeserializeOwned,
    R: BufRead,
{
    if !use_compression {
        return deserialize_from(input);
    }

    let mut inflater = ZlibDecoder::new(input);
    deserialize_from(&mut inflater)
}
139
#[cfg(feature = "default-syntaxes")]
impl SyntaxSet {
    /// Instantiates a new syntax set from a binary dump of Sublime Text's default open source
    /// syntax definitions.
    ///
    /// These dumps are included in this library's binary for convenience.
    ///
    /// This method loads the version for parsing line strings with no `\n` characters at the end.
    /// If you're able to efficiently include newlines at the end of strings, use
    /// [`load_defaults_newlines`] since it works better. See [`SyntaxSetBuilder::add_from_folder`]
    /// for more info on this issue.
    ///
    /// This is the recommended way of creating a syntax set for non-advanced use cases. It is also
    /// significantly faster than loading the YAML files.
    ///
    /// Note that you can load additional syntaxes after doing this. If you want you can even use
    /// the fact that SyntaxDefinitions are serializable with the bincode crate to cache dumps of
    /// additional syntaxes yourself.
    ///
    /// # Panics
    ///
    /// Panics if an embedded dump cannot be decoded.
    ///
    /// [`load_defaults_newlines`]: #method.load_defaults_newlines
    /// [`SyntaxSetBuilder::add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder
    pub fn load_defaults_nonewlines() -> SyntaxSet {
        #[cfg(feature = "metadata")]
        {
            // The syntax dump is stored uncompressed, while the metadata dump
            // is read through the compressed `from_binary` path.
            let mut ps: SyntaxSet =
                from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump"))
                    .unwrap();
            let metadata = from_binary(include_bytes!("../assets/default_metadata.packdump"));
            ps.metadata = metadata;
            ps
        }
        #[cfg(not(feature = "metadata"))]
        {
            from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump")).unwrap()
        }
    }

    /// Same as [`load_defaults_nonewlines`] but for parsing line strings with newlines at the end.
    ///
    /// These are separate methods because thanks to linker garbage collection, only the serialized
    /// dumps for the method(s) you call will be included in the binary (each is ~200kb for now).
    ///
    /// # Panics
    ///
    /// Panics if an embedded dump cannot be decoded.
    ///
    /// [`load_defaults_nonewlines`]: #method.load_defaults_nonewlines
    pub fn load_defaults_newlines() -> SyntaxSet {
        #[cfg(feature = "metadata")]
        {
            // The syntax dump is stored uncompressed, while the metadata dump
            // is read through the compressed `from_binary` path.
            let mut ps: SyntaxSet =
                from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump"))
                    .unwrap();
            let metadata = from_binary(include_bytes!("../assets/default_metadata.packdump"));
            ps.metadata = metadata;
            ps
        }
        #[cfg(not(feature = "metadata"))]
        {
            from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump")).unwrap()
        }
    }
}
197
#[cfg(feature = "default-themes")]
impl ThemeSet {
    /// Loads the set of default themes from the dump embedded in this library
    ///
    /// Currently includes (these are the keys for the map):
    ///
    /// - `base16-ocean.dark`,`base16-eighties.dark`,`base16-mocha.dark`,`base16-ocean.light`
    /// - `InspiredGitHub` from [here](https://github.com/sethlopezme/InspiredGitHub.tmtheme)
    /// - `Solarized (dark)` and `Solarized (light)`
    ///
    /// # Panics
    ///
    /// Panics if the embedded dump cannot be decoded.
    pub fn load_defaults() -> ThemeSet {
        let embedded_dump = include_bytes!("../assets/default.themedump");
        from_binary(embedded_dump)
    }
}
210
#[cfg(test)]
mod tests {
    /// Round-trips a freshly built syntax set through the compressed binary
    /// dump format and checks that no syntaxes were lost.
    #[cfg(all(feature = "yaml-load", feature = "dump-create", feature = "dump-load", feature = "parsing"))]
    #[test]
    fn can_dump_and_load() {
        use super::*;
        // `SyntaxSet` is imported explicitly: the parent module only brings it
        // into scope when the "default-syntaxes" feature is enabled, which
        // this test does not require.
        use crate::parsing::{SyntaxSet, SyntaxSetBuilder};
        let mut builder = SyntaxSetBuilder::new();
        builder.add_from_folder("testdata/Packages", false).unwrap();
        let ss = builder.build();

        let bin = dump_binary(&ss);
        println!("{:?}", bin.len());
        let ss2: SyntaxSet = from_binary(&bin[..]);
        assert_eq!(ss.syntaxes().len(), ss2.syntaxes().len());
    }

    /// Builds the same syntax set twice and checks that both dumps are
    /// byte-for-byte identical.
    #[cfg(all(feature = "yaml-load", feature = "dump-create", feature = "dump-load"))]
    #[test]
    fn dump_is_deterministic() {
        use super::*;
        use crate::parsing::SyntaxSetBuilder;

        let mut builder1 = SyntaxSetBuilder::new();
        builder1.add_from_folder("testdata/Packages", false).unwrap();
        let ss1 = builder1.build();
        let bin1 = dump_binary(&ss1);

        let mut builder2 = SyntaxSetBuilder::new();
        builder2.add_from_folder("testdata/Packages", false).unwrap();
        let ss2 = builder2.build();
        let bin2 = dump_binary(&ss2);
        // This is redundant, but assert_eq! can be really slow on a large
        // vector, so check the length first to fail faster.
        assert_eq!(bin1.len(), bin2.len());
        assert_eq!(bin1, bin2);
    }

    /// Checks that the embedded default theme dump loads and contains more
    /// than four themes.
    #[cfg(feature = "default-themes")]
    #[test]
    fn has_default_themes() {
        use crate::highlighting::ThemeSet;
        let themes = ThemeSet::load_defaults();
        assert!(themes.themes.len() > 4);
    }
}
256}