syntect/
dumps.rs

1//! Methods for dumping serializable structs to a compressed binary format,
2//! used to allow fast startup times
3//!
4//! Currently syntect serializes [`SyntaxSet`] structs with [`dump_to_uncompressed_file`]
5//! into `.packdump` files and likewise [`ThemeSet`] structs to `.themedump` files with [`dump_to_file`].
6//!
//! You can use these methods to manage your own caching of compiled syntaxes and
//! themes. You can also use them to dump your own `serde::Serialize` structures
//! if you want those stored in the same format.
10//!
11//! [`SyntaxSet`]: ../parsing/struct.SyntaxSet.html
12//! [`dump_to_uncompressed_file`]: fn.dump_to_uncompressed_file.html
13//! [`ThemeSet`]: ../highlighting/struct.ThemeSet.html
14//! [`dump_to_file`]: fn.dump_to_file.html
15#[cfg(feature = "default-themes")]
16use crate::highlighting::ThemeSet;
17#[cfg(feature = "default-syntaxes")]
18use crate::parsing::SyntaxSet;
19#[cfg(feature = "dump-load")]
20use bincode::deserialize_from;
21#[cfg(feature = "dump-create")]
22use bincode::serialize_into;
23use bincode::Result;
24#[cfg(feature = "dump-load")]
25use flate2::bufread::ZlibDecoder;
26#[cfg(feature = "dump-create")]
27use flate2::write::ZlibEncoder;
28#[cfg(feature = "dump-create")]
29use flate2::Compression;
30#[cfg(feature = "dump-load")]
31use serde::de::DeserializeOwned;
32#[cfg(feature = "dump-create")]
33use serde::ser::Serialize;
34use std::fs::File;
35#[cfg(feature = "dump-load")]
36use std::io::BufRead;
37#[cfg(feature = "dump-create")]
38use std::io::{BufWriter, Write};
39use std::path::Path;
40
/// Writes `to_dump` to `output` in the compressed binary dump format.
///
/// The value is encoded with the `bincode` crate and the encoded bytes are
/// compressed with `flate2` on their way into `output`.
#[cfg(feature = "dump-create")]
pub fn dump_to_writer<T, W>(to_dump: &T, output: W) -> Result<()>
where
    T: Serialize,
    W: Write,
{
    // The public writer API always produces compressed dumps.
    let use_compression = true;
    serialize_to_writer_impl(to_dump, output, use_compression)
}
48
/// Serializes an object into a freshly allocated byte vector, using the same
/// format as [`dump_to_writer`].
///
/// # Panics
///
/// Panics if [`dump_to_writer`] returns an error.
///
/// [`dump_to_writer`]: fn.dump_to_writer.html
#[cfg(feature = "dump-create")]
pub fn dump_binary<T: Serialize>(o: &T) -> Vec<u8> {
    let mut buffer: Vec<u8> = Vec::new();
    let outcome = dump_to_writer(o, &mut buffer);
    outcome.unwrap();
    buffer
}
58
/// Dumps an encodable object to a file at a given path, in the same format as [`dump_to_writer`]
///
/// If a file already exists at that path it will be overwritten. The files created are encoded with
/// the `bincode` crate and then compressed with the `flate2` crate.
///
/// # Errors
///
/// Returns an error if the file cannot be created, if [`dump_to_writer`] fails, or if the
/// buffered output cannot be flushed to the file.
///
/// [`dump_to_writer`]: fn.dump_to_writer.html
#[cfg(feature = "dump-create")]
pub fn dump_to_file<T: Serialize, P: AsRef<Path>>(o: &T, path: P) -> Result<()> {
    let mut out = BufWriter::new(File::create(path)?);
    // Lend the writer instead of giving it away so it can be flushed below.
    dump_to_writer(o, &mut out)?;
    // Flush explicitly: dropping a `BufWriter` also attempts a flush, but it
    // discards any I/O error, which would report a truncated dump as success.
    out.flush()?;
    Ok(())
}
70
/// Decompresses and decodes a value of type `T` from the given reader.
///
/// The input is treated as a compressed dump: it is run through a zlib
/// decoder and the resulting bytes are decoded with `bincode`.
#[cfg(feature = "dump-load")]
pub fn from_reader<T, R>(input: R) -> Result<T>
where
    T: DeserializeOwned,
    R: BufRead,
{
    // The public reader API always expects compressed dumps.
    let use_compression = true;
    deserialize_from_reader_impl(input, use_compression)
}
76
/// Loads an object from an in-memory compressed binary dump.
///
/// # Panics
///
/// Panics if the dump is invalid, i.e. if decoding it returns an error.
#[cfg(feature = "dump-load")]
pub fn from_binary<T: DeserializeOwned>(v: &[u8]) -> T {
    let loaded: Result<T> = from_reader(v);
    loaded.unwrap()
}
84
/// Loads an object from a compressed binary dump file.
///
/// The whole file is read into memory first and then decoded from that buffer.
#[cfg(feature = "dump-load")]
pub fn from_dump_file<T, P>(path: P) -> Result<T>
where
    T: DeserializeOwned,
    P: AsRef<Path>,
{
    let bytes: Vec<u8> = std::fs::read(path)?;
    from_reader(bytes.as_slice())
}
91
/// To be used when serializing a [`SyntaxSet`] to a file. A [`SyntaxSet`]
/// itself shall not be compressed, because the data for its lazy-loaded
/// syntaxes are already compressed. Compressing another time just results in
/// bad performance.
///
/// If a file already exists at that path it will be overwritten.
///
/// # Errors
///
/// Returns an error if the file cannot be created, if serialization fails, or
/// if the buffered output cannot be flushed to the file.
#[cfg(feature = "dump-create")]
pub fn dump_to_uncompressed_file<T: Serialize, P: AsRef<Path>>(o: &T, path: P) -> Result<()> {
    let mut out = BufWriter::new(File::create(path)?);
    // Lend the writer instead of giving it away so it can be flushed below.
    serialize_to_writer_impl(o, &mut out, false)?;
    // Flush explicitly: dropping a `BufWriter` also attempts a flush, but it
    // discards any I/O error, which would report a truncated dump as success.
    out.flush()?;
    Ok(())
}
101
/// Counterpart of [`dump_to_uncompressed_file`]: loads a [`SyntaxSet`] (or any
/// other deserializable value) from a file that was written without compression.
///
/// The whole file is read into memory first and then decoded from that buffer.
///
/// [`dump_to_uncompressed_file`]: fn.dump_to_uncompressed_file.html
#[cfg(feature = "dump-load")]
pub fn from_uncompressed_dump_file<T, P>(path: P) -> Result<T>
where
    T: DeserializeOwned,
    P: AsRef<Path>,
{
    let bytes: Vec<u8> = std::fs::read(path)?;
    let use_compression = false;
    deserialize_from_reader_impl(bytes.as_slice(), use_compression)
}
109
/// Decodes a [`SyntaxSet`] (or any other deserializable value) from raw,
/// uncompressed dump bytes, such as data embedded in your own binary with the
/// [`include_bytes!`] macro.
#[cfg(feature = "dump-load")]
pub fn from_uncompressed_data<T: DeserializeOwned>(v: &[u8]) -> Result<T> {
    // Raw data is decoded as-is, without a decompression step.
    let use_compression = false;
    deserialize_from_reader_impl(v, use_compression)
}
117
/// Private low level helper function used to implement the public API.
///
/// Encodes `to_dump` with `bincode` and writes it to `output`. When
/// `use_compression` is `true` the encoded bytes are passed through a zlib
/// encoder at the best compression level before reaching `output`.
///
/// # Errors
///
/// Returns an error if encoding fails, if the compressed stream cannot be
/// finalized, or if `output` cannot be flushed.
#[cfg(feature = "dump-create")]
fn serialize_to_writer_impl<T: Serialize, W: Write>(
    to_dump: &T,
    output: W,
    use_compression: bool,
) -> Result<()> {
    let mut sink = if use_compression {
        // Buffer in front of the compressor so that each individual
        // serializer write does not hit the zlib encoder directly.
        let mut encoder = BufWriter::new(ZlibEncoder::new(output, Compression::best()));
        serialize_into(&mut encoder, to_dump)?;
        // Tear the layers down by hand rather than relying on `Drop`, which
        // would silently discard errors from the final buffer flush and from
        // writing the zlib trailer.
        let compressor = encoder.into_inner().map_err(std::io::Error::from)?;
        compressor.finish()?
    } else {
        let mut plain = output;
        serialize_into(&mut plain, to_dump)?;
        plain
    };
    // The caller may have handed over a buffered writer by value; flush it
    // here so a failed write is reported instead of being lost on drop.
    sink.flush()?;
    Ok(())
}
132
/// Private low level helper function used to implement the public API.
///
/// Decodes a `T` with `bincode` from `input`. When `use_compression` is `true`
/// the input is first passed through a zlib decoder.
#[cfg(feature = "dump-load")]
fn deserialize_from_reader_impl<T, R>(input: R, use_compression: bool) -> Result<T>
where
    T: DeserializeOwned,
    R: BufRead,
{
    // Uncompressed data can be decoded straight from the reader.
    if !use_compression {
        return deserialize_from(input);
    }

    let mut inflater = ZlibDecoder::new(input);
    deserialize_from(&mut inflater)
}
146
#[cfg(feature = "default-syntaxes")]
impl SyntaxSet {
    /// Instantiates a new syntax set from a binary dump of Sublime Text's default open source
    /// syntax definitions.
    ///
    /// These dumps are included in this library's binary for convenience.
    ///
    /// This method loads the version for parsing line strings with no `\n` characters at the end.
    /// If you're able to efficiently include newlines at the end of strings, use
    /// [`load_defaults_newlines`] since it works better. See [`SyntaxSetBuilder::add_from_folder`]
    /// for more info on this issue.
    ///
    /// This is the recommended way of creating a syntax set for non-advanced use cases. It is also
    /// significantly faster than loading the YAML files.
    ///
    /// Note that you can load additional syntaxes after doing this. If you want you can even use
    /// the fact that SyntaxDefinitions are serializable with the bincode crate to cache dumps of
    /// additional syntaxes yourself.
    ///
    /// # Panics
    ///
    /// Panics if the embedded dump cannot be decoded.
    ///
    /// [`load_defaults_newlines`]: #method.load_defaults_newlines
    /// [`SyntaxSetBuilder::add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder
    pub fn load_defaults_nonewlines() -> SyntaxSet {
        // The two cfg blocks are mutually exclusive; whichever one survives is
        // the tail expression of the function.
        #[cfg(feature = "metadata")]
        {
            let mut ps: SyntaxSet =
                from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump"))
                    .unwrap();
            // The metadata is stored in its own (compressed) dump and attached afterwards.
            ps.metadata = from_binary(include_bytes!("../assets/default_metadata.packdump"));
            ps
        }
        #[cfg(not(feature = "metadata"))]
        {
            from_uncompressed_data(include_bytes!("../assets/default_nonewlines.packdump")).unwrap()
        }
    }

    /// Same as [`load_defaults_nonewlines`] but for parsing line strings with newlines at the end.
    ///
    /// These are separate methods because thanks to linker garbage collection, only the serialized
    /// dumps for the method(s) you call will be included in the binary (each is ~200kb for now).
    ///
    /// # Panics
    ///
    /// Panics if the embedded dump cannot be decoded.
    ///
    /// [`load_defaults_nonewlines`]: #method.load_defaults_nonewlines
    pub fn load_defaults_newlines() -> SyntaxSet {
        // The two cfg blocks are mutually exclusive; whichever one survives is
        // the tail expression of the function.
        #[cfg(feature = "metadata")]
        {
            let mut ps: SyntaxSet =
                from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump"))
                    .unwrap();
            // The metadata is stored in its own (compressed) dump and attached afterwards.
            ps.metadata = from_binary(include_bytes!("../assets/default_metadata.packdump"));
            ps
        }
        #[cfg(not(feature = "metadata"))]
        {
            from_uncompressed_data(include_bytes!("../assets/default_newlines.packdump")).unwrap()
        }
    }
}
206
#[cfg(feature = "default-themes")]
impl ThemeSet {
    /// Loads the default theme set from the dump embedded in this library.
    ///
    /// Currently includes (these are the keys for the map):
    ///
    /// - `base16-ocean.dark`,`base16-eighties.dark`,`base16-mocha.dark`,`base16-ocean.light`
    /// - `InspiredGitHub` from [here](https://github.com/sethlopezme/InspiredGitHub.tmtheme)
    /// - `Solarized (dark)` and `Solarized (light)`
    ///
    /// # Panics
    ///
    /// Panics if the embedded dump cannot be decoded.
    pub fn load_defaults() -> ThemeSet {
        let embedded_dump: &[u8] = include_bytes!("../assets/default.themedump");
        from_binary(embedded_dump)
    }
}
219
#[cfg(test)]
mod tests {
    /// A syntax set survives a dump/load round trip with the same number of syntaxes.
    #[cfg(all(
        feature = "yaml-load",
        feature = "dump-create",
        feature = "dump-load",
        feature = "parsing"
    ))]
    #[test]
    fn can_dump_and_load() {
        use super::*;
        // Import explicitly: the parent module only brings `SyntaxSet` into
        // scope when the `default-syntaxes` feature is enabled, which this
        // test does not require.
        use crate::parsing::SyntaxSet;
        use crate::utils::testdata;

        let ss: &SyntaxSet = &testdata::PACKAGES_SYN_SET;

        let bin = dump_binary(ss);
        println!("{:?}", bin.len());
        let ss2: SyntaxSet = from_binary(&bin[..]);
        assert_eq!(ss.syntaxes().len(), ss2.syntaxes().len());
    }

    /// Dumping the shared test syntax set and a set freshly built from
    /// `testdata/Packages` must give byte-identical output.
    // NOTE(review): presumably `PACKAGES_SYN_SET` is built from the same folder
    // with the same options — confirm against `utils::testdata`.
    #[cfg(all(feature = "yaml-load", feature = "dump-create", feature = "dump-load"))]
    #[test]
    fn dump_is_deterministic() {
        use super::*;
        use crate::parsing::SyntaxSetBuilder;
        use crate::utils::testdata;

        let ss1 = &*testdata::PACKAGES_SYN_SET;
        let bin1 = dump_binary(ss1);

        let mut builder2 = SyntaxSetBuilder::new();
        builder2
            .add_from_folder("testdata/Packages", false)
            .unwrap();
        let ss2 = builder2.build();
        let bin2 = dump_binary(&ss2);
        // This is redundant, but assert_eq! can be really slow on a large
        // vector, so check the length first to fail faster.
        assert_eq!(bin1.len(), bin2.len());
        assert_eq!(bin1, bin2);
    }

    /// The embedded default theme dump loads and contains more than four themes.
    #[cfg(feature = "default-themes")]
    #[test]
    fn has_default_themes() {
        use crate::highlighting::ThemeSet;
        let themes = ThemeSet::load_defaults();
        assert!(themes.themes.len() > 4);
    }
}