1use super::regex::{Regex, Region};
8use super::{scope::*, ParsingError};
9use crate::parsing::syntax_set::SyntaxSet;
10use regex_syntax::escape;
11use serde::ser::{Serialize, Serializer};
12use serde_derive::{Deserialize, Serialize};
13use std::collections::{BTreeMap, HashMap};
14use std::hash::Hash;
15
/// A list of `(index, scopes)` pairs; stored in [`MatchPattern::captures`].
///
/// NOTE(review): the `usize` is presumably a regex capture-group index and the
/// scopes are the ones assigned to that group's text — confirm against the
/// code that builds and consumes this mapping.
pub type CaptureMapping = Vec<(usize, Vec<Scope>)>;
17
/// A small, copyable handle that identifies a [`Context`] by two indices.
///
/// A `ContextId` is turned back into a `&Context` by passing it to
/// `SyntaxSet::get_context`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub struct ContextId {
    /// Index of the owning syntax.
    /// NOTE(review): presumably an index into the `SyntaxSet`'s syntax list — confirm.
    pub(crate) syntax_index: usize,

    /// Index of the context.
    /// NOTE(review): presumably relative to the syntax selected by `syntax_index` — confirm.
    pub(crate) context_index: usize,
}
27
/// The data describing one syntax: its identifying metadata, its variables and
/// its named contexts.
///
/// Both maps are serialized through `ordered_map`, which emits the entries in
/// sorted key order instead of `HashMap`'s iteration order.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct SyntaxDefinition {
    /// Name of the syntax.
    pub name: String,
    /// File extensions associated with this syntax.
    pub file_extensions: Vec<String>,
    /// The scope associated with this syntax as a whole.
    pub scope: Scope,
    /// Optional pattern string for the first line of a file.
    /// NOTE(review): presumably a regex used to detect the syntax — confirm.
    pub first_line_match: Option<String>,
    /// Whether the syntax is marked as hidden.
    pub hidden: bool,
    /// Named string variables; serialized in sorted key order.
    #[serde(serialize_with = "ordered_map")]
    pub variables: HashMap<String, String>,
    /// Contexts keyed by name; serialized in sorted key order.
    #[serde(serialize_with = "ordered_map")]
    pub contexts: HashMap<String, Context>,
}
48
/// A single context of a syntax: a list of patterns plus the scope and
/// prototype settings that go with it.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct Context {
    /// Scopes stored as this context's "meta scope".
    /// NOTE(review): presumably mirrors `meta_scope` of the sublime-syntax format — confirm.
    pub meta_scope: Vec<Scope>,
    /// Scopes stored as this context's "meta content scope".
    /// NOTE(review): presumably mirrors `meta_content_scope` of the sublime-syntax format — confirm.
    pub meta_content_scope: Vec<Scope>,
    /// Flag supplied by the caller of `Context::new`.
    /// NOTE(review): presumably controls whether the prototype is included — confirm.
    pub meta_include_prototype: bool,
    /// Optional amount of scopes to clear; `None` in a freshly created context.
    pub clear_scopes: Option<ClearAmount>,
    /// Optional id of a prototype context; `None` in a freshly created context.
    pub prototype: Option<ContextId>,
    /// `false` in a freshly created context.
    /// NOTE(review): presumably set when a pattern's regex contains backreferences — confirm.
    pub uses_backrefs: bool,

    /// The patterns of this context, in order; indices into this vector are
    /// what `MatchIter` yields and what `Context::match_at` accepts.
    pub patterns: Vec<Pattern>,
}
65
66impl Context {
67 pub fn new(meta_include_prototype: bool) -> Context {
68 Context {
69 meta_scope: Vec::new(),
70 meta_content_scope: Vec::new(),
71 meta_include_prototype,
72 clear_scopes: None,
73 uses_backrefs: false,
74 patterns: Vec::new(),
75 prototype: None,
76 }
77 }
78}
79
/// One entry in a context's pattern list.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum Pattern {
    /// A regex match pattern; the only kind that `MatchIter` yields and that
    /// `Context::match_at` returns.
    Match(MatchPattern),
    /// A reference to another context whose patterns are included at this
    /// position (`MatchIter` descends into it when the reference is `Direct`).
    Include(ContextReference),
}
85
/// Iterator over the match patterns reachable from a context, following
/// `Pattern::Include` entries; created by `context_iter`.
#[derive(Debug)]
pub struct MatchIter<'a> {
    /// Used to look up the contexts that included `ContextId`s refer to.
    syntax_set: &'a SyntaxSet,
    /// Stack of contexts currently being walked; the last entry is the active one.
    ctx_stack: Vec<&'a Context>,
    /// For each entry of `ctx_stack`, the index of the next pattern to visit.
    /// Pushed and popped together with `ctx_stack`.
    index_stack: Vec<usize>,
}
95
/// A regex pattern together with the scopes and the operation attached to it.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct MatchPattern {
    /// Flag supplied by whoever builds the pattern.
    /// NOTE(review): presumably true when the regex or scopes depend on capture groups — confirm.
    pub has_captures: bool,
    /// The pattern's regex; also the template used by `regex_with_refs`.
    pub regex: Regex,
    /// Scopes attached to this pattern.
    pub scope: Vec<Scope>,
    /// Optional per-index scope lists (see `CaptureMapping`).
    pub captures: Option<CaptureMapping>,
    /// The operation attached to this pattern.
    pub operation: MatchOperation,
    /// Optional reference to a context stored as this pattern's `with_prototype`.
    pub with_prototype: Option<ContextReference>,
}
105
/// A reference to a context, in one of several forms.
///
/// Only the `Direct` form can be resolved: `resolve` and `id` return
/// `ParsingError::UnresolvedContextReference` for every other variant.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum ContextReference {
    /// Reference by a name.
    /// NOTE(review): presumably the name of a context in the same syntax — confirm.
    #[non_exhaustive]
    Named(String),
    /// Reference to a syntax identified by scope, optionally to a named
    /// sub-context of it.
    #[non_exhaustive]
    ByScope {
        scope: Scope,
        sub_context: Option<String>,
        // NOTE(review): meaning of `with_escape` is not visible here — TODO confirm.
        with_escape: bool,
    },
    /// Reference to a syntax identified by a file name, optionally to a named
    /// sub-context of it.
    #[non_exhaustive]
    File {
        name: String,
        sub_context: Option<String>,
        // NOTE(review): meaning of `with_escape` is not visible here — TODO confirm.
        with_escape: bool,
    },
    /// Reference carrying a single string.
    /// NOTE(review): presumably the generated name of an inline context — confirm.
    #[non_exhaustive]
    Inline(String),
    /// Reference that already holds the target's `ContextId`.
    #[non_exhaustive]
    Direct(ContextId),
}
133
/// The operation attached to a `MatchPattern`.
///
/// NOTE(review): the variants presumably mirror the `push` / `set` / `pop`
/// actions of the sublime-syntax format acting on the parser's context
/// stack — confirm against the parser.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum MatchOperation {
    /// Carries the list of referenced contexts for a "push".
    Push(Vec<ContextReference>),
    /// Carries the list of referenced contexts for a "set".
    Set(Vec<ContextReference>),
    /// A "pop"; carries no data.
    Pop,
    /// No operation.
    None,
}
141
142impl<'a> Iterator for MatchIter<'a> {
143 type Item = (&'a Context, usize);
144
145 fn next(&mut self) -> Option<(&'a Context, usize)> {
146 loop {
147 if self.ctx_stack.is_empty() {
148 return None;
149 }
150 let last_index = self.ctx_stack.len() - 1;
155 let context = self.ctx_stack[last_index];
156 let index = self.index_stack[last_index];
157 self.index_stack[last_index] = index + 1;
158 if index < context.patterns.len() {
159 match context.patterns[index] {
160 Pattern::Match(_) => {
161 return Some((context, index));
162 }
163 Pattern::Include(ref ctx_ref) => {
164 let ctx_ptr = match *ctx_ref {
165 ContextReference::Direct(ref context_id) => {
166 self.syntax_set.get_context(context_id).unwrap()
167 }
168 _ => return self.next(), };
170 self.ctx_stack.push(ctx_ptr);
171 self.index_stack.push(0);
172 }
173 }
174 } else {
175 self.ctx_stack.pop();
176 self.index_stack.pop();
177 }
178 }
179 }
180}
181
182pub fn context_iter<'a>(syntax_set: &'a SyntaxSet, context: &'a Context) -> MatchIter<'a> {
187 MatchIter {
188 syntax_set,
189 ctx_stack: vec![context],
190 index_stack: vec![0],
191 }
192}
193
194impl Context {
195 pub fn match_at(&self, index: usize) -> Result<&MatchPattern, ParsingError> {
197 match self.patterns[index] {
198 Pattern::Match(ref match_pat) => Ok(match_pat),
199 _ => Err(ParsingError::BadMatchIndex(index)),
200 }
201 }
202}
203
204impl ContextReference {
205 pub fn resolve<'a>(&self, syntax_set: &'a SyntaxSet) -> Result<&'a Context, ParsingError> {
207 match *self {
208 ContextReference::Direct(ref context_id) => syntax_set.get_context(context_id),
209 _ => Err(ParsingError::UnresolvedContextReference(self.clone())),
210 }
211 }
212
213 pub fn id(&self) -> Result<ContextId, ParsingError> {
215 match *self {
216 ContextReference::Direct(ref context_id) => Ok(*context_id),
217 _ => Err(ParsingError::UnresolvedContextReference(self.clone())),
218 }
219 }
220}
221
/// Rewrites `regex_str`, replacing every backslash followed by a single ASCII
/// digit (`\0` – `\9`) with the string `substituter` returns for that digit.
///
/// * If `substituter` returns `None`, the backreference is removed and nothing
///   is inserted in its place.
/// * A backslash followed by any other character (including another
///   backslash) is copied through unchanged as a two-character escape.
/// * A lone trailing backslash is kept.
pub(crate) fn substitute_backrefs_in_regex<F>(regex_str: &str, substituter: F) -> String
where
    F: Fn(usize) -> Option<String>,
{
    let mut out = String::with_capacity(regex_str.len());
    // True while we have consumed a backslash but not yet the character after it.
    let mut pending_backslash = false;

    for ch in regex_str.chars() {
        if !pending_backslash {
            if ch == '\\' {
                // Hold the backslash back until we know what follows it.
                pending_backslash = true;
            } else {
                out.push(ch);
            }
            continue;
        }

        // `ch` is the character right after a backslash.
        pending_backslash = false;
        match ch.to_digit(10) {
            Some(digit) => {
                if let Some(replacement) = substituter(digit as usize) {
                    out.push_str(&replacement);
                }
            }
            None => {
                out.push('\\');
                out.push(ch);
            }
        }
    }

    if pending_backslash {
        out.push('\\');
    }
    out
}
249
250impl MatchPattern {
251 pub fn new(
252 has_captures: bool,
253 regex_str: String,
254 scope: Vec<Scope>,
255 captures: Option<CaptureMapping>,
256 operation: MatchOperation,
257 with_prototype: Option<ContextReference>,
258 ) -> MatchPattern {
259 MatchPattern {
260 has_captures,
261 regex: Regex::new(regex_str),
262 scope,
263 captures,
264 operation,
265 with_prototype,
266 }
267 }
268
269 pub fn regex_with_refs(&self, region: &Region, text: &str) -> Regex {
272 let new_regex = substitute_backrefs_in_regex(self.regex.regex_str(), |i| {
273 region.pos(i).map(|(start, end)| escape(&text[start..end]))
274 });
275
276 Regex::new(new_regex)
277 }
278
279 pub fn regex(&self) -> &Regex {
280 &self.regex
281 }
282}
283
284pub(crate) fn ordered_map<K, V, S>(map: &HashMap<K, V>, serializer: S) -> Result<S::Ok, S::Error>
286where
287 S: Serializer,
288 K: Eq + Hash + Ord + Serialize,
289 V: Serialize,
290{
291 let ordered: BTreeMap<_, _> = map.iter().collect();
292 ordered.serialize(serializer)
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `regex_with_refs` substitutes escaped capture text for
    /// `\1` and `\2`, drops a backreference to a missing group (`\9`), and
    /// leaves other escapes (`\\`, `\w`) untouched.
    #[test]
    fn can_compile_refs() {
        let pat = MatchPattern {
            has_captures: true,
            regex: Regex::new(r"lol \\ \2 \1 '\9' \wz".into()),
            scope: vec![],
            captures: None,
            operation: MatchOperation::None,
            with_prototype: None,
        };
        // Group 1 captures text full of regex metacharacters so that the
        // escaping of substituted text is exercised.
        let r = Regex::new(r"(\\\[\]\(\))(b)(c)(d)(e)".into());
        let s = r"\[]()bcde";
        let mut region = Region::new();
        let matched = r.search(s, 0, s.len(), Some(&mut region));
        assert!(matched);

        let regex_with_refs = pat.regex_with_refs(&region, s);
        assert_eq!(regex_with_refs.regex_str(), r"lol \\ b \\\[\]\(\) '' \wz");
    }
}