1use std::cmp::{min, Ordering};
3use std::collections::HashMap;
4use std::fmt;
5use std::mem;
6use std::str::FromStr;
7use std::sync::{Mutex, MutexGuard};
8
9use once_cell::sync::Lazy;
10use serde::de::{Deserialize, Deserializer, Error, Visitor};
11use serde::ser::{Serialize, Serializer};
12use serde_derive::{Deserialize, Serialize};
13
14#[derive(Debug, thiserror::Error)]
16#[non_exhaustive]
17pub enum ScopeError {
18 #[error("Tried to restore cleared scopes, but none were cleared")]
19 NoClearedScopesToRestore,
20}
21
/// Per-position weight, in bits, used when scoring a selector match.
///
/// [`ScopeStack::does_match`] scales the contribution of a match found at
/// stack position `i` by `2^(ATOM_LEN_BITS * i)`.
pub const ATOM_LEN_BITS: u16 = 3;
27
28#[deprecated(
34 since = "5.3.0",
35 note = "\
36 Deprecated in anticipation of removal in the next semver-breaking release under the \
37 justification that it's incredibly niche functionality to expose. If you rely on this \
38 functionality then please express your particular use-case in the github issue: \
39 https://github.com/trishume/syntect/issues/575\
40 "
41)]
42pub static SCOPE_REPO: Lazy<Mutex<ScopeRepository>> =
43 Lazy::new(|| Mutex::new(ScopeRepository::new()));
44
45pub(crate) fn lock_global_scope_repo() -> MutexGuard<'static, ScopeRepository> {
46 #[allow(deprecated)]
47 SCOPE_REPO.lock().unwrap()
48}
49
/// A scope: up to eight atoms packed into two 64-bit words.
///
/// Every atom occupies one 16-bit slot. `a` holds atoms 0 to 3 and `b` holds
/// atoms 4 to 7, in both cases with the earlier atom in the more significant
/// bits. A slot value of `0` means "no atom"; a present atom is stored as its
/// repository index plus one.
///
/// The derived ordering compares `a` first and `b` second, and the default
/// value is the empty scope (both words zero).
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Scope {
    /// Atoms 0 to 3, atom 0 in the top 16 bits.
    a: u64,
    /// Atoms 4 to 7, atom 4 in the top 16 bits.
    b: u64,
}
70
71#[derive(Debug, thiserror::Error)]
73#[non_exhaustive]
74pub enum ParseScopeError {
75 #[error("Too long scope. Scopes can be at most 8 atoms long.")]
78 TooLong,
79 #[error("Too many atoms. Max 2^16-2 atoms allowed.")]
82 TooManyAtoms,
83}
84
/// Interning table that maps atom strings to small integer indices and back.
///
/// Scopes store atom indices rather than strings, so a repository is needed
/// both to build a [`Scope`] from text and to turn one back into text.
#[derive(Debug)]
pub struct ScopeRepository {
    /// Atom text by index: `atoms[i]` is the string interned with index `i`.
    atoms: Vec<String>,
    /// Reverse lookup from atom text to its index in `atoms`.
    atom_index_map: HashMap<String, usize>,
}
101
102#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
114pub struct ScopeStack {
115 clear_stack: Vec<Vec<Scope>>,
116 pub scopes: Vec<Scope>,
117}
118
119#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
120pub enum ClearAmount {
121 TopN(usize),
122 All,
123}
124
125#[derive(Debug, Clone, PartialEq, Eq)]
134pub enum ScopeStackOp {
135 Push(Scope),
136 Pop(usize),
137 Clear(ClearAmount),
139 Restore,
141 Noop,
142}
143
144#[derive(Debug, Clone, PartialEq, Eq)]
148pub enum BasicScopeStackOp {
149 Push(Scope),
150 Pop,
151}
152
153fn pack_as_u16s(atoms: &[usize]) -> Result<Scope, ParseScopeError> {
154 let mut res = Scope { a: 0, b: 0 };
155
156 for (i, &n) in atoms.iter().enumerate() {
157 if n >= (u16::MAX as usize) - 2 {
158 return Err(ParseScopeError::TooManyAtoms);
159 }
160 let small = (n + 1) as u64; if i < 4 {
163 let shift = (3 - i) * 16;
164 res.a |= small << shift;
165 } else {
166 let shift = (7 - i) * 16;
167 res.b |= small << shift;
168 }
169 }
170 Ok(res)
171}
172
173impl ScopeRepository {
174 fn new() -> ScopeRepository {
175 ScopeRepository {
176 atoms: Vec::new(),
177 atom_index_map: HashMap::new(),
178 }
179 }
180
181 pub fn build(&mut self, s: &str) -> Result<Scope, ParseScopeError> {
182 if s.is_empty() {
183 return Ok(Scope { a: 0, b: 0 });
184 }
185 let parts: Vec<usize> = s
186 .trim_end_matches('.')
187 .split('.')
188 .map(|a| self.atom_to_index(a))
189 .collect();
190 if parts.len() > 8 {
191 return Err(ParseScopeError::TooManyAtoms);
192 }
193 pack_as_u16s(&parts[..])
194 }
195
196 pub fn to_string(&self, scope: Scope) -> String {
197 let mut s = String::new();
198 for i in 0..8 {
199 let atom_number = scope.atom_at(i);
200 if atom_number == 0 {
203 break;
204 }
205 if i != 0 {
206 s.push('.');
207 }
208 s.push_str(self.atom_str(atom_number));
209 }
210 s
211 }
212
213 fn atom_to_index(&mut self, atom: &str) -> usize {
214 if let Some(index) = self.atom_index_map.get(atom) {
215 return *index;
216 }
217
218 self.atoms.push(atom.to_owned());
219 let index = self.atoms.len() - 1;
220 self.atom_index_map.insert(atom.to_owned(), index);
221
222 index
223 }
224
225 pub fn atom_str(&self, atom_number: u16) -> &str {
229 &self.atoms[(atom_number - 1) as usize]
230 }
231}
232
233impl Scope {
234 pub fn new(s: &str) -> Result<Scope, ParseScopeError> {
238 let mut repo = lock_global_scope_repo();
239 repo.build(s.trim())
240 }
241
242 pub fn atom_at(self, index: usize) -> u16 {
247 #[allow(clippy::panic)]
248 let shifted = if index < 4 {
250 self.a >> ((3 - index) * 16)
251 } else if index < 8 {
252 self.b >> ((7 - index) * 16)
253 } else {
254 panic!("atom index out of bounds {:?}", index);
255 };
256 (shifted & 0xFFFF) as u16
257 }
258
259 #[inline]
260 fn missing_atoms(self) -> u32 {
261 let trail = if self.b == 0 {
262 self.a.trailing_zeros() + 64
263 } else {
264 self.b.trailing_zeros()
265 };
266 trail / 16
267 }
268
269 #[inline(always)]
271 pub fn len(self) -> u32 {
272 8 - self.missing_atoms()
273 }
274
275 pub fn is_empty(self) -> bool {
276 self.len() == 0
277 }
278
279 pub fn build_string(self) -> String {
283 let repo = lock_global_scope_repo();
284 repo.to_string(self)
285 }
286
287 pub fn is_prefix_of(self, s: Scope) -> bool {
311 let pref_missing = self.missing_atoms();
312
313 let mask: (u64, u64) = if pref_missing == 8 {
315 (0, 0)
316 } else if pref_missing == 4 {
317 (u64::MAX, 0)
318 } else if pref_missing > 4 {
319 (u64::MAX << ((pref_missing - 4) * 16), 0)
320 } else {
321 (u64::MAX, u64::MAX << (pref_missing * 16))
322 };
323
324 let ax = (self.a ^ s.a) & mask.0;
326 let bx = (self.b ^ s.b) & mask.1;
327 ax == 0 && bx == 0
331 }
332}
333
334impl FromStr for Scope {
335 type Err = ParseScopeError;
336
337 fn from_str(s: &str) -> Result<Scope, ParseScopeError> {
338 Scope::new(s)
339 }
340}
341
342impl fmt::Display for Scope {
343 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
344 let s = self.build_string();
345 write!(f, "{}", s)
346 }
347}
348
349impl fmt::Debug for Scope {
350 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
351 let s = self.build_string();
352 write!(f, "<{}>", s)
353 }
354}
355
356impl Serialize for Scope {
357 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
358 where
359 S: Serializer,
360 {
361 let s = self.build_string();
362 serializer.serialize_str(&s)
363 }
364}
365
366impl<'de> Deserialize<'de> for Scope {
367 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
368 where
369 D: Deserializer<'de>,
370 {
371 struct ScopeVisitor;
372
373 impl Visitor<'_> for ScopeVisitor {
374 type Value = Scope;
375
376 fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
377 formatter.write_str("a string")
378 }
379
380 fn visit_str<E>(self, v: &str) -> Result<Scope, E>
381 where
382 E: Error,
383 {
384 Scope::new(v).map_err(|e| Error::custom(format!("Invalid scope: {:?}", e)))
385 }
386 }
387
388 deserializer.deserialize_str(ScopeVisitor)
389 }
390}
391
/// The score of a selector match, as returned by `ScopeStack::does_match`.
///
/// Larger values compare greater.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
pub struct MatchPower(pub f64);

impl Eq for MatchPower {}

// `Ord` is written by hand on top of the derived `PartialOrd`, hence the allow.
#[allow(clippy::derive_ord_xor_partial_ord)]
impl Ord for MatchPower {
    /// Compares the wrapped scores.
    ///
    /// # Panics
    ///
    /// Panics if the two scores cannot be ordered, i.e. if either is NaN.
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.partial_cmp(&other.0).unwrap()
    }
}
405
406impl ScopeStack {
407 pub fn new() -> ScopeStack {
408 ScopeStack {
409 clear_stack: Vec::new(),
410 scopes: Vec::new(),
411 }
412 }
413
414 pub fn from_vec(v: Vec<Scope>) -> ScopeStack {
417 ScopeStack {
418 clear_stack: Vec::new(),
419 scopes: v,
420 }
421 }
422
423 #[inline]
424 pub fn push(&mut self, s: Scope) {
425 self.scopes.push(s);
426 }
427
428 #[inline]
429 pub fn pop(&mut self) {
430 self.scopes.pop();
431 }
432
433 pub fn apply(&mut self, op: &ScopeStackOp) -> Result<(), ScopeError> {
437 self.apply_with_hook(op, |_, _| {})
438 }
439
440 #[inline]
448 pub fn apply_with_hook<F>(&mut self, op: &ScopeStackOp, mut hook: F) -> Result<(), ScopeError>
449 where
450 F: FnMut(BasicScopeStackOp, &[Scope]),
451 {
452 match *op {
453 ScopeStackOp::Push(scope) => {
454 self.scopes.push(scope);
455 hook(BasicScopeStackOp::Push(scope), self.as_slice());
456 }
457 ScopeStackOp::Pop(count) => {
458 for _ in 0..count {
459 self.scopes.pop();
460 hook(BasicScopeStackOp::Pop, self.as_slice());
461 }
462 }
463 ScopeStackOp::Clear(amount) => {
464 let cleared = match amount {
465 ClearAmount::TopN(n) => {
466 let to_leave = self.scopes.len() - min(n, self.scopes.len());
468 self.scopes.split_off(to_leave)
469 }
470 ClearAmount::All => {
471 let mut cleared = Vec::new();
472 mem::swap(&mut cleared, &mut self.scopes);
473 cleared
474 }
475 };
476 let clear_amount = cleared.len();
477 self.clear_stack.push(cleared);
478 for _ in 0..clear_amount {
479 hook(BasicScopeStackOp::Pop, self.as_slice());
480 }
481 }
482 ScopeStackOp::Restore => match self.clear_stack.pop() {
483 Some(ref mut to_push) => {
484 for s in to_push {
485 self.scopes.push(*s);
486 hook(BasicScopeStackOp::Push(*s), self.as_slice());
487 }
488 }
489 None => return Err(ScopeError::NoClearedScopesToRestore),
490 },
491 ScopeStackOp::Noop => (),
492 }
493
494 Ok(())
495 }
496
497 pub fn debug_print(&self, repo: &ScopeRepository) {
500 for s in &self.scopes {
501 print!("{} ", repo.to_string(*s));
502 }
503 println!();
504 }
505
506 pub fn bottom_n(&self, n: usize) -> &[Scope] {
510 &self.scopes[0..n]
511 }
512
513 #[inline]
515 pub fn as_slice(&self) -> &[Scope] {
516 &self.scopes[..]
517 }
518
519 #[inline]
521 pub fn len(&self) -> usize {
522 self.scopes.len()
523 }
524
525 #[inline]
526 pub fn is_empty(&self) -> bool {
527 self.len() == 0
528 }
529
530 pub fn does_match(&self, stack: &[Scope]) -> Option<MatchPower> {
552 let mut sel_index: usize = 0;
553 let mut score: f64 = 0.0;
554 for (i, scope) in stack.iter().enumerate() {
555 let sel_scope = self.scopes[sel_index];
556 if sel_scope.is_prefix_of(*scope) {
557 let len = sel_scope.len();
558 score += f64::from(len) * f64::from(ATOM_LEN_BITS * (i as u16)).exp2();
560 sel_index += 1;
561 if sel_index >= self.scopes.len() {
562 return Some(MatchPower(score));
563 }
564 }
565 }
566 None
567 }
568}
569
570impl FromStr for ScopeStack {
571 type Err = ParseScopeError;
572
573 fn from_str(s: &str) -> Result<ScopeStack, ParseScopeError> {
575 let mut scopes = Vec::new();
576 for name in s.split_whitespace() {
577 scopes.push(Scope::from_str(name)?)
578 }
579 Ok(ScopeStack::from_vec(scopes))
580 }
581}
582
583impl fmt::Display for ScopeStack {
584 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
585 for s in &self.scopes {
586 write!(f, "{} ", s)?;
587 }
588 Ok(())
589 }
590}
591
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses both names through the global repository and reports whether
    /// `prefix` is a prefix of `full`.
    fn is_prefix(prefix: &str, full: &str) -> bool {
        Scope::new(prefix)
            .unwrap()
            .is_prefix_of(Scope::new(full).unwrap())
    }

    /// Parses `selector` and `stack` as scope stacks and returns the match result.
    fn match_power(selector: &str, stack: &str) -> Option<MatchPower> {
        use std::str::FromStr;
        let selector = ScopeStack::from_str(selector).unwrap();
        let stack = ScopeStack::from_str(stack).unwrap();
        selector.does_match(stack.as_slice())
    }

    #[test]
    fn misc() {}

    #[test]
    fn repo_works() {
        let mut repo = ScopeRepository::new();

        // Building the same name twice must give the same scope.
        for name in ["source.php", "source.php.wow.hi.bob.troll.clock.5", ""] {
            let first = repo.build(name).unwrap();
            let second = repo.build(name).unwrap();
            assert_eq!(first, second);
        }

        // Scopes round-trip back to their names.
        let empty = repo.build("").unwrap();
        assert_eq!(repo.to_string(empty), "");
        let nested = repo.build("source.php.wow").unwrap();
        assert_eq!(repo.to_string(nested), "source.php.wow");

        // Different names give different scopes.
        assert!(repo.build("source.php").unwrap() != repo.build("source.perl").unwrap());
        assert!(repo.build("source.php").unwrap() != repo.build("source.php.wagon").unwrap());

        // A trailing dot is ignored.
        assert_eq!(
            repo.build("comment.line.").unwrap(),
            repo.build("comment.line").unwrap()
        );
    }

    #[test]
    fn global_repo_works() {
        use std::str::FromStr;
        let first = Scope::new("source.php").unwrap();
        let second = Scope::new("source.php").unwrap();
        assert_eq!(first, second);

        // Eight atoms fit, nine do not.
        assert!(Scope::from_str("1.2.3.4.5.6.7.8").is_ok());
        assert!(Scope::from_str("1.2.3.4.5.6.7.8.9").is_err());
    }

    #[test]
    fn prefixes_work() {
        let full = "1.2.3.4.5.6.7.8";

        assert!(is_prefix("1.2.3.4.5.6.7.8", full));
        assert!(is_prefix("1.2.3.4.5.6", full));
        assert!(is_prefix("1.2.3.4", full));

        assert!(!is_prefix("1.2.3.4.5.6.a", full));
        assert!(!is_prefix("1.2.a.4.5.6.7", full));
        assert!(!is_prefix("1.2.a.4.5.6.7", "1.2.3.4.5"));
        assert!(!is_prefix("1.2.a", full));
    }

    #[test]
    fn matching_works() {
        let stack = "a.b c.d e.f.g";

        assert_eq!(
            match_power("string", "string.quoted"),
            Some(MatchPower(0o1u64 as f64))
        );
        assert_eq!(match_power("source", "string.quoted"), None);
        assert_eq!(
            match_power("a.b e.f", stack),
            Some(MatchPower(0o202u64 as f64))
        );
        assert_eq!(
            match_power("c e.f", stack),
            Some(MatchPower(0o210u64 as f64))
        );
        assert_eq!(
            match_power("c.d e.f", stack),
            Some(MatchPower(0o220u64 as f64))
        );
        assert_eq!(
            match_power("a.b c e.f", stack),
            Some(MatchPower(0o212u64 as f64))
        );
        assert_eq!(
            match_power("a c.d", stack),
            Some(MatchPower(0o021u64 as f64))
        );
        assert_eq!(match_power("a c.d.e", stack), None);
    }
}