similar/text/
utils.rs

1use std::collections::HashMap;
2use std::hash::Hash;
3
4use super::DiffableStrRef;
5
6// quick and dirty way to get an upper sequence ratio.
7pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
8    let n = seq1.len() + seq2.len();
9    if n == 0 {
10        1.0
11    } else {
12        2.0 * seq1.len().min(seq2.len()) as f32 / n as f32
13    }
14}
15
16/// Internal utility to calculate an upper bound for a ratio for
17/// [`get_close_matches`].  This is based on Python's difflib approach
18/// of considering the two sets to be multisets.
19///
20/// It counts the number of matches without regard to order, which is an
21/// obvious upper bound.
22pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
23
24impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
25    pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
26        let mut counts = HashMap::new();
27        for &word in seq {
28            *counts.entry(word).or_insert(0) += 1;
29        }
30        QuickSeqRatio(counts)
31    }
32
33    pub fn calc(&self, seq: &[&T]) -> f32 {
34        let n = self.0.len() + seq.len();
35        if n == 0 {
36            return 1.0;
37        }
38
39        let mut available = HashMap::new();
40        let mut matches = 0;
41        for &word in seq {
42            let x = if let Some(count) = available.get(&word) {
43                *count
44            } else {
45                self.0.get(&word).copied().unwrap_or(0)
46            };
47            available.insert(word, x - 1);
48            if x > 0 {
49                matches += 1;
50            }
51        }
52
53        2.0 * matches as f32 / n as f32
54    }
55}