similar/algorithms/
lcs.rs

1//! LCS diff algorithm.
2//!
3//! * time: `O((NM)D log (M)D)`
4//! * space `O(MN)`
5use std::collections::BTreeMap;
6use std::ops::{Index, Range};
7
8use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range};
9use crate::algorithms::DiffHook;
10use crate::deadline_support::{deadline_exceeded, Instant};
11
12/// LCS diff algorithm.
13///
14/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
15///
16/// This diff is done with an optional deadline that defines the maximal
17/// execution time permitted before it bails and falls back to an very bad
18/// approximation.  Deadlines with LCS do not make a lot of sense and should
19/// not be used.
20pub fn diff<Old, New, D>(
21    d: &mut D,
22    old: &Old,
23    old_range: Range<usize>,
24    new: &New,
25    new_range: Range<usize>,
26) -> Result<(), D::Error>
27where
28    Old: Index<usize> + ?Sized,
29    New: Index<usize> + ?Sized,
30    D: DiffHook,
31    New::Output: PartialEq<Old::Output>,
32{
33    diff_deadline(d, old, old_range, new, new_range, None)
34}
35
36/// LCS diff algorithm.
37///
38/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
39///
40/// This diff is done with an optional deadline that defines the maximal
41/// execution time permitted before it bails and falls back to an approximation.
42pub fn diff_deadline<Old, New, D>(
43    d: &mut D,
44    old: &Old,
45    old_range: Range<usize>,
46    new: &New,
47    new_range: Range<usize>,
48    deadline: Option<Instant>,
49) -> Result<(), D::Error>
50where
51    Old: Index<usize> + ?Sized,
52    New: Index<usize> + ?Sized,
53    D: DiffHook,
54    New::Output: PartialEq<Old::Output>,
55{
56    if is_empty_range(&new_range) {
57        d.delete(old_range.start, old_range.len(), new_range.start)?;
58        d.finish()?;
59        return Ok(());
60    } else if is_empty_range(&old_range) {
61        d.insert(old_range.start, new_range.start, new_range.len())?;
62        d.finish()?;
63        return Ok(());
64    }
65
66    let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
67    let common_suffix_len = common_suffix_len(
68        old,
69        old_range.start + common_prefix_len..old_range.end,
70        new,
71        new_range.start + common_prefix_len..new_range.end,
72    );
73
74    // If the sequences are not different then we're done
75    if common_prefix_len == old_range.len() && (old_range.len() == new_range.len()) {
76        d.equal(0, 0, old_range.len())?;
77        d.finish()?;
78        return Ok(());
79    }
80
81    let maybe_table = make_table(
82        old,
83        common_prefix_len..(old_range.len() - common_suffix_len),
84        new,
85        common_prefix_len..(new_range.len() - common_suffix_len),
86        deadline,
87    );
88    let mut old_idx = 0;
89    let mut new_idx = 0;
90    let new_len = new_range.len() - common_prefix_len - common_suffix_len;
91    let old_len = old_range.len() - common_prefix_len - common_suffix_len;
92
93    if common_prefix_len > 0 {
94        d.equal(old_range.start, new_range.start, common_prefix_len)?;
95    }
96
97    if let Some(table) = maybe_table {
98        while new_idx < new_len && old_idx < old_len {
99            let old_orig_idx = old_range.start + common_prefix_len + old_idx;
100            let new_orig_idx = new_range.start + common_prefix_len + new_idx;
101
102            if new[new_orig_idx] == old[old_orig_idx] {
103                d.equal(old_orig_idx, new_orig_idx, 1)?;
104                old_idx += 1;
105                new_idx += 1;
106            } else if table.get(&(new_idx, old_idx + 1)).unwrap_or(&0)
107                >= table.get(&(new_idx + 1, old_idx)).unwrap_or(&0)
108            {
109                d.delete(old_orig_idx, 1, new_orig_idx)?;
110                old_idx += 1;
111            } else {
112                d.insert(old_orig_idx, new_orig_idx, 1)?;
113                new_idx += 1;
114            }
115        }
116    } else {
117        let old_orig_idx = old_range.start + common_prefix_len + old_idx;
118        let new_orig_idx = new_range.start + common_prefix_len + new_idx;
119        d.delete(old_orig_idx, old_len, new_orig_idx)?;
120        d.insert(old_orig_idx, new_orig_idx, new_len)?;
121    }
122
123    if old_idx < old_len {
124        d.delete(
125            old_range.start + common_prefix_len + old_idx,
126            old_len - old_idx,
127            new_range.start + common_prefix_len + new_idx,
128        )?;
129        old_idx += old_len - old_idx;
130    }
131
132    if new_idx < new_len {
133        d.insert(
134            old_range.start + common_prefix_len + old_idx,
135            new_range.start + common_prefix_len + new_idx,
136            new_len - new_idx,
137        )?;
138    }
139
140    if common_suffix_len > 0 {
141        d.equal(
142            old_range.start + old_len + common_prefix_len,
143            new_range.start + new_len + common_prefix_len,
144            common_suffix_len,
145        )?;
146    }
147
148    d.finish()
149}
150
151fn make_table<Old, New>(
152    old: &Old,
153    old_range: Range<usize>,
154    new: &New,
155    new_range: Range<usize>,
156    deadline: Option<Instant>,
157) -> Option<BTreeMap<(usize, usize), u32>>
158where
159    Old: Index<usize> + ?Sized,
160    New: Index<usize> + ?Sized,
161    New::Output: PartialEq<Old::Output>,
162{
163    let old_len = old_range.len();
164    let new_len = new_range.len();
165    let mut table = BTreeMap::new();
166
167    for i in (0..new_len).rev() {
168        // are we running for too long?  give up on the table
169        if deadline_exceeded(deadline) {
170            return None;
171        }
172
173        for j in (0..old_len).rev() {
174            let val = if new[i] == old[j] {
175                table.get(&(i + 1, j + 1)).unwrap_or(&0) + 1
176            } else {
177                *table
178                    .get(&(i + 1, j))
179                    .unwrap_or(&0)
180                    .max(table.get(&(i, j + 1)).unwrap_or(&0))
181            };
182            if val > 0 {
183                table.insert((i, j), val);
184            }
185        }
186    }
187
188    Some(table)
189}
190
191#[test]
192fn test_table() {
193    let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap();
194    let expected = {
195        let mut m = BTreeMap::new();
196        m.insert((1, 0), 1);
197        m.insert((0, 0), 1);
198        m.insert((2, 0), 1);
199        m
200    };
201    assert_eq!(table, expected);
202}
203
204#[test]
205fn test_diff() {
206    let a: &[usize] = &[0, 1, 2, 3, 4];
207    let b: &[usize] = &[0, 1, 2, 9, 4];
208
209    let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
210    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
211    insta::assert_debug_snapshot!(d.into_inner().ops());
212}
213
214#[test]
215fn test_contiguous() {
216    let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
217    let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
218
219    let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
220    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
221    insta::assert_debug_snapshot!(d.into_inner().ops());
222}
223
224#[test]
225fn test_pat() {
226    let a: &[usize] = &[0, 1, 3, 4, 5];
227    let b: &[usize] = &[0, 1, 4, 5, 8, 9];
228
229    let mut d = crate::algorithms::Capture::new();
230    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
231    insta::assert_debug_snapshot!(d.ops());
232}
233
234#[test]
235fn test_same() {
236    let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
237    let b: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
238
239    let mut d = crate::algorithms::Capture::new();
240    diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
241    insta::assert_debug_snapshot!(d.ops());
242}
243
244#[test]
245fn test_finish_called() {
246    struct HasRunFinish(bool);
247
248    impl DiffHook for HasRunFinish {
249        type Error = ();
250        fn finish(&mut self) -> Result<(), Self::Error> {
251            self.0 = true;
252            Ok(())
253        }
254    }
255
256    let mut d = HasRunFinish(false);
257    let slice = &[1, 2];
258    let slice2 = &[1, 2, 3];
259    diff(&mut d, slice, 0..slice.len(), slice2, 0..slice2.len()).unwrap();
260    assert!(d.0);
261
262    let mut d = HasRunFinish(false);
263    let slice = &[1, 2];
264    diff(&mut d, slice, 0..slice.len(), slice, 0..slice.len()).unwrap();
265    assert!(d.0);
266
267    let mut d = HasRunFinish(false);
268    let slice: &[u8] = &[];
269    diff(&mut d, slice, 0..slice.len(), slice, 0..slice.len()).unwrap();
270    assert!(d.0);
271}
272
273#[test]
274fn test_bad_range_regression() {
275    use crate::algorithms::Capture;
276    use crate::DiffOp;
277    let mut d = Capture::new();
278    diff(&mut d, &[0], 0..1, &[0, 0], 0..2).unwrap();
279    assert_eq!(
280        d.into_ops(),
281        vec![
282            DiffOp::Equal {
283                old_index: 0,
284                new_index: 0,
285                len: 1
286            },
287            DiffOp::Insert {
288                old_index: 1,
289                new_index: 1,
290                new_len: 1
291            }
292        ]
293    );
294}