similar/
utils.rs

1//! Utilities for common diff related operations.
2//!
3//! This module provides specialized utilities and simplified diff operations
4//! for common operations.  It's useful when you want to work with text diffs
5//! and you're interested in getting vectors of these changes directly.
6//!
7//! # Slice Remapping
8//!
9//! When working with [`TextDiff`] it's common that one takes advantage of the
10//! built-in tokenization of the differ.  This for instance lets you do
11//! grapheme level diffs.  This is implemented by the differ generating rather
12//! small slices of strings and running a diff algorithm over them.
13//!
14//! The downside of this is that all the [`DiffOp`] objects produced by the
15//! diffing algorithm encode operations on these rather small slices.  For
16//! a lot of use cases this is not what one wants which can make this very
17//! inconvenient.  This module provides a [`TextDiffRemapper`] which lets you
18//! map from the ranges that the [`TextDiff`] returns to the original input
19//! strings.  For more information see [`TextDiffRemapper`].
20//!
21//! # Simple Diff Functions
22//!
//! This module provides a range of common text diff functions that will
//! produce vectors of `(change_tag, value)` tuples.  They will automatically
//! optimize towards returning the most useful slice that one would expect for
//! the type of diff performed.
27
28use std::hash::Hash;
29use std::ops::{Index, Range};
30
31use crate::{
32    capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff,
33};
34
35struct SliceRemapper<'x, T: ?Sized> {
36    source: &'x T,
37    indexes: Vec<Range<usize>>,
38}
39
40impl<'x, T: DiffableStr + ?Sized> SliceRemapper<'x, T> {
41    fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> {
42        let indexes = slices
43            .iter()
44            .scan(0, |state, item| {
45                let start = *state;
46                let end = start + item.len();
47                *state = end;
48                Some(start..end)
49            })
50            .collect();
51        SliceRemapper { source, indexes }
52    }
53
54    fn slice(&self, range: Range<usize>) -> Option<&'x T> {
55        let start = self.indexes.get(range.start)?.start;
56        let end = self.indexes.get(range.end - 1)?.end;
57        Some(self.source.slice(start..end))
58    }
59}
60
61impl<T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'_, T> {
62    type Output = T;
63
64    fn index(&self, range: Range<usize>) -> &Self::Output {
65        self.slice(range).expect("out of bounds")
66    }
67}
68
69/// A remapper that can remap diff ops to the original slices.
70///
71/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from
72/// two strings and the internal tokenization is used, this remapper can take
73/// a range in the tokenized sequences and remap it to the original string.
74/// This is particularly useful when you want to do things like character or
75/// grapheme level diffs but you want to not have to iterate over small sequences
76/// but large consequitive ones from the source.
77///
78/// ```rust
79/// use similar::{ChangeTag, TextDiff};
80/// use similar::utils::TextDiffRemapper;
81///
82/// let old = "yo! foo bar baz";
83/// let new = "yo! foo bor baz";
84/// let diff = TextDiff::from_words(old, new);
85/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
86/// let changes: Vec<_> = diff.ops()
87///     .iter()
88///     .flat_map(move |x| remapper.iter_slices(x))
89///     .collect();
90///
91/// assert_eq!(changes, vec![
92///     (ChangeTag::Equal, "yo! foo "),
93///     (ChangeTag::Delete, "bar"),
94///     (ChangeTag::Insert, "bor"),
95///     (ChangeTag::Equal, " baz")
96/// ]);
97pub struct TextDiffRemapper<'x, T: ?Sized> {
98    old: SliceRemapper<'x, T>,
99    new: SliceRemapper<'x, T>,
100}
101
102impl<'x, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> {
103    /// Creates a new remapper from strings and slices.
104    pub fn new(
105        old_slices: &[&'x T],
106        new_slices: &[&'x T],
107        old: &'x T,
108        new: &'x T,
109    ) -> TextDiffRemapper<'x, T> {
110        TextDiffRemapper {
111            old: SliceRemapper::new(old, old_slices),
112            new: SliceRemapper::new(new, new_slices),
113        }
114    }
115
116    /// Creates a new remapper from a text diff and the original strings.
117    pub fn from_text_diff<'old, 'new, 'bufs>(
118        diff: &TextDiff<'old, 'new, 'bufs, T>,
119        old: &'x T,
120        new: &'x T,
121    ) -> TextDiffRemapper<'x, T>
122    where
123        'old: 'x,
124        'new: 'x,
125    {
126        TextDiffRemapper {
127            old: SliceRemapper::new(old, diff.old_slices()),
128            new: SliceRemapper::new(new, diff.new_slices()),
129        }
130    }
131
132    /// Slices into the old string.
133    pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> {
134        self.old.slice(range)
135    }
136
137    /// Slices into the new string.
138    pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> {
139        self.new.slice(range)
140    }
141
142    /// Given a diffop yields the changes it encodes against the original strings.
143    ///
144    /// This is the same as the [`DiffOp::iter_slices`] method.
145    ///
146    /// ## Panics
147    ///
148    /// This method can panic if the input strings passed to the constructor
149    /// are incompatible with the input strings passed to the diffing algorithm.
150    pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> {
151        // note: this is equivalent to the code in `DiffOp::iter_slices`.  It is
152        // a copy/paste because the slicing currently cannot be well abstracted
153        // because of lifetime issues caused by the `Index` trait.
154        match *op {
155            DiffOp::Equal { old_index, len, .. } => {
156                Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len)))
157                    .into_iter()
158                    .chain(None)
159            }
160            DiffOp::Insert {
161                new_index, new_len, ..
162            } => Some((
163                ChangeTag::Insert,
164                self.new.slice(new_index..new_index + new_len),
165            ))
166            .into_iter()
167            .chain(None),
168            DiffOp::Delete {
169                old_index, old_len, ..
170            } => Some((
171                ChangeTag::Delete,
172                self.old.slice(old_index..old_index + old_len),
173            ))
174            .into_iter()
175            .chain(None),
176            DiffOp::Replace {
177                old_index,
178                old_len,
179                new_index,
180                new_len,
181            } => Some((
182                ChangeTag::Delete,
183                self.old.slice(old_index..old_index + old_len),
184            ))
185            .into_iter()
186            .chain(Some((
187                ChangeTag::Insert,
188                self.new.slice(new_index..new_index + new_len),
189            ))),
190        }
191        .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds")))
192    }
193}
194
195/// Shortcut for diffing two slices.
196///
197/// This function produces the diff of two slices and returns a vector
198/// with the changes.
199///
200/// ```rust
201/// use similar::{Algorithm, ChangeTag};
202/// use similar::utils::diff_slices;
203///
204/// let old = "foo\nbar\nbaz".lines().collect::<Vec<_>>();
205/// let new = "foo\nbar\nBAZ".lines().collect::<Vec<_>>();
206/// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![
207///     (ChangeTag::Equal, &["foo", "bar"][..]),
208///     (ChangeTag::Delete, &["baz"][..]),
209///     (ChangeTag::Insert, &["BAZ"][..]),
210/// ]);
211/// ```
212pub fn diff_slices<'x, T: PartialEq + Hash + Ord>(
213    alg: Algorithm,
214    old: &'x [T],
215    new: &'x [T],
216) -> Vec<(ChangeTag, &'x [T])> {
217    capture_diff_slices(alg, old, new)
218        .iter()
219        .flat_map(|op| op.iter_slices(old, new))
220        .collect()
221}
222
223/// Shortcut for making a character level diff.
224///
225/// This function produces the diff of two strings and returns a vector
226/// with the changes.  It returns connected slices into the original string
227/// rather than character level slices.
228///
229/// ```rust
230/// use similar::{Algorithm, ChangeTag};
231/// use similar::utils::diff_chars;
232///
233/// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![
234///     (ChangeTag::Equal, "foo"),
235///     (ChangeTag::Delete, "bar"),
236///     (ChangeTag::Insert, "BAR"),
237///     (ChangeTag::Equal, "baz"),
238/// ]);
239/// ```
240pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>(
241    alg: Algorithm,
242    old: &'x T,
243    new: &'x T,
244) -> Vec<(ChangeTag, &'x T::Output)> {
245    let old = old.as_diffable_str();
246    let new = new.as_diffable_str();
247    let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new);
248    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
249    diff.ops()
250        .iter()
251        .flat_map(move |x| remapper.iter_slices(x))
252        .collect()
253}
254
255/// Shortcut for making a word level diff.
256///
257/// This function produces the diff of two strings and returns a vector
258/// with the changes.  It returns connected slices into the original string
259/// rather than word level slices.
260///
261/// ```rust
262/// use similar::{Algorithm, ChangeTag};
263/// use similar::utils::diff_words;
264///
265/// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![
266///     (ChangeTag::Equal, "foo "),
267///     (ChangeTag::Delete, "bar"),
268///     (ChangeTag::Insert, "bor"),
269///     (ChangeTag::Equal, " baz"),
270/// ]);
271/// ```
272pub fn diff_words<'x, T: DiffableStrRef + ?Sized>(
273    alg: Algorithm,
274    old: &'x T,
275    new: &'x T,
276) -> Vec<(ChangeTag, &'x T::Output)> {
277    let old = old.as_diffable_str();
278    let new = new.as_diffable_str();
279    let diff = TextDiff::configure().algorithm(alg).diff_words(old, new);
280    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
281    diff.ops()
282        .iter()
283        .flat_map(move |x| remapper.iter_slices(x))
284        .collect()
285}
286
/// Computes a unicode word level diff of two strings.
///
/// The changes are returned as a vector.  Each diff operation is reported
/// as one connected slice of the original string rather than as one slice
/// per word.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_unicode_words;
///
/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
///     (ChangeTag::Delete, "32.3"),
///     (ChangeTag::Insert, "9.84"),
///     (ChangeTag::Equal, " "),
///     (ChangeTag::Delete, "feet"),
///     (ChangeTag::Insert, "meters"),
///     (ChangeTag::Equal, ", right?")
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let (old, new) = (old.as_diffable_str(), new.as_diffable_str());
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_unicode_words(old, new);
    // Map the per-word ops back onto the untokenized inputs.
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
328
/// Computes a grapheme level diff of two strings.
///
/// The changes are returned as a vector.  Each diff operation is reported
/// as one connected slice of the original string rather than as one slice
/// per grapheme.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_graphemes;
///
/// let old = "The flag of Austria is 🇦🇹";
/// let new = "The flag of Albania is 🇦🇱";
/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The flag of A"),
///     (ChangeTag::Delete, "ustr"),
///     (ChangeTag::Insert, "lban"),
///     (ChangeTag::Equal, "ia is "),
///     (ChangeTag::Delete, "🇦🇹"),
///     (ChangeTag::Insert, "🇦🇱"),
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let (old, new) = (old.as_diffable_str(), new.as_diffable_str());
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_graphemes(old, new);
    // Map the per-grapheme ops back onto the untokenized inputs.
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
369
370/// Shortcut for making a line diff.
371///
372/// This function produces the diff of two slices and returns a vector
373/// with the changes.  Unlike [`diff_chars`] or [`diff_slices`] it returns a
374/// change tag for each line.
375///
376/// ```rust
377/// use similar::{Algorithm, ChangeTag};
378/// use similar::utils::diff_lines;
379///
380/// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![
381///     (ChangeTag::Equal, "foo\n"),
382///     (ChangeTag::Equal, "bar\n"),
383///     (ChangeTag::Equal, "baz\n"),
384///     (ChangeTag::Delete, "blah"),
385///     (ChangeTag::Insert, "blurgh"),
386/// ]);
387/// ```
388pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>(
389    alg: Algorithm,
390    old: &'x T,
391    new: &'x T,
392) -> Vec<(ChangeTag, &'x T::Output)> {
393    TextDiff::configure()
394        .algorithm(alg)
395        .diff_lines(old, new)
396        .iter_all_changes()
397        .map(|change| (change.tag(), change.value()))
398        .collect()
399}
400
/// Checks that ranges of word tokens are remapped to connected slices of
/// the source string and that an out of range token index yields `None`.
#[test]
fn test_remapper() {
    let a = "foo bar baz";
    let words = a.tokenize_words();
    let remap = SliceRemapper::new(a, &words);
    // Whitespace runs are tokens of their own, so `0..3` spans "foo", " "
    // and "bar".
    assert_eq!(remap.slice(0..3), Some("foo bar"));
    assert_eq!(remap.slice(1..3), Some(" bar"));
    assert_eq!(remap.slice(0..1), Some("foo"));
    assert_eq!(remap.slice(0..5), Some("foo bar baz"));
    // There are only five tokens; the sixth does not exist.
    assert_eq!(remap.slice(0..6), None);
}