similar/text/
inline.rs

1use std::borrow::Cow;
2use std::fmt;
3
4use crate::deadline_support::Instant;
5use crate::text::{DiffableStr, TextDiff};
6use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag};
7use crate::{capture_diff_deadline, get_diff_ratio};
8
9use std::ops::Index;
10
11use super::utils::upper_seq_ratio;
12
13struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> {
14    strings: &'bufs [&'s T],
15    seqs: Vec<(&'s T, usize, usize)>,
16}
17
18impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> {
19    fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> {
20        let mut seqs = Vec::new();
21        for (string_idx, string) in strings.iter().enumerate() {
22            let mut offset = 0;
23            let iter = {
24                #[cfg(feature = "unicode")]
25                {
26                    string.tokenize_unicode_words()
27                }
28                #[cfg(not(feature = "unicode"))]
29                {
30                    string.tokenize_words()
31                }
32            };
33            for word in iter {
34                seqs.push((word, string_idx, offset));
35                offset += word.len();
36            }
37        }
38        MultiLookup { strings, seqs }
39    }
40
41    pub fn len(&self) -> usize {
42        self.seqs.len()
43    }
44
45    fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> {
46        let mut last = None;
47        let mut rv = Vec::new();
48
49        for offset in 0..len {
50            let (s, str_idx, char_idx) = self.seqs[idx + offset];
51            last = match last {
52                None => Some((str_idx, char_idx, s.len())),
53                Some((last_str_idx, start_char_idx, last_len)) => {
54                    if last_str_idx == str_idx {
55                        Some((str_idx, start_char_idx, last_len + s.len()))
56                    } else {
57                        rv.push((
58                            last_str_idx,
59                            self.strings[last_str_idx]
60                                .slice(start_char_idx..start_char_idx + last_len),
61                        ));
62                        Some((str_idx, char_idx, s.len()))
63                    }
64                }
65            };
66        }
67
68        if let Some((str_idx, start_char_idx, len)) = last {
69            rv.push((
70                str_idx,
71                self.strings[str_idx].slice(start_char_idx..start_char_idx + len),
72            ));
73        }
74
75        rv
76    }
77}
78
79impl<T: DiffableStr + ?Sized> Index<usize> for MultiLookup<'_, '_, T> {
80    type Output = T;
81
82    fn index(&self, index: usize) -> &Self::Output {
83        self.seqs[index].0
84    }
85}
86
87fn push_values<'s, T: DiffableStr + ?Sized>(
88    v: &mut Vec<Vec<(bool, &'s T)>>,
89    idx: usize,
90    emphasized: bool,
91    s: &'s T,
92) {
93    v.resize_with(v.len().max(idx + 1), Vec::new);
94    // newlines cause all kinds of wacky stuff if they end up highlighted.
95    // because of this we want to unemphasize all newlines we encounter.
96    if emphasized {
97        for seg in s.tokenize_lines_and_newlines() {
98            v[idx].push((!seg.ends_with_newline(), seg));
99        }
100    } else {
101        v[idx].push((false, s));
102    }
103}
104
105/// Represents the expanded textual change with inline highlights.
106///
107/// This is like [`Change`] but with inline highlight info.
108#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
109#[cfg_attr(feature = "serde", derive(serde::Serialize))]
110pub struct InlineChange<'s, T: DiffableStr + ?Sized> {
111    tag: ChangeTag,
112    old_index: Option<usize>,
113    new_index: Option<usize>,
114    values: Vec<(bool, &'s T)>,
115}
116
117impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
118    /// Returns the change tag.
119    pub fn tag(&self) -> ChangeTag {
120        self.tag
121    }
122
123    /// Returns the old index if available.
124    pub fn old_index(&self) -> Option<usize> {
125        self.old_index
126    }
127
128    /// Returns the new index if available.
129    pub fn new_index(&self) -> Option<usize> {
130        self.new_index
131    }
132
133    /// Returns the changed values.
134    ///
135    /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
136    /// is true if it should be highlighted as an inline diff.
137    ///
138    /// Depending on the type of the underlying [`DiffableStr`] this value is
139    /// more or less useful.  If you always want to have a utf-8 string it's
140    /// better to use the [`InlineChange::iter_strings_lossy`] method.
141    pub fn values(&self) -> &[(bool, &'s T)] {
142        &self.values
143    }
144
145    /// Iterates over all (potentially lossy) utf-8 decoded values.
146    ///
147    /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
148    /// is true if it should be highlighted as an inline diff.
149    ///
150    /// By default, words are split by whitespace, which results in coarser diff.
151    /// For example: `"f(x) y"` is tokenized as `["f(x)", "y"]`.
152    ///
153    /// If you want it to be tokenized instead as `["f(", "x", ")"]`,
154    /// you should enable the `"unicode"` flag.
155    pub fn iter_strings_lossy(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> {
156        self.values()
157            .iter()
158            .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy()))
159    }
160
161    /// Returns `true` if this change does not end in a newline and must be
162    /// followed up by one if line based diffs are used.
163    pub fn missing_newline(&self) -> bool {
164        !self.values.last().map_or(true, |x| x.1.ends_with_newline())
165    }
166}
167
168impl<'s, T: DiffableStr + ?Sized> From<Change<&'s T>> for InlineChange<'s, T> {
169    fn from(change: Change<&'s T>) -> InlineChange<'s, T> {
170        InlineChange {
171            tag: change.tag(),
172            old_index: change.old_index(),
173            new_index: change.new_index(),
174            values: vec![(false, change.value())],
175        }
176    }
177}
178
179impl<T: DiffableStr + ?Sized> fmt::Display for InlineChange<'_, T> {
180    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
181        for (emphasized, value) in self.iter_strings_lossy() {
182            let marker = match (emphasized, self.tag) {
183                (false, _) | (true, ChangeTag::Equal) => "",
184                (true, ChangeTag::Delete) => "-",
185                (true, ChangeTag::Insert) => "+",
186            };
187            write!(f, "{}{}{}", marker, value, marker)?;
188        }
189        if self.missing_newline() {
190            writeln!(f)?;
191        }
192        Ok(())
193    }
194}
195
196const MIN_RATIO: f32 = 0.5;
197
198pub(crate) fn iter_inline_changes<'x, 'diff, 'old, 'new, 'bufs, T>(
199    diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
200    op: &DiffOp,
201    deadline: Option<Instant>,
202) -> impl Iterator<Item = InlineChange<'x, T>> + 'diff
203where
204    T: DiffableStr + ?Sized,
205    'x: 'diff,
206    'old: 'x,
207    'new: 'x,
208{
209    let (tag, old_range, new_range) = op.as_tag_tuple();
210
211    if let DiffTag::Equal | DiffTag::Insert | DiffTag::Delete = tag {
212        return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
213    }
214
215    let mut old_index = old_range.start;
216    let mut new_index = new_range.start;
217    let old_slices = &diff.old_slices()[old_range];
218    let new_slices = &diff.new_slices()[new_range];
219
220    if upper_seq_ratio(old_slices, new_slices) < MIN_RATIO {
221        return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
222    }
223
224    let old_lookup = MultiLookup::new(old_slices);
225    let new_lookup = MultiLookup::new(new_slices);
226
227    let ops = capture_diff_deadline(
228        Algorithm::Patience,
229        &old_lookup,
230        0..old_lookup.len(),
231        &new_lookup,
232        0..new_lookup.len(),
233        deadline,
234    );
235
236    if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < MIN_RATIO {
237        return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
238    }
239
240    let mut old_values = Vec::<Vec<_>>::new();
241    let mut new_values = Vec::<Vec<_>>::new();
242
243    for op in ops {
244        match op {
245            DiffOp::Equal {
246                old_index,
247                len,
248                new_index,
249            } => {
250                for (idx, slice) in old_lookup.get_original_slices(old_index, len) {
251                    push_values(&mut old_values, idx, false, slice);
252                }
253                for (idx, slice) in new_lookup.get_original_slices(new_index, len) {
254                    push_values(&mut new_values, idx, false, slice);
255                }
256            }
257            DiffOp::Delete {
258                old_index, old_len, ..
259            } => {
260                for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) {
261                    push_values(&mut old_values, idx, true, slice);
262                }
263            }
264            DiffOp::Insert {
265                new_index, new_len, ..
266            } => {
267                for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) {
268                    push_values(&mut new_values, idx, true, slice);
269                }
270            }
271            DiffOp::Replace {
272                old_index,
273                old_len,
274                new_index,
275                new_len,
276            } => {
277                for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) {
278                    push_values(&mut old_values, idx, true, slice);
279                }
280                for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) {
281                    push_values(&mut new_values, idx, true, slice);
282                }
283            }
284        }
285    }
286
287    let mut rv = Vec::new();
288
289    for values in old_values {
290        rv.push(InlineChange {
291            tag: ChangeTag::Delete,
292            old_index: Some(old_index),
293            new_index: None,
294            values,
295        });
296        old_index += 1;
297    }
298
299    for values in new_values {
300        rv.push(InlineChange {
301            tag: ChangeTag::Insert,
302            old_index: None,
303            new_index: Some(new_index),
304            values,
305        });
306        new_index += 1;
307    }
308
309    Box::new(rv.into_iter()) as Box<dyn Iterator<Item = _>>
310}
311
// Snapshot test: the inline changes of a small line diff, in debug form.
#[test]
fn test_line_ops_inline() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff";
    let new = "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n";

    let diff = TextDiff::from_lines(old, new);
    assert!(diff.newline_terminated());

    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_inline_changes(op))
        .collect();
    insta::assert_debug_snapshot!(&changes);
}
326
// Snapshot test: the inline changes of a small line diff, serialized as
// pretty-printed JSON.  Only built when the `serde` feature is enabled.
#[test]
#[cfg(feature = "serde")]
fn test_serde() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff";
    let new = "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n";

    let diff = TextDiff::from_lines(old, new);
    assert!(diff.newline_terminated());

    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_inline_changes(op))
        .collect();
    let json = serde_json::to_string_pretty(&changes).unwrap();
    insta::assert_snapshot!(&json);
}