1use std::collections::hash_map::Entry;
2use std::collections::HashMap;
3use std::fmt::Debug;
4use std::hash::{Hash, Hasher};
5use std::ops::{Add, Index, Range};
6
7#[inline(always)]
9#[allow(clippy::neg_cmp_op_on_partial_ord)]
10pub fn is_empty_range<T: PartialOrd<T>>(range: &Range<T>) -> bool {
11 !(range.start < range.end)
12}
13
14pub struct UniqueItem<'a, Idx: ?Sized> {
19 lookup: &'a Idx,
20 index: usize,
21}
22
23impl<Idx: ?Sized> UniqueItem<'_, Idx>
24where
25 Idx: Index<usize>,
26{
27 #[inline(always)]
29 pub fn value(&self) -> &Idx::Output {
30 &self.lookup[self.index]
31 }
32
33 #[inline(always)]
35 pub fn original_index(&self) -> usize {
36 self.index
37 }
38}
39
40impl<'a, Idx: Index<usize> + 'a> Debug for UniqueItem<'a, Idx>
41where
42 Idx::Output: Debug,
43{
44 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
45 f.debug_struct("UniqueItem")
46 .field("value", &self.value())
47 .field("original_index", &self.original_index())
48 .finish()
49 }
50}
51
52impl<'a, 'b, A, B> PartialEq<UniqueItem<'a, A>> for UniqueItem<'b, B>
53where
54 A: Index<usize> + 'b + ?Sized,
55 B: Index<usize> + 'b + ?Sized,
56 B::Output: PartialEq<A::Output>,
57{
58 #[inline(always)]
59 fn eq(&self, other: &UniqueItem<'a, A>) -> bool {
60 self.value() == other.value()
61 }
62}
63
64pub fn unique<Idx>(lookup: &Idx, range: Range<usize>) -> Vec<UniqueItem<Idx>>
69where
70 Idx: Index<usize> + ?Sized,
71 Idx::Output: Hash + Eq,
72{
73 let mut by_item = HashMap::new();
74 for index in range {
75 match by_item.entry(&lookup[index]) {
76 Entry::Vacant(entry) => {
77 entry.insert(Some(index));
78 }
79 Entry::Occupied(mut entry) => {
80 let entry = entry.get_mut();
81 if entry.is_some() {
82 *entry = None
83 }
84 }
85 }
86 }
87 let mut rv = by_item
88 .into_iter()
89 .filter_map(|(_, x)| x)
90 .map(|index| UniqueItem { lookup, index })
91 .collect::<Vec<_>>();
92 rv.sort_by_key(|a| a.original_index());
93 rv
94}
95
96pub fn common_prefix_len<Old, New>(
98 old: &Old,
99 old_range: Range<usize>,
100 new: &New,
101 new_range: Range<usize>,
102) -> usize
103where
104 Old: Index<usize> + ?Sized,
105 New: Index<usize> + ?Sized,
106 New::Output: PartialEq<Old::Output>,
107{
108 if is_empty_range(&old_range) || is_empty_range(&new_range) {
109 return 0;
110 }
111 new_range
112 .zip(old_range)
113 .take_while(
114 #[inline(always)]
115 |x| new[x.0] == old[x.1],
116 )
117 .count()
118}
119
120pub fn common_suffix_len<Old, New>(
122 old: &Old,
123 old_range: Range<usize>,
124 new: &New,
125 new_range: Range<usize>,
126) -> usize
127where
128 Old: Index<usize> + ?Sized,
129 New: Index<usize> + ?Sized,
130 New::Output: PartialEq<Old::Output>,
131{
132 if is_empty_range(&old_range) || is_empty_range(&new_range) {
133 return 0;
134 }
135 new_range
136 .rev()
137 .zip(old_range.rev())
138 .take_while(
139 #[inline(always)]
140 |x| new[x.0] == old[x.1],
141 )
142 .count()
143}
144
145struct OffsetLookup<Int> {
146 offset: usize,
147 vec: Vec<Int>,
148}
149
150impl<Int> Index<usize> for OffsetLookup<Int> {
151 type Output = Int;
152
153 #[inline(always)]
154 fn index(&self, index: usize) -> &Self::Output {
155 &self.vec[index - self.offset]
156 }
157}
158
159pub struct IdentifyDistinct<Int> {
186 old: OffsetLookup<Int>,
187 new: OffsetLookup<Int>,
188}
189
190impl<Int> IdentifyDistinct<Int>
191where
192 Int: Add<Output = Int> + From<u8> + Default + Copy,
193{
194 pub fn new<Old, New>(
196 old: &Old,
197 old_range: Range<usize>,
198 new: &New,
199 new_range: Range<usize>,
200 ) -> Self
201 where
202 Old: Index<usize> + ?Sized,
203 Old::Output: Eq + Hash,
204 New: Index<usize> + ?Sized,
205 New::Output: Eq + Hash + PartialEq<Old::Output>,
206 {
207 enum Key<'old, 'new, Old: ?Sized, New: ?Sized> {
208 Old(&'old Old),
209 New(&'new New),
210 }
211
212 impl<Old, New> Hash for Key<'_, '_, Old, New>
213 where
214 Old: Hash + ?Sized,
215 New: Hash + ?Sized,
216 {
217 fn hash<H: Hasher>(&self, state: &mut H) {
218 match *self {
219 Key::Old(val) => val.hash(state),
220 Key::New(val) => val.hash(state),
221 }
222 }
223 }
224
225 impl<Old, New> PartialEq for Key<'_, '_, Old, New>
226 where
227 Old: Eq + ?Sized,
228 New: Eq + PartialEq<Old> + ?Sized,
229 {
230 #[inline(always)]
231 fn eq(&self, other: &Self) -> bool {
232 match (self, other) {
233 (Key::Old(a), Key::Old(b)) => a == b,
234 (Key::New(a), Key::New(b)) => a == b,
235 (Key::Old(a), Key::New(b)) | (Key::New(b), Key::Old(a)) => b == a,
236 }
237 }
238 }
239
240 impl<Old, New> Eq for Key<'_, '_, Old, New>
241 where
242 Old: Eq + ?Sized,
243 New: Eq + PartialEq<Old> + ?Sized,
244 {
245 }
246
247 let mut map = HashMap::new();
248 let mut old_seq = Vec::new();
249 let mut new_seq = Vec::new();
250 let mut next_id = Int::default();
251 let step = Int::from(1);
252 let old_start = old_range.start;
253 let new_start = new_range.start;
254
255 for idx in old_range {
256 let item = Key::Old(&old[idx]);
257 let id = match map.entry(item) {
258 Entry::Occupied(o) => *o.get(),
259 Entry::Vacant(v) => {
260 let id = next_id;
261 next_id = next_id + step;
262 *v.insert(id)
263 }
264 };
265 old_seq.push(id);
266 }
267
268 for idx in new_range {
269 let item = Key::New(&new[idx]);
270 let id = match map.entry(item) {
271 Entry::Occupied(o) => *o.get(),
272 Entry::Vacant(v) => {
273 let id = next_id;
274 next_id = next_id + step;
275 *v.insert(id)
276 }
277 };
278 new_seq.push(id);
279 }
280
281 IdentifyDistinct {
282 old: OffsetLookup {
283 offset: old_start,
284 vec: old_seq,
285 },
286 new: OffsetLookup {
287 offset: new_start,
288 vec: new_seq,
289 },
290 }
291 }
292
293 pub fn old_lookup(&self) -> &impl Index<usize, Output = Int> {
295 &self.old
296 }
297
298 pub fn new_lookup(&self) -> &impl Index<usize, Output = Int> {
300 &self.new
301 }
302
303 pub fn old_range(&self) -> Range<usize> {
305 self.old.offset..self.old.offset + self.old.vec.len()
306 }
307
308 pub fn new_range(&self) -> Range<usize> {
310 self.new.offset..self.new.offset + self.new.vec.len()
311 }
312}
313
314#[test]
315fn test_unique() {
316 let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6)
317 .into_iter()
318 .map(|x| (*x.value(), x.original_index()))
319 .collect::<Vec<_>>();
320 assert_eq!(u, vec![('a', 0), ('c', 2)]);
321}
322
323#[test]
324fn test_int_hasher() {
325 let ih = IdentifyDistinct::<u8>::new(
326 &["", "foo", "bar", "baz"][..],
327 1..4,
328 &["", "foo", "blah", "baz"][..],
329 1..4,
330 );
331 assert_eq!(ih.old_lookup()[1], 0);
332 assert_eq!(ih.old_lookup()[2], 1);
333 assert_eq!(ih.old_lookup()[3], 2);
334 assert_eq!(ih.new_lookup()[1], 0);
335 assert_eq!(ih.new_lookup()[2], 3);
336 assert_eq!(ih.new_lookup()[3], 2);
337 assert_eq!(ih.old_range(), 1..4);
338 assert_eq!(ih.new_range(), 1..4);
339}
340
341#[test]
342fn test_common_prefix_len() {
343 assert_eq!(
344 common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
345 0
346 );
347 assert_eq!(
348 common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
349 7
350 );
351 assert_eq!(
352 common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
353 0
354 );
355 assert_eq!(
356 common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
357 4
358 );
359}
360
361#[test]
362fn test_common_suffix_len() {
363 assert_eq!(
364 common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
365 0
366 );
367 assert_eq!(
368 common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
369 4
370 );
371 assert_eq!(
372 common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
373 0
374 );
375 assert_eq!(
376 common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
377 2
378 );
379}