1use core::{
4 fmt,
5 hash::{self, BuildHasher},
6 mem,
7};
8
9use crate::IntoU64;
10
11const PRIME64_1: u64 = 0x9E3779B185EBCA87;
13const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F;
14const PRIME64_3: u64 = 0x165667B19E3779F9;
15const PRIME64_4: u64 = 0x85EBCA77C2B2AE63;
16const PRIME64_5: u64 = 0x27D4EB2F165667C5;
17
18type Lane = u64;
19type Lanes = [Lane; 4];
20type Bytes = [u8; 32];
21
22const BYTES_IN_LANE: usize = mem::size_of::<Bytes>();
23
24#[derive(Clone, PartialEq)]
25struct BufferData(Lanes);
26
27impl BufferData {
28 const fn new() -> Self {
29 Self([0; 4])
30 }
31
32 const fn bytes(&self) -> &Bytes {
33 const _: () = assert!(mem::align_of::<u8>() <= mem::align_of::<Lane>());
34 unsafe { &*self.0.as_ptr().cast() }
37 }
38
39 fn bytes_mut(&mut self) -> &mut Bytes {
40 unsafe { &mut *self.0.as_mut_ptr().cast() }
42 }
43}
44
45impl fmt::Debug for BufferData {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 f.debug_list().entries(self.0.iter()).finish()
48 }
49}
50
51#[derive(Debug, Clone, PartialEq)]
52struct Buffer {
53 offset: usize,
54 data: BufferData,
55}
56
57impl Buffer {
58 const fn new() -> Self {
59 Self {
60 offset: 0,
61 data: BufferData::new(),
62 }
63 }
64
65 #[inline]
67 fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) {
68 if self.offset == 0 {
78 return (None, data);
79 };
80
81 let bytes = self.data.bytes_mut();
82 debug_assert!(self.offset <= bytes.len());
83
84 let empty = &mut bytes[self.offset..];
85 let n_to_copy = usize::min(empty.len(), data.len());
86
87 let dst = &mut empty[..n_to_copy];
88
89 let (src, rest) = data.split_at(n_to_copy);
90
91 dst.copy_from_slice(src);
92 self.offset += n_to_copy;
93
94 debug_assert!(self.offset <= bytes.len());
95
96 if self.offset == bytes.len() {
97 self.offset = 0;
98 (Some(&self.data.0), rest)
99 } else {
100 (None, rest)
101 }
102 }
103
104 #[inline]
106 fn set(&mut self, data: &[u8]) {
107 if data.is_empty() {
108 return;
109 }
110
111 debug_assert_eq!(self.offset, 0);
112
113 let n_to_copy = data.len();
114
115 let bytes = self.data.bytes_mut();
116 debug_assert!(n_to_copy < bytes.len());
117
118 bytes[..n_to_copy].copy_from_slice(data);
119 self.offset = data.len();
120 }
121
122 #[inline]
124 fn remaining(&self) -> &[u8] {
125 &self.data.bytes()[..self.offset]
126 }
127}
128
129#[derive(Clone, PartialEq)]
130struct Accumulators(Lanes);
131
132impl Accumulators {
133 const fn new(seed: u64) -> Self {
134 Self([
135 seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2),
136 seed.wrapping_add(PRIME64_2),
137 seed,
138 seed.wrapping_sub(PRIME64_1),
139 ])
140 }
141
142 #[inline]
144 fn write(&mut self, lanes: Lanes) {
145 let [acc1, acc2, acc3, acc4] = &mut self.0;
146 let [lane1, lane2, lane3, lane4] = lanes;
147
148 *acc1 = round(*acc1, lane1.to_le());
149 *acc2 = round(*acc2, lane2.to_le());
150 *acc3 = round(*acc3, lane3.to_le());
151 *acc4 = round(*acc4, lane4.to_le());
152 }
153
154 #[inline]
156 fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] {
157 while let Some((chunk, rest)) = data.split_first_chunk::<BYTES_IN_LANE>() {
158 let lanes = unsafe { chunk.as_ptr().cast::<Lanes>().read_unaligned() };
161 self.write(lanes);
162 data = rest;
163 }
164 data
165 }
166
167 #[inline]
169 const fn finish(&self) -> u64 {
170 let [acc1, acc2, acc3, acc4] = self.0;
171
172 let mut acc = {
173 let acc1 = acc1.rotate_left(1);
174 let acc2 = acc2.rotate_left(7);
175 let acc3 = acc3.rotate_left(12);
176 let acc4 = acc4.rotate_left(18);
177
178 acc1.wrapping_add(acc2)
179 .wrapping_add(acc3)
180 .wrapping_add(acc4)
181 };
182
183 acc = Self::merge_accumulator(acc, acc1);
184 acc = Self::merge_accumulator(acc, acc2);
185 acc = Self::merge_accumulator(acc, acc3);
186 acc = Self::merge_accumulator(acc, acc4);
187
188 acc
189 }
190
191 #[inline]
193 const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 {
194 acc ^= round(0, acc_n);
195 acc = acc.wrapping_mul(PRIME64_1);
196 acc.wrapping_add(PRIME64_4)
197 }
198}
199
200impl fmt::Debug for Accumulators {
201 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
202 let [acc1, acc2, acc3, acc4] = self.0;
203 f.debug_struct("Accumulators")
204 .field("acc1", &acc1)
205 .field("acc2", &acc2)
206 .field("acc3", &acc3)
207 .field("acc4", &acc4)
208 .finish()
209 }
210}
211
212#[derive(Debug, Clone, PartialEq)]
214pub struct Hasher {
215 seed: u64,
216 accumulators: Accumulators,
217 buffer: Buffer,
218 length: u64,
219}
220
221impl Default for Hasher {
222 fn default() -> Self {
223 Self::with_seed(0)
224 }
225}
226
227impl Hasher {
228 #[must_use]
231 #[inline]
247 pub fn oneshot(seed: u64, data: &[u8]) -> u64 {
248 let len = data.len();
249
250 let mut accumulators = Accumulators::new(seed);
255
256 let data = accumulators.write_many(data);
257
258 Self::finish_with(seed, len.into_u64(), &accumulators, data)
259 }
260
261 #[must_use]
263 pub const fn with_seed(seed: u64) -> Self {
264 Self {
266 seed,
267 accumulators: Accumulators::new(seed),
268 buffer: Buffer::new(),
269 length: 0,
270 }
271 }
272
273 pub const fn seed(&self) -> u64 {
275 self.seed
276 }
277
278 pub const fn total_len(&self) -> u64 {
280 self.length
281 }
282
283 #[must_use]
284 #[inline]
286 fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 {
287 let mut acc = if len < BYTES_IN_LANE.into_u64() {
289 seed.wrapping_add(PRIME64_5)
290 } else {
291 accumulators.finish()
292 };
293
294 acc += len;
296
297 while let Some((chunk, rest)) = remaining.split_first_chunk() {
299 let lane = u64::from_ne_bytes(*chunk).to_le();
300
301 acc ^= round(0, lane);
302 acc = acc.rotate_left(27).wrapping_mul(PRIME64_1);
303 acc = acc.wrapping_add(PRIME64_4);
304 remaining = rest;
305 }
306
307 while let Some((chunk, rest)) = remaining.split_first_chunk() {
308 let lane = u32::from_ne_bytes(*chunk).to_le().into_u64();
309
310 acc ^= lane.wrapping_mul(PRIME64_1);
311 acc = acc.rotate_left(23).wrapping_mul(PRIME64_2);
312 acc = acc.wrapping_add(PRIME64_3);
313
314 remaining = rest;
315 }
316
317 for &byte in remaining {
318 let lane = byte.into_u64();
319
320 acc ^= lane.wrapping_mul(PRIME64_5);
321 acc = acc.rotate_left(11).wrapping_mul(PRIME64_1);
322 }
323
324 acc ^= acc >> 33;
326 acc = acc.wrapping_mul(PRIME64_2);
327 acc ^= acc >> 29;
328 acc = acc.wrapping_mul(PRIME64_3);
329 acc ^= acc >> 32;
330
331 acc
332 }
333}
334
335impl hash::Hasher for Hasher {
336 #[inline]
338 fn write(&mut self, data: &[u8]) {
339 let len = data.len();
340
341 let (buffered_lanes, data) = self.buffer.extend(data);
343
344 if let Some(&lanes) = buffered_lanes {
345 self.accumulators.write(lanes);
346 }
347
348 let data = self.accumulators.write_many(data);
349
350 self.buffer.set(data);
351
352 self.length += len.into_u64();
353 }
354
355 #[inline]
357 fn finish(&self) -> u64 {
358 Self::finish_with(
359 self.seed,
360 self.length,
361 &self.accumulators,
362 self.buffer.remaining(),
363 )
364 }
365}
366
367#[inline]
369const fn round(mut acc: u64, lane: u64) -> u64 {
370 acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2));
371 acc = acc.rotate_left(31);
372 acc.wrapping_mul(PRIME64_1)
373}
374
375#[derive(Clone)]
377pub struct State(u64);
378
379impl State {
380 pub fn with_seed(seed: u64) -> Self {
382 Self(seed)
383 }
384}
385
386impl BuildHasher for State {
387 type Hasher = Hasher;
388
389 fn build_hasher(&self) -> Self::Hasher {
390 Hasher::with_seed(self.0)
391 }
392}
393
394#[cfg(test)]
395mod test {
396 use core::{
397 array,
398 hash::{BuildHasherDefault, Hasher as _},
399 };
400 use std::collections::HashMap;
401
402 use super::*;
403
404 const _TRAITS: () = {
405 const fn is_clone<T: Clone>() {}
406 is_clone::<Hasher>();
407 is_clone::<State>();
408 };
409
410 const EMPTY_BYTES: [u8; 0] = [];
411
412 #[test]
413 fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
414 let bytes = [0x9c; 32];
415
416 let mut byte_by_byte = Hasher::with_seed(0);
417 for byte in bytes.chunks(1) {
418 byte_by_byte.write(byte);
419 }
420 let byte_by_byte = byte_by_byte.finish();
421
422 let mut one_chunk = Hasher::with_seed(0);
423 one_chunk.write(&bytes);
424 let one_chunk = one_chunk.finish();
425
426 assert_eq!(byte_by_byte, one_chunk);
427 }
428
429 #[test]
430 fn hash_of_nothing_matches_c_implementation() {
431 let mut hasher = Hasher::with_seed(0);
432 hasher.write(&EMPTY_BYTES);
433 assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
434 }
435
436 #[test]
437 fn hash_of_single_byte_matches_c_implementation() {
438 let mut hasher = Hasher::with_seed(0);
439 hasher.write(&[42]);
440 assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
441 }
442
443 #[test]
444 fn hash_of_multiple_bytes_matches_c_implementation() {
445 let mut hasher = Hasher::with_seed(0);
446 hasher.write(b"Hello, world!\0");
447 assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
448 }
449
450 #[test]
451 fn hash_of_multiple_chunks_matches_c_implementation() {
452 let bytes: [u8; 100] = array::from_fn(|i| i as u8);
453 let mut hasher = Hasher::with_seed(0);
454 hasher.write(&bytes);
455 assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
456 }
457
458 #[test]
459 fn hash_with_different_seed_matches_c_implementation() {
460 let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91);
461 hasher.write(&EMPTY_BYTES);
462 assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
463 }
464
465 #[test]
466 fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
467 let bytes: [u8; 100] = array::from_fn(|i| i as u8);
468 let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91);
469 hasher.write(&bytes);
470 assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
471 }
472
473 #[test]
474 fn hashes_with_different_offsets_are_the_same() {
475 let bytes = [0x7c; 4096];
476 let expected = Hasher::oneshot(0, &[0x7c; 64]);
477
478 let the_same = bytes
479 .windows(64)
480 .map(|w| {
481 let mut hasher = Hasher::with_seed(0);
482 hasher.write(w);
483 hasher.finish()
484 })
485 .all(|h| h == expected);
486 assert!(the_same);
487 }
488
489 #[test]
490 fn can_be_used_in_a_hashmap_with_a_default_seed() {
491 let mut hash: HashMap<_, _, BuildHasherDefault<Hasher>> = Default::default();
492 hash.insert(42, "the answer");
493 assert_eq!(hash.get(&42), Some(&"the answer"));
494 }
495}
496
497#[cfg(feature = "random")]
498#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
499mod random_impl {
500 use super::*;
501
502 #[derive(Clone)]
505 pub struct RandomState(State);
506
507 impl Default for RandomState {
508 fn default() -> Self {
509 Self::new()
510 }
511 }
512
513 impl RandomState {
514 fn new() -> Self {
515 Self(State::with_seed(rand::random()))
516 }
517 }
518
519 impl BuildHasher for RandomState {
520 type Hasher = Hasher;
521
522 fn build_hasher(&self) -> Self::Hasher {
523 self.0.build_hasher()
524 }
525 }
526
527 #[cfg(test)]
528 mod test {
529 use std::collections::HashMap;
530
531 use super::*;
532
533 const _TRAITS: () = {
534 const fn is_clone<T: Clone>() {}
535 is_clone::<RandomState>();
536 };
537
538 #[test]
539 fn can_be_used_in_a_hashmap_with_a_random_seed() {
540 let mut hash: HashMap<_, _, RandomState> = Default::default();
541 hash.insert(42, "the answer");
542 assert_eq!(hash.get(&42), Some(&"the answer"));
543 }
544 }
545}
546
547#[cfg(feature = "random")]
548#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
549pub use random_impl::*;
550
551#[cfg(feature = "serialize")]
552#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))]
553mod serialize_impl {
554 use serde::{Deserialize, Serialize};
555
556 use super::*;
557
558 impl<'de> Deserialize<'de> for Hasher {
559 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
560 where
561 D: serde::Deserializer<'de>,
562 {
563 let shim = Deserialize::deserialize(deserializer)?;
564
565 let Shim {
566 total_len,
567 seed,
568 core,
569 buffer,
570 buffer_usage,
571 } = shim;
572 let Core { v1, v2, v3, v4 } = core;
573
574 let mut buffer_data = BufferData::new();
575 buffer_data.bytes_mut().copy_from_slice(&buffer);
576
577 Ok(Hasher {
578 seed,
579 accumulators: Accumulators([v1, v2, v3, v4]),
580 buffer: Buffer {
581 offset: buffer_usage,
582 data: buffer_data,
583 },
584 length: total_len,
585 })
586 }
587 }
588
589 impl Serialize for Hasher {
590 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
591 where
592 S: serde::Serializer,
593 {
594 let Hasher {
595 seed,
596 ref accumulators,
597 ref buffer,
598 length,
599 } = *self;
600 let [v1, v2, v3, v4] = accumulators.0;
601 let Buffer { offset, ref data } = *buffer;
602 let buffer = *data.bytes();
603
604 let shim = Shim {
605 total_len: length,
606 seed,
607 core: Core { v1, v2, v3, v4 },
608 buffer,
609 buffer_usage: offset,
610 };
611
612 shim.serialize(serializer)
613 }
614 }
615
616 #[derive(Serialize, Deserialize)]
617 struct Shim {
618 total_len: u64,
619 seed: u64,
620 core: Core,
621 buffer: [u8; 32],
622 buffer_usage: usize,
623 }
624
625 #[derive(Serialize, Deserialize)]
626 struct Core {
627 v1: u64,
628 v2: u64,
629 v3: u64,
630 v4: u64,
631 }
632
633 #[cfg(test)]
634 mod test {
635 use std::hash::Hasher as _;
636
637 use super::*;
638
639 type Result<T = (), E = serde_json::Error> = core::result::Result<T, E>;
640
641 #[test]
642 fn test_serialization_cycle() -> Result {
643 let mut hasher = Hasher::with_seed(0);
644 hasher.write(b"Hello, world!\0");
645 let _ = hasher.finish();
646
647 let serialized = serde_json::to_string(&hasher)?;
648 let unserialized: Hasher = serde_json::from_str(&serialized)?;
649 assert_eq!(hasher, unserialized);
650 Ok(())
651 }
652
653 #[test]
654 fn test_serialization_stability() -> Result {
655 let mut hasher = Hasher::with_seed(0);
656 hasher.write(b"Hello, world!\0");
657 let _ = hasher.finish();
658
659 let expected_serialized = r#"{
660 "total_len": 14,
661 "seed": 0,
662 "core": {
663 "v1": 6983438078262162902,
664 "v2": 14029467366897019727,
665 "v3": 0,
666 "v4": 7046029288634856825
667 },
668 "buffer": [
669 72, 101, 108, 108, 111, 44, 32, 119,
670 111, 114, 108, 100, 33, 0, 0, 0,
671 0, 0, 0, 0, 0, 0, 0, 0,
672 0, 0, 0, 0, 0, 0, 0, 0
673 ],
674 "buffer_usage": 14
675 }"#;
676
677 let unserialized: Hasher = serde_json::from_str(expected_serialized)?;
678 assert_eq!(hasher, unserialized);
679
680 let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?;
681 let actual_value = serde_json::to_value(&hasher)?;
682 assert_eq!(expected_value, actual_value);
683
684 Ok(())
685 }
686 }
687}