From 373451952e4e067648bb6fa929262c3e72be520d Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Thu, 24 May 2018 08:31:47 -0400 Subject: [PATCH] move code over from rustc --- .gitignore | 2 + Cargo.lock | 4 ++ Cargo.toml | 6 +++ README.md | 14 ++++++- src/lib.rs | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/lib.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53eaa21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..0c64114 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,4 @@ +[[package]] +name = "rustc-hash" +version = "0.1.0" + diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..fe105ac --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "rustc-hash" +version = "0.1.0" +authors = ["Niko Matsakis "] + +[dependencies] diff --git a/README.md b/README.md index 7de120c..ce1b789 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,14 @@ # rustc-hash -Custom hash algorithm used by rustc (plus hashmap/set aliases): fast, deterministic, not secure + +A speedy hash algorithm used within rustc. The hashmap in liballoc by +default uses SipHash which isn't quite as speedy as we want. In the +compiler we're not really worried about DOS attempts, so we use a fast +non-cryptographic hash. + +This is the same as the algorithm used by Firefox -- which is a +homespun one not based on any widely-known algorithm -- though +modified to produce 64-bit hash values instead of 32-bit hash +values. It consistently out-performs an FNV-based hash within rustc +itself -- the collision rate is similar or slightly worse than FNV, +but the speed of the hash function itself is much higher because it +works on up to 8 bytes at a time. diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..5bf2543 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,109 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::collections::{HashMap, HashSet}; +use std::default::Default; +use std::hash::{Hasher, Hash, BuildHasherDefault}; +use std::ops::BitXor; + +pub type FxHashMap = HashMap>; +pub type FxHashSet = HashSet>; + +#[allow(non_snake_case)] +pub fn FxHashMap() -> FxHashMap { + HashMap::default() +} + +#[allow(non_snake_case)] +pub fn FxHashSet() -> FxHashSet { + HashSet::default() +} + +/// A speedy hash algorithm for use within rustc. The hashmap in liballoc +/// by default uses SipHash which isn't quite as speedy as we want. In the +/// compiler we're not really worried about DOS attempts, so we use a fast +/// non-cryptographic hash. +/// +/// This is the same as the algorithm used by Firefox -- which is a homespun +/// one not based on any widely-known algorithm -- though modified to produce +/// 64-bit hash values instead of 32-bit hash values. It consistently +/// out-performs an FNV-based hash within rustc itself -- the collision rate is +/// similar or slightly worse than FNV, but the speed of the hash function +/// itself is much higher because it works on up to 8 bytes at a time. +pub struct FxHasher { + hash: usize +} + +#[cfg(target_pointer_width = "32")] +const K: usize = 0x9e3779b9; +#[cfg(target_pointer_width = "64")] +const K: usize = 0x517cc1b727220a95; + +impl Default for FxHasher { + #[inline] + fn default() -> FxHasher { + FxHasher { hash: 0 } + } +} + +impl FxHasher { + #[inline] + fn add_to_hash(&mut self, i: usize) { + self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K); + } +} + +impl Hasher for FxHasher { + #[inline] + fn write(&mut self, bytes: &[u8]) { + for byte in bytes { + let i = *byte; + self.add_to_hash(i as usize); + } + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.add_to_hash(i as usize); + } + + #[cfg(target_pointer_width = "32")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + self.add_to_hash((i >> 32) as usize); + } + + #[cfg(target_pointer_width = "64")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.add_to_hash(i); + } + + #[inline] + fn finish(&self) -> u64 { + self.hash as u64 + } +}