mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-16 14:16:14 +07:00
689de1d6ca
This is a fairly minimal fixup to the horribly bad behavior of hash_64() with certain input patterns. In particular, because the multiplicative value used for the 64-bit hash was intentionally bit-sparse (so that the multiply could be done with shifts and adds on architectures without hardware multipliers), some bits did not get spread out very much. In particular, certain fairly common bit ranges in the input (roughly bits 12-20: commonly with the most information in them when you hash things like byte offsets in files or memory that have block factors that mean that the low bits are often zero) would not necessarily show up much in the result. There's a bigger patch-series brewing to fix up things more completely, but this is the fairly minimal fix for the 64-bit hashing problem. It simply picks a much better constant multiplier, spreading the bits out a lot better. NOTE! For 32-bit architectures, the bad old hash_64() remains the same for now, since 64-bit multiplies are expensive. The bigger hashing cleanup will replace the 32-bit case with something better. The new constants were picked by George Spelvin who wrote that bigger cleanup series. I just picked out the constants and part of the comment from that series. Cc: stable@vger.kernel.org Cc: George Spelvin <linux@horizon.com> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
103 lines
2.7 KiB
C
103 lines
2.7 KiB
C
#ifndef _LINUX_HASH_H
|
|
#define _LINUX_HASH_H
|
|
/* Fast hashing routine for ints, longs and pointers.
|
|
(C) 2002 Nadia Yvette Chambers, IBM */
|
|
|
|
/*
|
|
* Knuth recommends primes in approximately golden ratio to the maximum
|
|
* integer representable by a machine word for multiplicative hashing.
|
|
* Chuck Lever verified the effectiveness of this technique:
|
|
* http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
|
|
*
|
|
* These primes are chosen to be bit-sparse, that is operations on
|
|
* them can use shifts and additions instead of multiplications for
|
|
* machines where multiplications are slow.
|
|
*/
|
|
|
|
#include <asm/types.h>
|
|
#include <linux/compiler.h>
|
|
|
|
/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
|
|
#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
|
|
/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
|
|
#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
|
|
|
|
#if BITS_PER_LONG == 32
|
|
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32
|
|
#define hash_long(val, bits) hash_32(val, bits)
|
|
#elif BITS_PER_LONG == 64
|
|
#define hash_long(val, bits) hash_64(val, bits)
|
|
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64
|
|
#else
|
|
#error Wordsize not 32 or 64
|
|
#endif
|
|
|
|
/*
|
|
* The above primes are actively bad for hashing, since they are
|
|
* too sparse. The 32-bit one is mostly ok, the 64-bit one causes
|
|
* real problems. Besides, the "prime" part is pointless for the
|
|
* multiplicative hash.
|
|
*
|
|
* Although a random odd number will do, it turns out that the golden
|
|
* ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
|
|
* properties.
|
|
*
|
|
* These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2.
|
|
* (See Knuth vol 3, section 6.4, exercise 9.)
|
|
*/
|
|
#define GOLDEN_RATIO_32 0x61C88647
|
|
#define GOLDEN_RATIO_64 0x61C8864680B583EBull
|
|
|
|
static __always_inline u64 hash_64(u64 val, unsigned int bits)
|
|
{
|
|
u64 hash = val;
|
|
|
|
#if BITS_PER_LONG == 64
|
|
hash = hash * GOLDEN_RATIO_64;
|
|
#else
|
|
/* Sigh, gcc can't optimise this alone like it does for 32 bits. */
|
|
u64 n = hash;
|
|
n <<= 18;
|
|
hash -= n;
|
|
n <<= 33;
|
|
hash -= n;
|
|
n <<= 3;
|
|
hash += n;
|
|
n <<= 3;
|
|
hash -= n;
|
|
n <<= 4;
|
|
hash += n;
|
|
n <<= 2;
|
|
hash += n;
|
|
#endif
|
|
|
|
/* High bits are more random, so use them. */
|
|
return hash >> (64 - bits);
|
|
}
|
|
|
|
static inline u32 hash_32(u32 val, unsigned int bits)
|
|
{
|
|
/* On some cpus multiply is faster, on others gcc will do shifts */
|
|
u32 hash = val * GOLDEN_RATIO_PRIME_32;
|
|
|
|
/* High bits are more random, so use them. */
|
|
return hash >> (32 - bits);
|
|
}
|
|
|
|
static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
|
|
{
|
|
return hash_long((unsigned long)ptr, bits);
|
|
}
|
|
|
|
static inline u32 hash32_ptr(const void *ptr)
|
|
{
|
|
unsigned long val = (unsigned long)ptr;
|
|
|
|
#if BITS_PER_LONG == 64
|
|
val ^= (val >> 32);
|
|
#endif
|
|
return (u32)val;
|
|
}
|
|
|
|
#endif /* _LINUX_HASH_H */
|