mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-20 00:06:14 +07:00
f2db633d30
Similar to x86/sparc/powerpc implementations except: 1) we implement an extremely efficient has_zero()/find_zero() sequence with both prep_zero_mask() and create_zero_mask() being no-operations. 2) Our output from prep_zero_mask() differs in that only the lowest eight bits are used to represent the zero bytes; nevertheless, it can be safely ORed with other similar masks from prep_zero_mask() and forms input to create_zero_mask(), the two fundamental properties prep_zero_mask() must satisfy. Tests on EV67 and EV68 CPUs revealed that the generic code is essentially as fast (to within 0.5% of CPU cycles) as the old Alpha-specific code for large quadword-aligned strings, despite the 30% extra CPU instructions executed. In contrast, the generic code for unaligned strings is substantially slower (by more than a factor of 3) than the old Alpha-specific code. Signed-off-by: Michael Cree <mcree@orcon.net.nz> Acked-by: Matt Turner <mattst88@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
56 lines
1.2 KiB
C
56 lines
1.2 KiB
C
#ifndef _ASM_WORD_AT_A_TIME_H
|
|
#define _ASM_WORD_AT_A_TIME_H
|
|
|
|
#include <asm/compiler.h>
|
|
|
|
/*
 * word-at-a-time interface for Alpha.
 */
|
|
|
|
/*
 * We do not use the word_at_a_time struct on Alpha, but it needs to be
 * implemented to humour the generic code.
 */
struct word_at_a_time {
	const unsigned long unused;	/* placeholder member */
};
|
|
|
|
/* Initializer for struct word_at_a_time: zeroes its single placeholder member. */
#define WORD_AT_A_TIME_CONSTANTS { 0 }
|
|
|
|
/*
 * Return nonzero if val has a zero.
 *
 * @val:  word to examine
 * @bits: out-parameter; receives the same mask that is returned
 * @c:    ignored here; present only to match the generic interface
 *
 * The mask is whatever __kernel_cmpbge(0, val) yields.
 * NOTE(review): this presumably sets bit i when byte i of val is zero
 * (CMPBGE compares bytes) - confirm against <asm/compiler.h>.
 */
static inline unsigned long has_zero(unsigned long val, unsigned long *bits,
				     const struct word_at_a_time *c)
{
	unsigned long zero_locations = __kernel_cmpbge(0, val);

	*bits = zero_locations;
	return zero_locations;
}
|
|
|
|
/*
 * No-operation on Alpha: hand back bits exactly as received.
 *
 * @val:  ignored
 * @bits: mask to pass through
 * @c:    ignored; present only to match the generic interface
 *
 * Returns bits unchanged.
 */
static inline unsigned long prep_zero_mask(unsigned long val,
					   unsigned long bits,
					   const struct word_at_a_time *c)
{
	return bits;
}
|
|
|
|
/* Identity: expands to its argument unchanged, so no extra work is done here. */
#define create_zero_mask(bits) (bits)
|
|
|
|
/*
 * Return the index of the lowest set bit of a zero-byte mask.
 *
 * @bits: mask to search
 *
 * When both CONFIG_ALPHA_EV6 and CONFIG_ALPHA_EV67 are defined the result
 * comes straight from __kernel_cttz() (presumably a count of trailing zero
 * bits - confirm against <asm/compiler.h>).
 *
 * Otherwise the index is computed by hand.  That path only looks at the low
 * eight bits of the mask: it returns 0..7 for a lowest set bit in that range,
 * and 0 when bits is zero or its lowest set bit lies above bit 7.
 */
static inline unsigned long find_zero(unsigned long bits)
{
#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
	/* Simple if have CIX instructions */
	return __kernel_cttz(bits);
#else
	unsigned long t1, t2, t3;

	/* Retain lowest set bit only */
	bits &= -bits;

	/*
	 * Binary search for lowest set bit: each mask selects the bit
	 * positions whose index contains the binary digit being tested
	 * (0xf0 -> 4, 0xcc -> 2, 0xaa -> 1).
	 */
	t1 = bits & 0xf0;
	t2 = bits & 0xcc;
	t3 = bits & 0xaa;
	if (t1)
		t1 = 4;
	if (t2)
		t2 = 2;
	if (t3)
		t3 = 1;

	/* The three digits sum to the bit index. */
	return t1 + t2 + t3;
#endif
}
|
|
|
|
#endif /* _ASM_WORD_AT_A_TIME_H */
|