mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-22 19:03:28 +07:00
36126f8f2e
This changes the interfaces in <asm/word-at-a-time.h> to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized <asm/word-at-a-time.h> file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic; it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely.) The <asm/word-at-a-time.h> functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an *exact* mask of which byte had the first zero. This is run directly *outside* the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly *which* byte is the first one to contain a zero. If you do multiple byte lookups concurrently (e.g. "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the results of those can be or'ed together to get the "either or" case. 
- The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
||
---|---|---|
.. | ||
numachip | ||
uv | ||
visws | ||
xen | ||
a.out-core.h | ||
a.out.h | ||
acpi.h | ||
aes.h | ||
agp.h | ||
alternative-asm.h | ||
alternative.h | ||
amd_nb.h | ||
apb_timer.h | ||
apic_flat_64.h | ||
apic.h | ||
apicdef.h | ||
apm.h | ||
arch_hweight.h | ||
archrandom.h | ||
asm-offsets.h | ||
asm.h | ||
atomic64_32.h | ||
atomic64_64.h | ||
atomic.h | ||
auxvec.h | ||
barrier.h | ||
bios_ebda.h | ||
bitops.h | ||
bitsperlong.h | ||
boot.h | ||
bootparam.h | ||
bug.h | ||
bugs.h | ||
byteorder.h | ||
cache.h | ||
cacheflush.h | ||
calgary.h | ||
calling.h | ||
ce4100.h | ||
checksum_32.h | ||
checksum_64.h | ||
checksum.h | ||
clocksource.h | ||
cmpxchg_32.h | ||
cmpxchg_64.h | ||
cmpxchg.h | ||
compat.h | ||
cpu_device_id.h | ||
cpu.h | ||
cpufeature.h | ||
cpumask.h | ||
cputime.h | ||
current.h | ||
debugreg.h | ||
delay.h | ||
desc_defs.h | ||
desc.h | ||
device.h | ||
div64.h | ||
dma-contiguous.h | ||
dma-mapping.h | ||
dma.h | ||
dmi.h | ||
dwarf2.h | ||
e820.h | ||
edac.h | ||
efi.h | ||
elf.h | ||
emergency-restart.h | ||
entry_arch.h | ||
errno.h | ||
exec.h | ||
fb.h | ||
fcntl.h | ||
fixmap.h | ||
floppy.h | ||
fpu-internal.h | ||
frame.h | ||
ftrace.h | ||
futex.h | ||
gart.h | ||
genapic.h | ||
geode.h | ||
gpio.h | ||
hardirq.h | ||
highmem.h | ||
hpet.h | ||
hugetlb.h | ||
hw_breakpoint.h | ||
hw_irq.h | ||
hypertransport.h | ||
hyperv.h | ||
hypervisor.h | ||
i387.h | ||
i8259.h | ||
ia32_unistd.h | ||
ia32.h | ||
idle.h | ||
inat_types.h | ||
inat.h | ||
init.h | ||
insn.h | ||
inst.h | ||
intel_scu_ipc.h | ||
io_apic.h | ||
io.h | ||
ioctl.h | ||
ioctls.h | ||
iomap.h | ||
iommu_table.h | ||
iommu.h | ||
ipcbuf.h | ||
ipi.h | ||
irq_regs.h | ||
irq_remapping.h | ||
irq_vectors.h | ||
irq.h | ||
irqflags.h | ||
ist.h | ||
jump_label.h | ||
kbdleds.h | ||
Kbuild | ||
kdebug.h | ||
kexec.h | ||
kgdb.h | ||
kmap_types.h | ||
kmemcheck.h | ||
kprobes.h | ||
kvm_emulate.h | ||
kvm_host.h | ||
kvm_para.h | ||
kvm.h | ||
ldt.h | ||
lguest_hcall.h | ||
lguest.h | ||
linkage.h | ||
local64.h | ||
local.h | ||
mach_timer.h | ||
mach_traps.h | ||
math_emu.h | ||
mc146818rtc.h | ||
mce.h | ||
microcode.h | ||
mman.h | ||
mmconfig.h | ||
mmu_context.h | ||
mmu.h | ||
mmx.h | ||
mmzone_32.h | ||
mmzone_64.h | ||
mmzone.h | ||
module.h | ||
mpspec_def.h | ||
mpspec.h | ||
mrst-vrtc.h | ||
mrst.h | ||
msgbuf.h | ||
mshyperv.h | ||
msidef.h | ||
msr-index.h | ||
msr.h | ||
mtrr.h | ||
mutex_32.h | ||
mutex_64.h | ||
mutex.h | ||
mwait.h | ||
nmi.h | ||
nops.h | ||
numa_32.h | ||
numa_64.h | ||
numa.h | ||
numaq.h | ||
olpc_ofw.h | ||
olpc.h | ||
page_32_types.h | ||
page_32.h | ||
page_64_types.h | ||
page_64.h | ||
page_types.h | ||
page.h | ||
param.h | ||
paravirt_types.h | ||
paravirt.h | ||
parport.h | ||
pat.h | ||
pci_64.h | ||
pci_x86.h | ||
pci-direct.h | ||
pci-functions.h | ||
pci.h | ||
percpu.h | ||
perf_event_p4.h | ||
perf_event.h | ||
pgalloc.h | ||
pgtable_32_types.h | ||
pgtable_32.h | ||
pgtable_64_types.h | ||
pgtable_64.h | ||
pgtable_types.h | ||
pgtable-2level_types.h | ||
pgtable-2level.h | ||
pgtable-3level_types.h | ||
pgtable-3level.h | ||
pgtable.h | ||
poll.h | ||
posix_types_32.h | ||
posix_types_64.h | ||
posix_types_x32.h | ||
posix_types.h | ||
prctl.h | ||
probe_roms.h | ||
processor-cyrix.h | ||
processor-flags.h | ||
processor.h | ||
prom.h | ||
proto.h | ||
ptrace-abi.h | ||
ptrace.h | ||
pvclock-abi.h | ||
pvclock.h | ||
reboot_fixups.h | ||
reboot.h | ||
required-features.h | ||
resource.h | ||
resume-trace.h | ||
rio.h | ||
rtc.h | ||
rwlock.h | ||
rwsem.h | ||
scatterlist.h | ||
seccomp_32.h | ||
seccomp_64.h | ||
seccomp.h | ||
sections.h | ||
segment.h | ||
sembuf.h | ||
serial.h | ||
serpent.h | ||
setup_arch.h | ||
setup.h | ||
shmbuf.h | ||
shmparam.h | ||
sigcontext32.h | ||
sigcontext.h | ||
sigframe.h | ||
sighandling.h | ||
siginfo.h | ||
signal.h | ||
smp.h | ||
smpboot_hooks.h | ||
socket.h | ||
sockios.h | ||
sparsemem.h | ||
special_insns.h | ||
spinlock_types.h | ||
spinlock.h | ||
stackprotector.h | ||
stacktrace.h | ||
stat.h | ||
statfs.h | ||
string_32.h | ||
string_64.h | ||
string.h | ||
suspend_32.h | ||
suspend_64.h | ||
suspend.h | ||
svm.h | ||
swab.h | ||
swiotlb.h | ||
switch_to.h | ||
sync_bitops.h | ||
sys_ia32.h | ||
syscall.h | ||
syscalls.h | ||
tce.h | ||
termbits.h | ||
termios.h | ||
thread_info.h | ||
time.h | ||
timer.h | ||
timex.h | ||
tlb.h | ||
tlbflush.h | ||
topology.h | ||
trampoline.h | ||
traps.h | ||
tsc.h | ||
types.h | ||
uaccess_32.h | ||
uaccess_64.h | ||
uaccess.h | ||
ucontext.h | ||
unaligned.h | ||
unistd.h | ||
uprobes.h | ||
user32.h | ||
user_32.h | ||
user_64.h | ||
user.h | ||
vdso.h | ||
vga.h | ||
vgtod.h | ||
virtext.h | ||
vm86.h | ||
vmx.h | ||
vsyscall.h | ||
vvar.h | ||
word-at-a-time.h | ||
x2apic.h | ||
x86_init.h | ||
xcr.h | ||
xor_32.h | ||
xor_64.h | ||
xor_avx.h | ||
xor.h | ||
xsave.h |