linux_dsm_epyc7002/arch/powerpc/include/asm
Simon Guo d58badfb7c powerpc/64: enhance memcmp() with VMX instruction for long bytes comparision
This patch adds VMX primitives to do memcmp() in case the compare size
is equal to or greater than 4K bytes. The KSM feature can benefit from this.

Test result with following test program(replace the "^>" with ""):
------
># cat tools/testing/selftests/powerpc/stringloops/memcmp.c
>#include <malloc.h>
>#include <stdlib.h>
>#include <string.h>
>#include <time.h>
>#include "utils.h"
>#define SIZE (1024 * 1024 * 900)
>#define ITERATIONS 40

int test_memcmp(const void *s1, const void *s2, size_t n);

/*
 * Fill two 128-byte-aligned buffers of SIZE bytes with identical data and
 * run test_memcmp() over them ITERATIONS times.
 *
 * Returns 0 when every comparison reports equality. Exits with status 1 if
 * either allocation fails, and aborts if any comparison returns non-zero.
 */
static int testcase(void)
{
	char *s1;
	char *s2;
	unsigned long i;

	s1 = memalign(128, SIZE);
	if (!s1) {
		perror("memalign");
		exit(1);
	}

	s2 = memalign(128, SIZE);
	if (!s2) {
		perror("memalign");
		exit(1);
	}

	/* Both buffers get the same repeating 0x00..0xff byte pattern. */
	for (i = 0; i < SIZE; i++) {
		s1[i] = i & 0xff;
		s2[i] = i & 0xff;
	}

	for (i = 0; i < ITERATIONS; i++) {
		int ret = test_memcmp(s1, s2, SIZE);

		if (ret) {
			/* i is unsigned long, so it must be printed with %lu. */
			printf("return %d at[%lu]! should have returned zero\n", ret, i);
			abort();
		}
	}

	/* Release the buffers instead of leaking them on the success path. */
	free(s2);
	free(s1);

	return 0;
}

/*
 * Program entry point: hands testcase() to test_harness() under the name
 * "memcmp" and uses the harness result as the process exit status.
 */
int main(void)
{
	int status = test_harness(testcase, "memcmp");

	return status;
}
------
Without this patch (but with the first patch "powerpc/64: Align bytes
before fall back to .Lshort in powerpc64 memcmp()." in the series):
	4.726728762 seconds time elapsed                                          ( +-  3.54%)
With VMX patch:
	4.234335473 seconds time elapsed                                          ( +-  2.63%)
		There is ~+10% improvement.

Testing with an unaligned and different-offset version (shifting s1 and s2 by a
random offset within 16 bytes) can achieve an improvement higher than 10%.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-07-24 22:03:21 +10:00
..
book3s powerpc/mm/hash: Add hpte_get_old_v and use that instead of opencoding 2018-07-24 22:03:18 +10:00
nohash powerpc/mm/32: Fix pgtable_page_dtor call 2018-06-26 23:43:14 +10:00
8xx_immap.h
accounting.h
agp.h
archrandom.h
asm-compat.h
asm-offsets.h
asm-prototypes.h powerpc/64: enhance memcmp() with VMX instruction for long bytes comparision 2018-07-24 22:03:21 +10:00
async_tx.h
atomic.h
backlight.h
barrier.h powerpc/64s: Add support for ori barrier_nospec patching 2018-06-03 20:43:44 +10:00
bitops.h
bootx.h
btext.h
bug.h powerpc/pseries, ps3: panic flush kernel messages before halting system 2018-01-22 11:44:24 +11:00
bugs.h
cache.h powerpc/lib: Adjust .balign inside string functions for PPC32 2018-06-04 00:39:19 +10:00
cacheflush.h powerpc/64s/radix: Fix missing ptesync in flush_cache_vmap 2018-06-06 18:50:53 +10:00
cell-pmu.h
cell-regs.h
checksum.h powerpc: Implement csum_ipv6_magic in assembly 2018-06-04 00:39:19 +10:00
cmpxchg.h
code-patching.h powerpc/lib/feature-fixups: use raw_patch_instruction() 2018-01-21 15:06:25 +11:00
compat.h y2038: powerpc: Extend sysvipc data structures 2018-04-20 16:20:13 +02:00
context_tracking.h
copro.h
cpm1.h powerpc/sysdev: change CPM GPIO to platform_device 2018-01-20 23:29:02 -06:00
cpm2.h
cpm.h powerpc/sysdev: change CPM GPIO to platform_device 2018-01-20 23:29:02 -06:00
cpu_has_feature.h
cpufeature.h
cpuidle.h
cputable.h Merge branch 'topic/ppc-kvm' into next 2018-07-19 14:37:57 +10:00
cputhreads.h
cputime.h powerpc/time: inline arch_vtime_task_switch() 2018-06-04 00:39:20 +10:00
current.h
dbdma.h
dbell.h
dcr-generic.h
dcr-mmio.h
dcr-native.h
dcr-regs.h
dcr.h
debug.h powerpc: Add ppc_breakpoint_available() 2018-03-27 23:52:43 +11:00
debugfs.h
delay.h
device.h
disassemble.h
dma-direct.h dma/direct: Handle the memory encryption bit in common code 2018-03-20 10:01:59 +01:00
dma-mapping.h
dma.h
drmem.h powerpc/drmem: Add support for ibm, dynamic-memory-v2 property 2018-01-16 23:26:29 +11:00
dt_cpu_ftrs.h
edac.h
eeh_event.h powerpc/eeh: Manage EEH_PE_RECOVERING inside eeh_handle_normal_event() 2018-03-27 23:44:58 +11:00
eeh.h powerpc/eeh: Avoid misleading message "EEH: no capable adapters found" 2018-07-02 23:54:26 +10:00
ehv_pic.h
elf.h
emergency-restart.h
emulated_ops.h
epapr_hcalls.h powerpc/epapr: Move register keyword at the beginning of declaration 2018-03-13 15:50:32 +11:00
exception-64e.h
exception-64s.h powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit 2018-05-21 20:45:31 -07:00
exec.h
extable.h
fadump.h
fb.h
feature-fixups.h powerpc updates for 4.18 2018-06-07 10:23:33 -07:00
firmware.h powerpc/pseries: Fix duplicate firmware feature for DRC_INFO 2018-02-22 14:32:32 +11:00
fixmap.h
floppy.h
fs_pd.h
fsl_85xx_cache_sram.h
fsl_gtm.h
fsl_hcalls.h
fsl_lbc.h
fsl_pamu_stash.h
fsl_pm.h
ftrace.h Merge branch 'fixes' into next 2018-06-03 20:32:02 +10:00
futex.h
grackle.h
hardirq.h powerpc updates for 4.18 2018-06-07 10:23:33 -07:00
head-64.h powerpc/64s: Add support to take additional parameter in MASKABLE_* macro 2018-01-19 22:37:02 +11:00
heathrow.h
highmem.h
hmi.h KVM: PPC: Book3S HV: Improve handling of debug-trigger HMIs on POWER9 2018-01-18 15:31:25 +11:00
hugetlb.h powerpc/mm/hugetlb: Update huge_ptep_set_access_flags to call __ptep_set_access_flags directly 2018-06-03 20:40:33 +10:00
hvcall.h misc: IBM Virtual Management Channel Driver (VMC) 2018-05-14 16:35:42 +02:00
hvconsole.h
hvcserver.h
hvsi.h
hw_breakpoint.h powerpc/hw_breakpoint: Only disable hw breakpoint if cpu supports it 2018-04-04 21:54:02 +10:00
hw_irq.h powerpc/64s: make PACA_IRQ_HARD_DIS track MSR[EE] closely 2018-07-24 22:03:14 +10:00
hydra.h
i8259.h
ibmebus.h
icswx.h
ide.h
ima.h
imc-pmu.h powerpc/perf: Unregister thread-imc if core-imc not supported 2018-06-03 20:43:37 +10:00
immap_cpm2.h
io_event_irq.h
io-defs.h
io-workarounds.h
io.h powerpc/io: Add __raw_writeq_be() __raw_rm_writeq_be() 2018-05-18 21:59:56 +10:00
iommu.h powerpc/powernv/ioda: Allocate indirect TCE levels on demand 2018-07-16 22:53:11 +10:00
ipic.h
irq_work.h powerpc: Add missing prototype for arch_irq_work_raise() 2018-03-13 15:50:37 +11:00
irq.h powerpc: Add missing prototype for init_IRQ() 2018-03-13 15:50:38 +11:00
irqflags.h powerpc/64: Rename soft_enabled to irq_soft_mask 2018-01-19 22:37:01 +11:00
isa-bridge.h
jump_label.h
Kbuild
kdebug.h
kdump.h
kexec.h kexec_file,x86,powerpc: factor out kexec_file_ops functions 2018-04-13 17:10:27 -07:00
keylargo.h
kgdb.h
kmap_types.h
kprobes.h
kvm_asm.h KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9 2018-03-24 00:39:13 +11:00
kvm_book3s_32.h
kvm_book3s_64.h KVM: PPC: Move nip/ctr/lr/xer registers to pt_regs in kvm_vcpu_arch 2018-05-18 15:38:23 +10:00
kvm_book3s_asm.h KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9 2018-03-24 00:39:13 +11:00
kvm_book3s.h KVM: PPC: Book3S PR: Support TAR handling for PR KVM HTM 2018-06-01 10:30:43 +10:00
kvm_booke_hv_asm.h
kvm_booke.h KVM: PPC: Move nip/ctr/lr/xer registers to pt_regs in kvm_vcpu_arch 2018-05-18 15:38:23 +10:00
kvm_fpu.h
kvm_host.h KVM: PPC: Book3S PR: Add transaction memory save/restore skeleton 2018-06-01 10:29:55 +10:00
kvm_para.h KVM: Introduce paravirtualization hints and KVM_HINTS_DEDICATED 2018-03-06 18:40:44 +01:00
kvm_ppc.h KVM: PPC: Reimplement LOAD_VMX/STORE_VMX instruction mmio emulation with analyse_instr() input 2018-05-22 19:53:00 +10:00
libata-portmap.h
linkage.h
livepatch.h
local.h powerpc/64s: Implement local_t using irq soft masking 2018-01-19 22:37:04 +11:00
lppaca.h powerpc: Fix oops due to bad access of lppaca on bare metal 2018-04-03 21:50:07 +10:00
lv1call.h
machdep.h powerpc: use time64_t in read_persistent_clock 2018-06-03 20:43:33 +10:00
macio.h
mc146818rtc.h
mce.h
mediabay.h
membarrier.h membarrier: Provide GLOBAL_EXPEDITED command 2018-02-05 21:34:31 +01:00
mm-arch-hooks.h
mman.h mm: Add address parameter to arch_validate_prot() 2018-03-18 07:38:47 -07:00
mmu_context.h mm/pkeys, powerpc, x86: Provide an empty vma_pkey() in linux/pkeys.h 2018-05-09 11:50:41 +10:00
mmu-8xx.h powerpc/mm/slice: implement a slice mask cache 2018-03-13 23:43:06 +11:00
mmu-40x.h
mmu-44x.h
mmu-book3e.h powerpc/mm/book3e/64: Remove unsupported 64Kpage size from 64bit booke 2018-05-15 22:29:10 +10:00
mmu.h powerpc/64s: Remove POWER4 support 2018-04-01 00:47:50 +11:00
mmzone.h
module.h powerpc/kbuild: move -mprofile-kernel check to Kconfig 2018-06-11 09:16:29 +09:00
mpc5xxx.h
mpc6xx.h
mpc52xx_psc.h
mpc52xx.h powerpc/52xx: Add missing functions prototypes 2018-05-25 12:04:42 +10:00
mpc85xx.h
mpc5121.h
mpc8260.h
mpic_msgr.h
mpic_timer.h powerpc/mpic_timer: avoid struct timeval 2018-01-21 15:06:16 +11:00
mpic.h
msi_bitmap.h
nmi.h powerpc/64s: Fix build failures with CONFIG_NMI_IPI=n 2018-06-19 23:03:50 +10:00
nvram.h
ohare.h
opal-api.h powerpc/powernv: call OPAL_QUIESCE before OPAL_SIGNAL_SYSTEM_RESET 2018-06-03 20:40:30 +10:00
opal.h powerpc: use time64_t in read_persistent_clock 2018-06-03 20:43:33 +10:00
oprofile_impl.h
paca.h powerpc/64s: Remove POWER9 DD1 support 2018-07-16 11:37:21 +10:00
page_32.h
page_64.h powerpc/mm/slice: create header files dedicated to slices 2018-03-06 09:21:22 +11:00
page.h powerpc/fadump: Do not use hugepages when fadump is active 2018-05-03 23:09:25 +10:00
parport.h
pasemi_dma.h
pci-bridge.h powerpc/powernv/idoa: Remove unnecessary pcidev from pci_dn 2018-01-27 20:39:01 +11:00
pci.h PCI: remove PCI_DMA_BUS_IS_PHYS 2018-05-07 07:15:41 +02:00
percpu.h
perf_event_fsl_emb.h
perf_event_server.h powerpc/perf: Infrastructure to support addition of blacklisted events 2018-03-27 19:25:10 +11:00
perf_event.h
pgalloc.h
pgtable-be-types.h
pgtable-types.h
pgtable.h powerpc/mm/radix: Change pte relax sequence to handle nest MMU hang 2018-06-03 20:40:34 +10:00
pkeys.h powerpc/pkeys: make protection key 0 less special 2018-07-24 21:43:24 +10:00
plpar_wrappers.h powerpc/pseries: put cede MSR[EE] check under IRQ_SOFT_MASK_DEBUG 2018-06-03 20:40:25 +10:00
pmac_feature.h
pmac_low_i2c.h
pmac_pfunc.h powerpc/powermac: Move pmac_pfunc_base_install prototype to header file 2018-05-25 12:04:41 +10:00
pmc.h powerpc/64s: Do not allocate lppaca if we are not virtualized 2018-03-30 23:34:22 +11:00
pmi.h
pnv-ocxl.h ocxl: Rename pnv_ocxl_spa_remove_pe to clarify it's action 2018-06-03 20:40:32 +10:00
pnv-pci.h Revert "powerpc/powernv: Add support for the cxl kernel api on the real phb" 2018-07-02 23:54:32 +10:00
powernv.h powerpc/powernv/npu: Prevent overwriting of pnv_npu2_init_contex() callback parameters 2018-04-24 09:46:57 +10:00
ppc4xx_ocm.h
ppc4xx.h
ppc_asm.h powerpc/32: Use stmw/lmw for registers save/restore in asm 2018-05-18 00:09:06 +10:00
ppc-opcode.h powerpc: add vcmpequd/vcmpequb ppc instruction macro 2018-07-24 22:03:20 +10:00
ppc-pci.h
probes.h
processor.h powerpc: Rename thread_struct.fs to addr_limit 2018-06-03 20:43:42 +10:00
prom.h pseries/drc-info: Search DRC properties for CPU indexes 2018-01-21 16:21:46 +11:00
ps3.h
ps3av.h
ps3gpu.h
ps3stor.h
pte-common.h mm: introduce ARCH_HAS_PTE_SPECIAL 2018-06-07 17:34:35 -07:00
pte-walk.h
ptrace.h
reg_8xx.h powerpc/8xx: Remove CPU6 ERRATA Workaround 2018-01-16 23:47:12 +11:00
reg_a2.h
reg_booke.h
reg_fsl_emb.h
reg.h Merge remote-tracking branch 'remotes/powerpc/topic/ppc-kvm' into kvm-ppc-next 2018-05-31 09:27:10 +10:00
rheap.h powerpc: Add missing prototype 2018-05-25 12:04:43 +10:00
rio.h
rtas.h powerpc: use time64_t in read_persistent_clock 2018-06-03 20:43:33 +10:00
runlatch.h
scom.h
seccomp.h
sections.h
security_features.h powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit 2018-05-21 20:45:31 -07:00
serial.h
setjmp.h
setup.h powerpc/64s: Enable barrier_nospec based on firmware settings 2018-06-03 20:43:45 +10:00
sfp-machine.h
shmparam.h
signal.h
slice.h powerpc/mm/slice: Simplify and optimise slice context initialisation 2018-03-13 23:43:05 +11:00
smp.h powerpc: NMI IPI make NMI IPIs fully sychronous 2018-07-24 22:03:14 +10:00
smu.h
sparsemem.h powerpc/mm: Increase MAX_PHYSMEM_BITS to 128TB with SPARSEMEM_VMEMMAP config 2018-07-24 22:03:17 +10:00
spinlock_types.h
spinlock.h powerpc: Fix oops due to bad access of lppaca on bare metal 2018-04-03 21:50:07 +10:00
spu_csa.h
spu_info.h
spu_priv1.h
spu.h
sstep.h powerpc/sstep: Introduce GETTYPE macro 2018-06-03 21:19:40 +10:00
string.h
swab.h
swiotlb.h
switch_to.h powerpc: use task_pid_nr() for TID allocation 2018-06-03 20:40:31 +10:00
synch.h powerpc/64: Fix smp_wmb barrier definition use use lwsync consistently 2018-03-31 00:10:34 +11:00
syscall.h
syscalls.h powerpc/syscalls: switch rtas(2) to SYSCALL_DEFINE 2018-05-10 23:25:14 +10:00
systbl.h powerpc: Wire up io_pgetevents 2018-06-23 21:43:21 +10:00
tce.h
termios.h
thread_info.h powerpc: Check address limit on user-mode return (TIF_FSCHECK) 2018-06-03 20:43:42 +10:00
time.h powerpc: remove unused to_tm() helper 2018-06-03 20:43:34 +10:00
timex.h
tlb.h powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask 2018-06-03 20:40:36 +10:00
tlbflush.h
tm.h powerpc: Export tm_enable()/tm_disable/tm_abort() APIs 2018-05-24 16:04:02 +10:00
topology.h powerpc/pseries: Fix CONFIG_NUMA=n build 2018-05-08 14:59:56 +10:00
trace_clock.h
trace.h powerpc/pseries: hcall_exit tracepoint retval should be signed 2018-05-10 23:17:43 +10:00
tsi108_irq.h
tsi108_pci.h
tsi108.h
types.h
uaccess.h powerpc: Use barrier_nospec in copy_from_user() 2018-06-03 20:43:45 +10:00
udbg.h
uic.h
unaligned.h
uninorth.h
unistd.h powerpc: Wire up io_pgetevents 2018-06-23 21:43:21 +10:00
uprobes.h
user.h
vas.h
vdso_datapage.h
vdso.h
vga.h
vio.h
word-at-a-time.h
xics.h
xilinx_intc.h
xilinx_pci.h
xive-regs.h powerpc/xive: Remove (almost) unused macros 2018-06-03 20:43:35 +10:00
xive.h KVM changes for 4.16 2018-02-10 13:16:35 -08:00
xmon.h powerpc/xmon: Add __printf annotation to xmon_printf() 2018-05-25 12:04:36 +10:00
xor_altivec.h powerpc/altivec: Add missing prototypes for altivec 2018-05-25 12:04:38 +10:00
xor.h powerpc/altivec: Add missing prototypes for altivec 2018-05-25 12:04:38 +10:00