mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-11 17:36:49 +07:00
25e5566ed3
For the case where the source is not aligned modulo 8 we don't use load-twins to suck the data in and this kills performance since normal loads allocate in the L1 cache (unlike load-twin) and thus big memcpys swipe the entire L1 D-cache. We need to allocate a register window to implement this properly, but that actually simplifies a lot of things as a nice side-effect. Signed-off-by: David S. Miller <davem@davemloft.net>
41 lines
843 B
ArmAsm
41 lines
843 B
ArmAsm
/* NGcopy_to_user.S: Niagara optimized copy to userspace.
|
|
*
|
|
* Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
|
|
*/
|
|
|
|
/* EX_ST(x): emit statement x at local label 98 and register it in the
 * exception table.
 *
 * The macro places a stub at local label 99 in the .fixup section and
 * records the address pair (98b, 99b) in the __ex_table section.  The
 * stub writes ASI_AIUS into %asi, then executes ret followed by
 * "restore %g0, 1, %o0", which puts the value 1 in %o0.
 * NOTE(review): presumably a fault at label 98 resumes at label 99 via
 * the exception table, making the routine return 1 -- confirm against
 * the fault handler, which is not visible here.
 *
 * The macro finishes by switching back to .text with 4-byte alignment,
 * so the code following an EX_ST() use is assembled into .text.
 */
#define EX_ST(x) \
|
|
98: x; \
|
|
.section .fixup; \
|
|
.align 4; \
|
|
99: wr %g0, ASI_AIUS, %asi;\
|
|
ret; \
|
|
restore %g0, 1, %o0; \
|
|
.section __ex_table,"a";\
|
|
.align 4; \
|
|
.word 98b, 99b; \
|
|
.text; \
|
|
.align 4;
|
|
|
|
/* Fallback value for ASI_AIUS, used only when no definition is already
 * in effect at this point.
 * NOTE(review): presumably this matches the value in the kernel's ASI
 * header -- confirm, since that header is not visible from this file.
 */
#ifndef ASI_AIUS
|
|
#define ASI_AIUS 0x11
|
|
#endif
|
|
|
|
/* Macros defined ahead of the #include of "NGmemcpy.S" below.
 * NOTE(review): presumably NGmemcpy.S is a template that consumes these
 * names -- confirm against that file, which is not visible here.
 *
 *   FUNC_NAME            expands to NGcopy_to_user.
 *   STORE(type,src,addr) pastes an "a" suffix onto the given mnemonic
 *                        (type##a) and appends ASI_AIUS after the
 *                        [addr] operand.
 *   STORE_ASI            expands to ASI_BLK_INIT_QUAD_LDD_AIUS.
 *   EX_RETVAL(x)         ignores its argument and expands to %g0.
 */
#define FUNC_NAME NGcopy_to_user
|
|
#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
|
|
#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS
|
|
#define EX_RETVAL(x) %g0
|
|
|
|
/* PREAMBLE (defined only when __KERNEL__ is defined): reads %asi into
 * %g1, compares it with ASI_AIUS, and branches to memcpy_user_stub when
 * the two differ.  The branch is marked predicted-not-taken (,pn), tests
 * %icc, and is followed by a nop.
 * The sequence overwrites %g1 and the integer condition codes.
 */
#ifdef __KERNEL__
|
|
/* Writing to %asi is _expensive_ so we hardcode it.
|
|
* Reading %asi to check for KERNEL_DS is comparatively
|
|
* cheap.
|
|
*/
|
|
#define PREAMBLE \
|
|
rd %asi, %g1; \
|
|
cmp %g1, ASI_AIUS; \
|
|
bne,pn %icc, memcpy_user_stub; \
|
|
nop
|
|
#endif
|
|
|
|
#include "NGmemcpy.S"
|