mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-22 16:51:23 +07:00
1da177e4c3
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
136 lines
3.3 KiB
ArmAsm
136 lines
3.3 KiB
ArmAsm
/* memcpy.S: optimised assembly memcpy
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
|
|
|
|
|
|
.text
|
|
.p2align 4
|
|
|
|
###############################################################################
#
# void *memcpy(void *to, const char *from, size_t count)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the copy
#
# Register usage, as seen in the code below:
#   GR8     - to; never written, so it still holds the destination on return
#   GR9     - from; reused as the running source pointer
#   GR10    - count; reused as the number of bytes left to copy
#   GR11    - step in bytes, used as the index of the updating loads/stores
#   GR3     - running destination pointer
#   GR4-GR7 - data in flight (GR4 first holds to|from|count for the dispatch)
#   ICC0, ICC1, ICC3 - condition codes for the dispatch and the loop tests
#
# - The widest transfer unit that divides to, from and count alike is chosen
#   once, up front.  Count is therefore a multiple of the unit, so the
#   countdown in every loop lands exactly on zero.
# - Instructions carrying the .p suffix are issued together with the
#   instruction that follows them; do not reorder or split those pairs.
#
###############################################################################
	.globl		memcpy,__memcpy_end
	.type		memcpy,@function
memcpy:
	or.p		gr8,gr9,gr4		; GR4 = to | from
	orcc		gr10,gr0,gr0,icc3	; ICC3 = flags for count
	or.p		gr10,gr4,gr4		; GR4 = to | from | count
	beqlr		icc3,#0			; count == 0: nothing to copy

	# optimise based on best common alignment for to, from & count
	andicc.p	gr4,#0x0f,gr0,icc0	; ICC0: low 4 bits all clear?
	setlos		#8,gr11			; step for the 8 and 16 byte loops
	andicc.p	gr4,#0x07,gr0,icc1	; ICC1: low 3 bits all clear?
	beq		icc0,#0,memcpy_16
	andicc.p	gr4,#0x03,gr0,icc0	; ICC0: low 2 bits all clear?
	beq		icc1,#0,memcpy_8
	andicc.p	gr4,#0x01,gr0,icc1	; ICC1: low bit clear?
	beq		icc0,#0,memcpy_4
	setlos.p	#1,gr11			; step for the byte loop
	beq		icc1,#0,memcpy_2

	# do byte by byte copy
	# - both pointers are backed off by one step first, because the updating
	#   loads/stores add GR11 to the pointer before each access; the wider
	#   loops below use the same arrangement
	sub.p		gr8,gr11,gr3		; GR3 = to - step
	sub		gr9,gr11,gr9		; GR9 = from - step
0:	ldubu.p		@(gr9,gr11),gr4
	subicc		gr10,#1,gr10,icc0	; one byte fewer to go
	stbu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b		; loop until the count reaches zero
	bralr

	# do halfword by halfword copy
memcpy_2:
	setlos		#2,gr11
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	lduhu.p		@(gr9,gr11),gr4
	subicc		gr10,#2,gr10,icc0
	sthu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do word by word copy
memcpy_4:
	setlos		#4,gr11
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	ldu.p		@(gr9,gr11),gr4
	subicc		gr10,#4,gr10,icc0
	stu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do double-word by double-word copy
	# - GR11 is still 8 from the dispatch code above
memcpy_8:
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	lddu.p		@(gr9,gr11),gr4
	subicc		gr10,#8,gr10,icc0
	stdu.p		gr4,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr

	# do quad-word by quad-word copy
	# - two 8-byte transfers per pass, so GR11 stays at 8 while the count
	#   drops by 16
memcpy_16:
	sub.p		gr8,gr11,gr3
	sub		gr9,gr11,gr9
0:	lddu		@(gr9,gr11),gr4
	lddu.p		@(gr9,gr11),gr6
	subicc		gr10,#16,gr10,icc0
	stdu		gr4,@(gr3,gr11)
	stdu.p		gr6,@(gr3,gr11)
	bne		icc0,#2,0b
	bralr
__memcpy_end:

	.size		memcpy, __memcpy_end-memcpy
|
|
|
|
###############################################################################
#
# copy to/from userspace
# - return the number of bytes that could not be copied (0 on complete success)
#
# long __memcpy_user(void *dst, const void *src, size_t count)
#
# - an 8-byte stack frame carries two words across the call to memcpy:
#     @(sp,#0)  dst + count, the address one past the last byte to be written
#     @(sp,#4)  the return address taken from LR on entry
# - __memcpy_user_error_lr labels the return point of that call
# - __memcpy_user_error_handler is exported as the entry point that turns an
#   interrupted copy into a count of bytes left; the code that redirects a
#   fault to it is not shown here
#
###############################################################################
	.globl		__memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
	.type		__memcpy_user,@function
__memcpy_user:
	movsg		lr,gr7			; GR7 = return address
	subi.p		sp,#8,sp		; open the 8-byte frame
	add		gr8,gr10,gr6		; calculate expected end address
	stdi		gr6,@(sp,#0)		; GR6 -> @(sp,#0), GR7 -> @(sp,#4)

	# abuse memcpy to do the dirty work
	call		memcpy
__memcpy_user_error_lr:
	ldi.p		@(sp,#4),gr7		; reload the return address
	setlos		#0,gr8			; everything was copied: return 0
	jmpl.p		@(gr7,gr0)		; return, paired with the frame release
	addi		sp,#8,sp

	# deal with any exception generated by memcpy
	# GR3  - running dest pointer of memcpy
	# GR11 - step value of memcpy (index register for store insns)
	# - GR3 + GR11 is taken as the first destination address not yet written
__memcpy_user_error_handler:
	lddi.p		@(sp,#0),gr4		; load GR4 with dst+count, GR5 with ret addr
	add		gr11,gr3,gr7		; GR7 = GR3 + GR11
	sub.p		gr4,gr7,gr8		; return (dst+count) - GR7 = bytes not copied
	addi		sp,#8,sp		; release the frame
	jmpl		@(gr5,gr0)		; return to the caller of __memcpy_user

	.size		__memcpy_user, .-__memcpy_user
|