mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
182 lines
2.6 KiB
ArmAsm
182 lines
2.6 KiB
ArmAsm
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */

/*
 * memmove -- copy n bytes from src to dest; the regions may overlap.
 * Standard C prototype: void *memmove(void *dest, const void *src, size_t n)
 *
 * Register use, as established by the code below:
 *   $16 = dest, $17 = src, $18 = n (counted down as bytes are copied)
 *   $0  = copy of dest, made on entry
 *   $4  = destination cursor, $5 = source cursor
 *   $1, $2, $3, $6 = scratch
 *   $26 = return address, $27 = procedure value read by ldgp, $29 = gp
 *
 * Strategy:
 *   1. If dest + n <= src or src + n <= dest (unsigned compares), branch
 *      to memcpy.
 *   2. Otherwise, if dest <= src copy with ascending addresses
 *      ($memmove_up); else copy with descending addresses, starting one
 *      past the end of both buffers.
 *   3. In either direction, if src and dest differ in their low three
 *      address bits, every byte goes through the byte loop.  If they
 *      agree: a head byte loop runs until the destination cursor is
 *      8-byte aligned, then a quadword (ldq/stq) loop, then a tail byte
 *      loop for the remaining 0..7 bytes.
 *
 * Byte-copy idiom used by every byte loop:
 *   ldq_u  load the aligned quadword containing the addressed byte
 *   extbl  extract the source byte selected by the low bits of $5
 *   insbl  shift it into the byte lane selected by the low bits of $4
 *   mskbl  clear that lane in the destination quadword
 *   bis    merge the two
 *   stq_u  store the aligned quadword back
 *
 * Instructions are laid out in groups of four; the nop/unop/fnop
 * instructions have no effect on the result and are presumably
 * issue-slot padding for the EV5 -- TODO confirm before removing any.
 */
	.set noat
	.set noreorder
	.text

	.align 4
	.globl memmove
	.ent memmove
memmove:
	ldgp $29, 0($27)
	unop
	nop
	.prologue 1

	addq $16,$18,$4			/* $4 = dest + n */
	addq $17,$18,$5			/* $5 = src + n */
	cmpule $4,$17,$1		/* dest + n <= src */
	cmpule $5,$16,$2		/* dest >= src + n */

	/* $1 != 0 means the regions do not overlap: branch to memcpy. */
	bis $1,$2,$1
	mov $16,$0			/* $0 = dest */
	xor $16,$17,$2
	bne $1,memcpy !samegp

	and $2,7,$2			/* Test for src/dest co-alignment.  */
	and $16,7,$1			/* $1 = dest & 7, consumed at $memmove_up */
	cmpule $16,$17,$3
	bne $3,$memmove_up		/* dest <= src: copy ascending */

	/*
	 * Descending copy.  $4 / $5 point one past the end of dest / src.
	 * $1 is recomputed for the end of the destination.
	 */
	and $4,7,$1
	bne $2,$misaligned_dn
	unop
	beq $1,$skip_aligned_byte_loop_head_dn

	/*
	 * Head: copy single bytes downwards until $4 is 8-byte aligned.
	 * The cursors are stepped before the (signed) count test.
	 */
$aligned_byte_loop_head_dn:
	lda $4,-1($4)
	lda $5,-1($5)
	unop
	ble $18,$egress

	ldq_u $3,0($5)
	ldq_u $2,0($4)
	lda $18,-1($18)
	extbl $3,$5,$1

	insbl $1,$4,$1
	mskbl $2,$4,$2
	bis $1,$2,$1
	and $4,7,$6			/* $6 = remaining misalignment of $4 */

	stq_u $1,0($4)
	bne $6,$aligned_byte_loop_head_dn

	/* Bias the count by -8 so the loop test is a plain sign check. */
$skip_aligned_byte_loop_head_dn:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_dn

	/* Body: one quadword per iteration, moving downwards. */
$aligned_word_loop_dn:
	ldq $1,-8($5)
	nop
	lda $5,-8($5)
	lda $18,-8($18)

	stq $1,-8($4)
	nop
	lda $4,-8($4)
	bge $18,$aligned_word_loop_dn

	/* Undo the bias; a positive count is the tail still to copy. */
$skip_aligned_word_loop_dn:
	lda $18,8($18)
	bgt $18,$byte_loop_tail_dn
	unop
	ret $31,($26),1

	.align 4
$misaligned_dn:
	nop
	fnop
	unop
	beq $18,$egress

	/*
	 * Byte loop, descending.  Entered with $18 > 0 either for the
	 * whole copy (src/dest not co-aligned) or for the tail left by
	 * the quadword loop.
	 */
$byte_loop_tail_dn:
	ldq_u $3,-1($5)
	ldq_u $2,-1($4)
	lda $5,-1($5)
	lda $4,-1($4)

	lda $18,-1($18)
	extbl $3,$5,$1
	insbl $1,$4,$1
	mskbl $2,$4,$2

	bis $1,$2,$1
	stq_u $1,0($4)
	bgt $18,$byte_loop_tail_dn
	br $egress

	/*
	 * Ascending copy.  On entry $1 = dest & 7 and $2 = (dest ^ src) & 7,
	 * both computed before the branch here.
	 */
$memmove_up:
	mov $16,$4			/* $4 = dest */
	mov $17,$5			/* $5 = src */
	bne $2,$misaligned_up
	beq $1,$skip_aligned_byte_loop_head_up

	/* Head: copy single bytes upwards until $4 is 8-byte aligned. */
$aligned_byte_loop_head_up:
	unop
	ble $18,$egress
	ldq_u $3,0($5)
	ldq_u $2,0($4)

	lda $18,-1($18)
	extbl $3,$5,$1
	insbl $1,$4,$1
	mskbl $2,$4,$2

	bis $1,$2,$1
	lda $5,1($5)
	stq_u $1,0($4)
	lda $4,1($4)

	and $4,7,$6			/* $6 = remaining misalignment of $4 */
	bne $6,$aligned_byte_loop_head_up

	/* Bias the count by -8 so the loop test is a plain sign check. */
$skip_aligned_byte_loop_head_up:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_up

	/* Body: one quadword per iteration, moving upwards. */
$aligned_word_loop_up:
	ldq $1,0($5)
	nop
	lda $5,8($5)
	lda $18,-8($18)

	stq $1,0($4)
	nop
	lda $4,8($4)
	bge $18,$aligned_word_loop_up

	/* Undo the bias; a positive count is the tail still to copy. */
$skip_aligned_word_loop_up:
	lda $18,8($18)
	bgt $18,$byte_loop_tail_up
	unop
	ret $31,($26),1

	.align 4
$misaligned_up:
	nop
	fnop
	unop
	beq $18,$egress

	/*
	 * Byte loop, ascending.  Entered with $18 > 0 either for the
	 * whole copy (src/dest not co-aligned) or for the tail left by
	 * the quadword loop.
	 */
$byte_loop_tail_up:
	ldq_u $3,0($5)
	ldq_u $2,0($4)
	lda $18,-1($18)
	extbl $3,$5,$1

	insbl $1,$4,$1
	mskbl $2,$4,$2
	bis $1,$2,$1
	stq_u $1,0($4)

	lda $5,1($5)
	lda $4,1($4)
	nop
	bgt $18,$byte_loop_tail_up

	/* Common exit; $0 still holds dest. */
$egress:
	ret $31,($26),1
	nop
	nop
	nop

	.end memmove