linux_dsm_epyc7002/arch/arm/lib/memcpy.S
Linus Torvalds 1da177e4c3 Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
2005-04-16 15:20:36 -07:00

394 lines
7.6 KiB
ArmAsm

/*
* linux/arch/arm/lib/memcpy.S
*
* Copyright (C) 1995-1999 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* ASM optimised string functions
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
#define ENTER \
mov ip,sp ;\
stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\
sub fp,ip,#4
#define EXIT \
LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
#define EXITEQ \
LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
/*
* Prototype: void memcpy(void *to,const void *from,unsigned long n);
*/
ENTRY(memcpy)
ENTRY(memmove)
ENTER
cmp r1, r0
bcc 23f
subs r2, r2, #4
blt 6f
PLD( pld [r1, #0] )
ands ip, r0, #3
bne 7f
ands ip, r1, #3
bne 8f
1: subs r2, r2, #8
blt 5f
subs r2, r2, #20
blt 4f
PLD( pld [r1, #28] )
PLD( subs r2, r2, #64 )
PLD( blt 3f )
2: PLD( pld [r1, #60] )
PLD( pld [r1, #92] )
ldmia r1!, {r3 - r9, ip}
subs r2, r2, #32
stmgeia r0!, {r3 - r9, ip}
ldmgeia r1!, {r3 - r9, ip}
subges r2, r2, #32
stmia r0!, {r3 - r9, ip}
bge 2b
3: PLD( ldmia r1!, {r3 - r9, ip} )
PLD( adds r2, r2, #32 )
PLD( stmgeia r0!, {r3 - r9, ip} )
PLD( ldmgeia r1!, {r3 - r9, ip} )
PLD( subges r2, r2, #32 )
PLD( stmia r0!, {r3 - r9, ip} )
4: cmn r2, #16
ldmgeia r1!, {r3 - r6}
subge r2, r2, #16
stmgeia r0!, {r3 - r6}
adds r2, r2, #20
ldmgeia r1!, {r3 - r5}
subge r2, r2, #12
stmgeia r0!, {r3 - r5}
5: adds r2, r2, #8
blt 6f
subs r2, r2, #4
ldrlt r3, [r1], #4
ldmgeia r1!, {r4, r5}
subge r2, r2, #4
strlt r3, [r0], #4
stmgeia r0!, {r4, r5}
6: adds r2, r2, #4
EXITEQ
cmp r2, #2
ldrb r3, [r1], #1
ldrgeb r4, [r1], #1
ldrgtb r5, [r1], #1
strb r3, [r0], #1
strgeb r4, [r0], #1
strgtb r5, [r0], #1
EXIT
7: rsb ip, ip, #4
cmp ip, #2
ldrb r3, [r1], #1
ldrgeb r4, [r1], #1
ldrgtb r5, [r1], #1
strb r3, [r0], #1
strgeb r4, [r0], #1
strgtb r5, [r0], #1
subs r2, r2, ip
blt 6b
ands ip, r1, #3
beq 1b
8: bic r1, r1, #3
ldr r7, [r1], #4
cmp ip, #2
bgt 18f
beq 13f
cmp r2, #12
blt 11f
PLD( pld [r1, #12] )
sub r2, r2, #12
PLD( subs r2, r2, #32 )
PLD( blt 10f )
PLD( pld [r1, #28] )
9: PLD( pld [r1, #44] )
10: mov r3, r7, pull #8
ldmia r1!, {r4 - r7}
subs r2, r2, #16
orr r3, r3, r4, push #24
mov r4, r4, pull #8
orr r4, r4, r5, push #24
mov r5, r5, pull #8
orr r5, r5, r6, push #24
mov r6, r6, pull #8
orr r6, r6, r7, push #24
stmia r0!, {r3 - r6}
bge 9b
PLD( cmn r2, #32 )
PLD( bge 10b )
PLD( add r2, r2, #32 )
adds r2, r2, #12
blt 12f
11: mov r3, r7, pull #8
ldr r7, [r1], #4
subs r2, r2, #4
orr r3, r3, r7, push #24
str r3, [r0], #4
bge 11b
12: sub r1, r1, #3
b 6b
13: cmp r2, #12
blt 16f
PLD( pld [r1, #12] )
sub r2, r2, #12
PLD( subs r2, r2, #32 )
PLD( blt 15f )
PLD( pld [r1, #28] )
14: PLD( pld [r1, #44] )
15: mov r3, r7, pull #16
ldmia r1!, {r4 - r7}
subs r2, r2, #16
orr r3, r3, r4, push #16
mov r4, r4, pull #16
orr r4, r4, r5, push #16
mov r5, r5, pull #16
orr r5, r5, r6, push #16
mov r6, r6, pull #16
orr r6, r6, r7, push #16
stmia r0!, {r3 - r6}
bge 14b
PLD( cmn r2, #32 )
PLD( bge 15b )
PLD( add r2, r2, #32 )
adds r2, r2, #12
blt 17f
16: mov r3, r7, pull #16
ldr r7, [r1], #4
subs r2, r2, #4
orr r3, r3, r7, push #16
str r3, [r0], #4
bge 16b
17: sub r1, r1, #2
b 6b
18: cmp r2, #12
blt 21f
PLD( pld [r1, #12] )
sub r2, r2, #12
PLD( subs r2, r2, #32 )
PLD( blt 20f )
PLD( pld [r1, #28] )
19: PLD( pld [r1, #44] )
20: mov r3, r7, pull #24
ldmia r1!, {r4 - r7}
subs r2, r2, #16
orr r3, r3, r4, push #8
mov r4, r4, pull #24
orr r4, r4, r5, push #8
mov r5, r5, pull #24
orr r5, r5, r6, push #8
mov r6, r6, pull #24
orr r6, r6, r7, push #8
stmia r0!, {r3 - r6}
bge 19b
PLD( cmn r2, #32 )
PLD( bge 20b )
PLD( add r2, r2, #32 )
adds r2, r2, #12
blt 22f
21: mov r3, r7, pull #24
ldr r7, [r1], #4
subs r2, r2, #4
orr r3, r3, r7, push #8
str r3, [r0], #4
bge 21b
22: sub r1, r1, #1
b 6b
23: add r1, r1, r2
add r0, r0, r2
subs r2, r2, #4
blt 29f
PLD( pld [r1, #-4] )
ands ip, r0, #3
bne 30f
ands ip, r1, #3
bne 31f
24: subs r2, r2, #8
blt 28f
subs r2, r2, #20
blt 27f
PLD( pld [r1, #-32] )
PLD( subs r2, r2, #64 )
PLD( blt 26f )
25: PLD( pld [r1, #-64] )
PLD( pld [r1, #-96] )
ldmdb r1!, {r3 - r9, ip}
subs r2, r2, #32
stmgedb r0!, {r3 - r9, ip}
ldmgedb r1!, {r3 - r9, ip}
subges r2, r2, #32
stmdb r0!, {r3 - r9, ip}
bge 25b
26: PLD( ldmdb r1!, {r3 - r9, ip} )
PLD( adds r2, r2, #32 )
PLD( stmgedb r0!, {r3 - r9, ip} )
PLD( ldmgedb r1!, {r3 - r9, ip} )
PLD( subges r2, r2, #32 )
PLD( stmdb r0!, {r3 - r9, ip} )
27: cmn r2, #16
ldmgedb r1!, {r3 - r6}
subge r2, r2, #16
stmgedb r0!, {r3 - r6}
adds r2, r2, #20
ldmgedb r1!, {r3 - r5}
subge r2, r2, #12
stmgedb r0!, {r3 - r5}
28: adds r2, r2, #8
blt 29f
subs r2, r2, #4
ldrlt r3, [r1, #-4]!
ldmgedb r1!, {r4, r5}
subge r2, r2, #4
strlt r3, [r0, #-4]!
stmgedb r0!, {r4, r5}
29: adds r2, r2, #4
EXITEQ
cmp r2, #2
ldrb r3, [r1, #-1]!
ldrgeb r4, [r1, #-1]!
ldrgtb r5, [r1, #-1]!
strb r3, [r0, #-1]!
strgeb r4, [r0, #-1]!
strgtb r5, [r0, #-1]!
EXIT
30: cmp ip, #2
ldrb r3, [r1, #-1]!
ldrgeb r4, [r1, #-1]!
ldrgtb r5, [r1, #-1]!
strb r3, [r0, #-1]!
strgeb r4, [r0, #-1]!
strgtb r5, [r0, #-1]!
subs r2, r2, ip
blt 29b
ands ip, r1, #3
beq 24b
31: bic r1, r1, #3
ldr r3, [r1], #0
cmp ip, #2
blt 41f
beq 36f
cmp r2, #12
blt 34f
PLD( pld [r1, #-16] )
sub r2, r2, #12
PLD( subs r2, r2, #32 )
PLD( blt 33f )
PLD( pld [r1, #-32] )
32: PLD( pld [r1, #-48] )
33: mov r7, r3, push #8
ldmdb r1!, {r3, r4, r5, r6}
subs r2, r2, #16
orr r7, r7, r6, pull #24
mov r6, r6, push #8
orr r6, r6, r5, pull #24
mov r5, r5, push #8
orr r5, r5, r4, pull #24
mov r4, r4, push #8
orr r4, r4, r3, pull #24
stmdb r0!, {r4, r5, r6, r7}
bge 32b
PLD( cmn r2, #32 )
PLD( bge 33b )
PLD( add r2, r2, #32 )
adds r2, r2, #12
blt 35f
34: mov ip, r3, push #8
ldr r3, [r1, #-4]!
subs r2, r2, #4
orr ip, ip, r3, pull #24
str ip, [r0, #-4]!
bge 34b
35: add r1, r1, #3
b 29b
36: cmp r2, #12
blt 39f
PLD( pld [r1, #-16] )
sub r2, r2, #12
PLD( subs r2, r2, #32 )
PLD( blt 38f )
PLD( pld [r1, #-32] )
37: PLD( pld [r1, #-48] )
38: mov r7, r3, push #16
ldmdb r1!, {r3, r4, r5, r6}
subs r2, r2, #16
orr r7, r7, r6, pull #16
mov r6, r6, push #16
orr r6, r6, r5, pull #16
mov r5, r5, push #16
orr r5, r5, r4, pull #16
mov r4, r4, push #16
orr r4, r4, r3, pull #16
stmdb r0!, {r4, r5, r6, r7}
bge 37b
PLD( cmn r2, #32 )
PLD( bge 38b )
PLD( add r2, r2, #32 )
adds r2, r2, #12
blt 40f
39: mov ip, r3, push #16
ldr r3, [r1, #-4]!
subs r2, r2, #4
orr ip, ip, r3, pull #16
str ip, [r0, #-4]!
bge 39b
40: add r1, r1, #2
b 29b
41: cmp r2, #12
blt 44f
PLD( pld [r1, #-16] )
sub r2, r2, #12
PLD( subs r2, r2, #32 )
PLD( blt 43f )
PLD( pld [r1, #-32] )
42: PLD( pld [r1, #-48] )
43: mov r7, r3, push #24
ldmdb r1!, {r3, r4, r5, r6}
subs r2, r2, #16
orr r7, r7, r6, pull #8
mov r6, r6, push #24
orr r6, r6, r5, pull #8
mov r5, r5, push #24
orr r5, r5, r4, pull #8
mov r4, r4, push #24
orr r4, r4, r3, pull #8
stmdb r0!, {r4, r5, r6, r7}
bge 42b
PLD( cmn r2, #32 )
PLD( bge 43b )
PLD( add r2, r2, #32 )
adds r2, r2, #12
blt 45f
44: mov ip, r3, push #24
ldr r3, [r1, #-4]!
subs r2, r2, #4
orr ip, ip, r3, pull #8
str ip, [r0, #-4]!
bge 44b
45: add r1, r1, #1
b 29b