/*
 * Listing origin: linux_dsm_epyc7002/arch/arm/lib/csumpartial.S
 * (140 lines, 2.9 KiB, ArmAsm)
 */

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm/lib/csumpartial.S
*
* Copyright (C) 1995-1998 Russell King
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
/*
* Function: __u32 csum_partial(const char *src, int len, __u32 sum)
* Params : r0 = buffer, r1 = len, r2 = checksum
* Returns : r0 = new checksum
*/
/*
 * Register aliases used by the code below.
 *
 * buf, len and sum name the three incoming argument registers (r0-r2).
 * td0-td3 are scratch data registers.  td1/td2 (r4/r5) are pushed
 * before the 32-byte loop loads into them, and td3 shares lr, whose
 * entry value csum_partial stores on the stack before anything else.
 */
buf .req r0
len .req r1
sum .req r2
td0 .req r3
td1 .req r4 @ save before use
td2 .req r5 @ save before use
td3 .req lr
/*
 * Zero-length exit: return the incoming sum unchanged.
 * The entry code pushed {buf, lr}, so step over the saved buf word and
 * pop the saved return address directly into pc.
 */
.Lzero: mov r0, sum
add sp, sp, #4
ldr pc, [sp], #4
/*
 * Handle 0 to 7 bytes, with any alignment of the buffer pointer.
 * Note that when we get here, C = 0: the entry code arrives via
 * "blo", which is only taken with the carry flag clear.
 */
.Lless8: teq len, #0 @ check for zero count
beq .Lzero
/* we must have at least one byte. */
/*
 * Odd start address: rotate the running sum by 8 bits, then consume a
 * single byte so that buf becomes halfword aligned.  .Ldone applies a
 * matching rotation to the result when the saved original buf is odd.
 * put_byte_1 is not defined in this file; presumably it is the
 * shift-operand helper from <asm/assembler.h> - confirm there.
 */
tst buf, #1 @ odd address?
movne sum, sum, ror #8
ldrbne td0, [buf], #1
subne len, len, #1
adcsne sum, sum, td0, put_byte_1
/*
 * Tail handling shared by the short path and the main path.
 * First sum halfwords for as long as bit 1 or bit 2 of len is set,
 * then pick up one trailing byte if bit 0 of len is set.  Every
 * addition is an adcs so the carry keeps rippling from one add to the
 * next; .Ldone folds in the last one.
 */
.Lless4: tst len, #6
beq .Lless8_byte
/* we are now half-word aligned */
.Lless8_wordlp:
#if __LINUX_ARM_ARCH__ >= 4
ldrh td0, [buf], #2
sub len, len, #2
#else
/*
 * Builds for architectures below version 4 assemble the halfword from
 * two byte loads; __ARMEB__ selects which byte lands in the low half.
 * td3 is lr, usable as scratch because the return address was pushed
 * on entry.
 */
ldrb td0, [buf], #1
ldrb td3, [buf], #1
sub len, len, #2
#ifndef __ARMEB__
orr td0, td0, td3, lsl #8
#else
orr td0, td3, td0, lsl #8
#endif
#endif
adcs sum, sum, td0
tst len, #6
bne .Lless8_wordlp
/*
 * put_byte_0 is not defined in this file; presumably it is the
 * shift-operand helper from <asm/assembler.h> - confirm there.
 */
.Lless8_byte: tst len, #1 @ odd number of bytes
ldrbne td0, [buf], #1 @ include last byte
adcsne sum, sum, td0, put_byte_0 @ update checksum
/*
 * Common exit.  Fold the outstanding carry into the result, pop the
 * original buf value that was pushed on entry, and rotate the result
 * by 8 bits if that address was odd.  The final load pops the saved
 * return address into pc.
 */
.Ldone: adc r0, sum, #0 @ collect up the last carry
ldr td0, [sp], #4
tst td0, #1 @ check buffer alignment
movne r0, r0, ror #8 @ rotate checksum by 8 bits
ldr pc, [sp], #4 @ return
/*
 * Bring buf up to 32-bit alignment for the word loops.
 *
 * Called with "bl" from csum_partial once at least 8 bytes are known to
 * be available; returns through lr.  Consumes one byte if buf is odd,
 * then one halfword if buf is still not word aligned.  Each value is
 * added into sum with carry (adcs) and len is reduced to match, so the
 * caller continues the same carry chain after the return.
 *
 * put_byte_1 is not defined in this file; presumably it is the
 * shift-operand helper from <asm/assembler.h> - confirm there.
 */
.Lnot_aligned: tst buf, #1 @ odd address
ldrbne td0, [buf], #1 @ make even
subne len, len, #1
adcsne sum, sum, td0, put_byte_1 @ update checksum
tst buf, #2 @ 32-bit aligned?
#if __LINUX_ARM_ARCH__ >= 4
ldrhne td0, [buf], #2 @ make 32-bit aligned
subne len, len, #2
#else
/*
 * Builds for architectures below version 4 assemble the halfword from
 * two byte loads; __ARMEB__ selects which byte lands in the low half.
 * ip is free as scratch here: csum_partial only loads it after this
 * routine has returned.
 */
ldrbne td0, [buf], #1
ldrbne ip, [buf], #1
subne len, len, #2
#ifndef __ARMEB__
orrne td0, td0, ip, lsl #8
#else
orrne td0, ip, td0, lsl #8
#endif
#endif
adcsne sum, sum, td0 @ update checksum
/*
 * "ret" is the return macro introduced by the 2014-06-30 change
 * 'ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+'.  It
 * resolves to the appropriate return instruction for the target, so
 * that "bx lr" is used instead of "mov pc, lr" on ARMv6 and later.
 */
ret lr
/*
 * csum_partial entry point.
 *
 * In:  buf (r0) = data pointer, len (r1) = byte count,
 *      sum (r2) = checksum accumulated so far
 * Out: r0 = updated checksum
 *
 * The original buf and lr are pushed first.  Both exit paths (.Lzero
 * and .Ldone) unwind that frame, and .Ldone inspects the saved buf to
 * apply the matching rotation for an odd start address.  Buffers
 * shorter than 8 bytes go straight to .Lless8; otherwise buf is word
 * aligned via .Lnot_aligned, summed in 32-byte blocks, then in single
 * words, and the last 0-3 bytes are handled at .Lless4.
 */
ENTRY(csum_partial)
stmfd sp!, {buf, lr}
cmp len, #8 @ Ensure that we have at least
blo .Lless8 @ 8 bytes to sum.
tst buf, #1
movne sum, sum, ror #8 @ odd start: rotate sum by 8 bits
adds sum, sum, #0 @ C = 0
tst buf, #3 @ Test buffer alignment
blne .Lnot_aligned @ align buffer, return here
/*
 * ip = len with its low five bits cleared, i.e. the number of bytes
 * covered by whole 32-byte blocks.  The block loop counts ip down and
 * leaves len alone; the later stages only test the low bits of len.
 */
1: bics ip, len, #31
beq 3f
stmfd sp!, {r4 - r5} @ td1/td2 are used only by the block loop
/*
 * 32 bytes per iteration: two four-word loads, every word added with
 * carry.  The loop relies on the carry from the last adcs surviving the
 * sub/teq pair so that the next iteration continues the same chain.
 */
2: ldmia buf!, {td0, td1, td2, td3}
adcs sum, sum, td0
adcs sum, sum, td1
adcs sum, sum, td2
adcs sum, sum, td3
ldmia buf!, {td0, td1, td2, td3}
adcs sum, sum, td0
adcs sum, sum, td1
adcs sum, sum, td2
adcs sum, sum, td3
sub ip, ip, #32
teq ip, #0
bne 2b
ldmfd sp!, {r4 - r5}
/*
 * Bits 2-4 of len count the whole words still to be summed (fewer than
 * eight).  Add them one at a time, then finish at .Lless4.
 */
3: tst len, #0x1c @ should not change C
beq .Lless4
4: ldr td0, [buf], #4
sub len, len, #4
adcs sum, sum, td0
tst len, #0x1c
bne 4b
b .Lless4
ENDPROC(csum_partial)