mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 11:46:52 +07:00
4494ce4fb4
Update license to use SPDX-License-Identifier instead of verbose license text. Link: http://lkml.kernel.org/r/871s6wcswb.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Reviewed-by: Simon Horman <horms+renesas@verge.net.au> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Rich Felker <dalias@libc.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
129 lines
2.3 KiB
ArmAsm
129 lines
2.3 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
|
|
*
|
|
* Copyright (C) 2006 Free Software Foundation, Inc.
|
|
*/
|
|
|
|
/* Moderately Space-optimized libgcc routines for the Renesas SH /
|
|
STMicroelectronics ST40 CPUs.
|
|
Contributed by J"orn Rennecke joern.rennecke@st.com. */
|
|
|
|
/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
|
|
sh4-200 run times:
|
|
udiv small divisor: 55 cycles
|
|
udiv large divisor: 52 cycles
|
|
sdiv small divisor, positive result: 59 cycles
|
|
sdiv large divisor, positive result: 56 cycles
|
|
sdiv small divisor, negative result: 65 cycles (*)
|
|
sdiv large divisor, negative result: 62 cycles (*)
|
|
(*): r2 is restored in the delay slot of the final jmp @r2 and has a lingering latency
|
|
of two more cycles. */
|
|
/* Start on a 4-byte boundary and export the unsigned-divide entry point.
   __udivsi3_i4 is defined as a second name for __udivsi3_i4i. */
.balign 4
|
|
.global __udivsi3_i4i
|
|
.global __udivsi3_i4
|
|
.set __udivsi3_i4, __udivsi3_i4i /* alias: same address as __udivsi3_i4i */
|
|
.type __udivsi3_i4i, @function
|
|
.type __sdivsi3_i4i, @function /* the signed entry point is defined further down */
|
|
/* __udivsi3_i4i: unsigned 32-bit divide built from div1 steps.
   Register use as visible in this routine:
     r4  value being divided (div1 destination); pushed, then popped before
         the final jump
     r5  divisor (div1 source); pushed, then popped before the final jump
     r0  scratch while dividing; holds the assembled quotient at the end
     r1  copy of pr; the routine finishes by jumping through r1
   A divisor that fits in 16 bits falls through into sdiv_small_divisor;
   any other divisor branches to large_divisor.
   NOTE(review): which registers callers expect to be preserved is not
   visible in this file - confirm against the compiler side. */
__udivsi3_i4i:
|
|
sts pr,r1 /* r1 = return address */
|
|
mov.l r4,@-r15 /* push r4 */
|
|
extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
|
|
cmp/eq r5,r0 /* T = 1 when r5 fits in 16 bits */
|
|
swap.w r4,r0 /* r0 = r4 with its 16-bit halves exchanged */
|
|
shlr16 r4 /* r4 = upper half of the original r4 */
|
|
bf/s large_divisor /* T clear: divisor needs more than 16 bits */
|
|
div0u /* delay slot, runs on both paths: initialise unsigned division state */
|
|
mov.l r5,@-r15 /* push r5 */
|
|
shll16 r5 /* move the 16-bit divisor into the upper half of r5 */
|
|
/* Shared small-divisor path; __sdivsi3_i4i also branches here.
   Expects r4 then r5 already pushed, r0/r4/r5 prepared as above, div0u done,
   and r1 = address to jump to when finished.
   First round: 16 div1 steps (2 inline + 6 in div6, done twice). */
sdiv_small_divisor:
|
|
div1 r5,r4
|
|
bsr div6
|
|
div1 r5,r4 /* delay slot: executed before div6 is entered */
|
|
div1 r5,r4
|
|
bsr div6
|
|
div1 r5,r4 /* delay slot */
|
|
xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
|
|
xtrct r0,r4 /* r4 = (r0 << 16) | (r4 >> 16) */
|
|
/* Second round: 16 more div1 steps (7 in div7 + 2 inline + 7 in div7). */
bsr div7
|
|
swap.w r4,r4 /* delay slot: exchange the halves of r4 before dividing on */
|
|
div1 r5,r4
|
|
bsr div7
|
|
div1 r5,r4 /* delay slot */
|
|
xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
|
|
mov.l @r15+,r5 /* pop r5 */
|
|
swap.w r0,r0 /* exchange the halves of r0 */
|
|
mov.l @r15+,r4 /* pop r4 */
|
|
jmp @r1 /* return, or continue at negate_result when the signed path set r1 so */
|
|
rotcl r0 /* delay slot: rotate T from the last div1 into bit 0 of r0 */
|
|
/* div7 / div6: helper subroutines reached with bsr from sdiv_small_divisor.
   Entering at div7 runs seven div1 r5,r4 steps, entering at div6 runs six.
   The final step sits in the delay slot of rts, so it executes before
   control returns to the caller. */
div7:
|
|
div1 r5,r4 /* extra step taken only on the div7 entry */
|
|
div6:
|
|
div1 r5,r4; div1 r5,r4; div1 r5,r4
|
|
div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 /* last div1 is the rts delay slot */
|
|
|
|
/* divx3: helper reached with bsr from the .rept loop under
   sdiv_large_divisor. Performs three rotcl r0 / div1 r5,r4 pairs.
   Each rotcl rotates r0 left through T: the current T goes into bit 0 of r0
   and bit 31 of r0 becomes the new T ahead of the following div1. */
divx3:
|
|
rotcl r0
|
|
div1 r5,r4
|
|
rotcl r0
|
|
div1 r5,r4
|
|
rotcl r0
|
|
rts
|
|
div1 r5,r4 /* delay slot: third div1 runs before returning */
|
|
|
|
/* large_divisor: unsigned path for a divisor that does not fit in 16 bits.
   sdiv_large_divisor: entry used by __sdivsi3_i4i, which pushes r5 itself.
   On arrival r0 holds the original r4 with its halves exchanged and r4 holds
   the upper half of the original r4 (both set up by the code that branches
   here); div0u has already executed in the branch delay slot. */
large_divisor:
|
|
mov.l r5,@-r15 /* push r5 (the signed entry has done this already) */
|
|
sdiv_large_divisor:
|
|
xor r4,r0 /* cancels the low half of r0; the high half keeps the original low 16 bits of r4 */
|
|
.rept 4 /* 4 x (1 + 3 in divx3) = 16 rotcl/div1 pairs */
|
|
rotcl r0
|
|
bsr divx3
|
|
div1 r5,r4 /* delay slot: executed before divx3 is entered */
|
|
.endr
|
|
mov.l @r15+,r5 /* pop r5 */
|
|
mov.l @r15+,r4 /* pop r4 */
|
|
jmp @r1 /* return, or continue at negate_result when the signed path set r1 so */
|
|
rotcl r0 /* delay slot: rotate T from the last div1 into bit 0 of r0 */
|
|
|
|
/* Export the signed-divide entry point. __sdivsi3_i4 and __sdivsi3 are both
   defined as additional names for __sdivsi3_i4i. */
.global __sdivsi3_i4i
|
|
.global __sdivsi3_i4
|
|
.global __sdivsi3
|
|
.set __sdivsi3_i4, __sdivsi3_i4i /* alias: same address as __sdivsi3_i4i */
|
|
.set __sdivsi3, __sdivsi3_i4i /* alias: same address as __sdivsi3_i4i */
|
|
/* __sdivsi3_i4i: signed 32-bit divide.
   Negates whichever of r4 / r5 is negative, then reuses the unsigned div1
   code (sdiv_small_divisor / sdiv_large_divisor). When exactly one operand
   was negative, r1 is pointed at negate_result so that the shared code's
   final jump lands there instead of returning directly.
   r4 and r5 are pushed here and popped by the shared code. */
__sdivsi3_i4i:
|
|
mov.l r4,@-r15 /* push r4 */
|
|
cmp/pz r5 /* T = 1 when r5 >= 0 */
|
|
mov.l r5,@-r15 /* push r5 */
|
|
bt/s pos_divisor
|
|
cmp/pz r4 /* delay slot, runs on both paths: T = 1 when r4 >= 0 */
|
|
neg r5,r5 /* r5 was negative: r5 = -r5 */
|
|
extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
|
|
bt/s neg_result /* r4 >= 0 with a negative r5: result gets negated */
|
|
cmp/eq r5,r0 /* delay slot: T = 1 when r5 fits in 16 bits */
|
|
neg r4,r4 /* both operands were negative: r4 = -r4 */
|
|
/* No sign fix-up needed afterwards: the shared code returns through pr. */
pos_result:
|
|
swap.w r4,r0 /* r0 = r4 with its 16-bit halves exchanged */
|
|
bra sdiv_check_divisor
|
|
sts pr,r1 /* delay slot: r1 = return address */
|
|
pos_divisor:
|
|
extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
|
|
bt/s pos_result /* r4 >= 0 as well */
|
|
cmp/eq r5,r0 /* delay slot: T = 1 when r5 fits in 16 bits */
|
|
neg r4,r4 /* r4 was negative: r4 = -r4 */
|
|
/* Exactly one operand was negative: route the shared code's exit through
   negate_result and keep the real return address in r2. */
neg_result:
|
|
mova negate_result,r0 /* r0 = address of negate_result */
|
|
;
|
|
mov r0,r1 /* the shared code jumps through r1 when it is done */
|
|
swap.w r4,r0 /* r0 = r4 with its 16-bit halves exchanged */
|
|
lds r2,macl /* park the current r2 in macl */
|
|
sts pr,r2 /* r2 = return address, consumed by negate_result */
|
|
/* T is expected to still hold the cmp/eq result computed above. */
sdiv_check_divisor:
|
|
shlr16 r4 /* r4 = upper half of r4 */
|
|
bf/s sdiv_large_divisor /* T clear: divisor needs more than 16 bits */
|
|
div0u /* delay slot, runs on both paths: initialise unsigned division state */
|
|
bra sdiv_small_divisor
|
|
shll16 r5 /* delay slot: move the 16-bit divisor into the upper half of r5 */
|
|
/* negate_result: reached through r1 from the shared divide code when
   neg_result has arranged for the result to be negated.
   NOTE(review): the .balign 4 is presumably needed because the address of
   this label is taken with mova - confirm against the SH instruction manual. */
.balign 4
|
|
negate_result:
|
|
neg r0,r0 /* r0 = -r0 */
|
|
jmp @r2 /* r2 holds the pr value saved at neg_result */
|
|
sts macl,r2 /* delay slot: restore the r2 that was parked in macl */
|