mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-22 21:21:02 +07:00
1031a3a57e
Newer code paths that are heavier in 64-bit math manage to get this generated by newer compilers, so provide a definition and export it accordingly. This is trivially wrapped around the existing __sdivsi3 code. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
136 lines
2.7 KiB
ArmAsm
136 lines
2.7 KiB
ArmAsm
/*
 * __sdivsi3 / __sdivsi3_1 / __sdivsi3_2
 *
 * Branch-free division helper: a table lookup indexed by the normalized
 * value of r5 yields a first inverse estimate, which is refined twice (the
 * "11 bit" and "22 bit" inverses annotated below) and then multiplied with
 * r4.  The sN.M / uN.M annotations on the instructions give the fixed-point
 * format of each intermediate value.
 * NOTE(review): going by the symbol name this is the compiler support
 * routine for signed 32-bit division r4 / r5 -- confirm against the
 * toolchain ABI.
 *
 * Entry points:
 *   __sdivsi3, __sdivsi3_1  same address; load the address of __div_table
 *                           into r20 (through tr0) and fall through.
 *   __sdivsi3_2             starts after that load and reads r20 without
 *                           setting it, so a caller entering here presumably
 *                           has to provide r20 = __div_table itself.
 *
 * Nothing in this routine tests r5 for zero.
 */
.global __sdivsi3
|
|
.global __sdivsi3_1
|
|
.global __sdivsi3_2
|
|
.section .text..SHmedia32,"ax"
|
|
.align 2
|
|
|
|
/* inputs: r4,r5 */
|
|
/* clobbered: r1,r18,r19,r20,r21,r25,tr0 */
|
|
/* result in r0 */
|
|
__sdivsi3:
|
|
__sdivsi3_1:
|
|
/* r20 = table base: the __div_table target is formed in tr0 and copied out. */
ptb __div_table,tr0
|
|
gettr tr0,r20
|
|
|
|
__sdivsi3_2:
|
|
/* r1 is used right below as the normalization shift for r5 and, after the
   sub/addi adjustment further down, as the shift count of the final shard. */
nsb r5, r1
|
|
shlld r5, r1, r25 /* normalize; [-2 ..1, 1..2) in s2.62 */
|
|
shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */
|
|
/* bubble */
|
|
/* First lookup: byte-sized factor at table base r20 indexed by r21. */
ldx.ub r20, r21, r19 /* u0.8 */
|
|
shari r25, 32, r25 /* normalize to s2.30 */
|
|
/* Index doubled for the 16-bit constant lookup (ldx.w) below. */
shlli r21, 1, r21
|
|
muls.l r25, r19, r19 /* s2.38 */
|
|
ldx.w r20, r21, r21 /* s2.14 */
|
|
/* tr0 (no longer needed as table pointer, r20 has the copy) is reloaded from
   r18 for the final blink.  This has to precede the muls.l below that
   overwrites r18 as a scratch register.
   NOTE(review): r18 presumably holds the caller's return address -- confirm. */
ptabs r18, tr0
|
|
shari r19, 24, r19 /* truncate to s2.14 */
|
|
sub r21, r19, r19 /* some 11 bit inverse in s1.14 */
|
|
muls.l r19, r19, r21 /* u0.28 */
|
|
/* r1 = r63 - r1 + 92, consumed only by the final shard.
   NOTE(review): assumes r63 reads as zero, i.e. r1 = 92 - r1 -- confirm. */
sub r63, r1, r1
|
|
addi r1, 92, r1
|
|
muls.l r25, r21, r18 /* s2.58 */
|
|
shlli r19, 45, r19 /* multiply by two and convert to s2.58 */
|
|
/* bubble */
|
|
sub r19, r18, r18
|
|
shari r18, 28, r18 /* some 22 bit inverse in s1.30 */
|
|
muls.l r18, r25, r0 /* s2.60 */
|
|
muls.l r18, r4, r25 /* s32.30 */
|
|
/* bubble */
|
|
shari r0, 16, r19 /* s-16.44 */
|
|
muls.l r19, r18, r19 /* s-16.74 */
|
|
/* r0 = r25 shifted right arithmetically by 63; it feeds the xor below and
   the final sub.  NOTE(review): presumably a 0 / -1 sign mask of the s32.30
   product used for rounding and sign fix-up -- confirm. */
shari r25, 63, r0
|
|
shari r4, 14, r18 /* s19.-14 */
|
|
shari r19, 30, r19 /* s-16.44 */
|
|
muls.l r19, r18, r19 /* s15.30 */
|
|
xor r21, r0, r21 /* You could also use the constant 1 << 27. */
|
|
add r21, r25, r21
|
|
sub r21, r19, r21
|
|
/* Scale by the shift count prepared in r1, then subtract r0 to form the
   result in r0. */
shard r21, r1, r21
|
|
sub r21, r0, r0
|
|
/* Leave through tr0 (set by ptabs above); the link destination is r63.
   NOTE(review): i.e. a plain return with the link value discarded -- confirm. */
blink tr0, r63
|
|
|
|
/* This table has been generated by divtab.c .
|
|
Defects for bias -330:
|
|
Max defect: 6.081536e-07 at -1.000000e+00
|
|
Min defect: 2.849516e-08 at 1.030651e+00
|
|
Max 2nd step defect: 9.606539e-12 at -1.000000e+00
|
|
Min 2nd step defect: 0.000000e+00 at 0.000000e+00
|
|
Defect at 1: 1.238659e-07
|
|
Defect at -2: 1.061708e-07 */
|
|
|
|
/*
 * Layout in bytes relative to the __div_table label, as produced by the
 * directives below (.word entries counted as 2 bytes each, which is what
 * makes the total equal the declared .size of 128):
 *
 *   -64 .. -33   negative division constants   (16 x .word)
 *   -32 .. -17   negative division factors     (16 x .byte)
 *   -16 ..  -1   .skip 16
 *     0 ..  15   .skip 16
 *    16 ..  31   positive division factors     (16 x .byte)
 *    32 ..  63   positive division constants   (16 x .word)
 *
 * The label sits in the middle of the data, so it is addressed with both
 * negative and positive offsets; the __sdivsi3 code indexes it with a
 * signed index (byte load) and twice that index (16-bit load).
 * NOTE(review): the two .skip regions presumably line up with the index
 * "hole" mentioned in the __sdivsi3 comments and are never read -- confirm.
 * NOTE(review): .size is 128, the total of all entries on both sides of the
 * label, while only 64 bytes follow the label itself -- confirm this is the
 * intended symbol size.
 */
.balign 2
|
|
.type __div_table,@object
|
|
.size __div_table,128
|
|
/* negative division constants */
|
|
.word -16638
|
|
.word -17135
|
|
.word -17737
|
|
.word -18433
|
|
.word -19103
|
|
.word -19751
|
|
.word -20583
|
|
.word -21383
|
|
.word -22343
|
|
.word -23353
|
|
.word -24407
|
|
.word -25582
|
|
.word -26863
|
|
.word -28382
|
|
.word -29965
|
|
.word -31800
|
|
/* negative division factors */
|
|
.byte 66
|
|
.byte 70
|
|
.byte 75
|
|
.byte 81
|
|
.byte 87
|
|
.byte 93
|
|
.byte 101
|
|
.byte 109
|
|
.byte 119
|
|
.byte 130
|
|
.byte 142
|
|
.byte 156
|
|
.byte 172
|
|
.byte 192
|
|
.byte 214
|
|
.byte 241
|
|
.skip 16
|
|
.global __div_table
|
|
/* Offset 0: everything above is reached with negative offsets. */
__div_table:
|
|
.skip 16
|
|
/* positive division factors */
|
|
.byte 241
|
|
.byte 214
|
|
.byte 192
|
|
.byte 172
|
|
.byte 156
|
|
.byte 142
|
|
.byte 130
|
|
.byte 119
|
|
.byte 109
|
|
.byte 101
|
|
.byte 93
|
|
.byte 87
|
|
.byte 81
|
|
.byte 75
|
|
.byte 70
|
|
.byte 66
|
|
/* positive division constants */
|
|
.word 31801
|
|
.word 29966
|
|
.word 28383
|
|
.word 26864
|
|
.word 25583
|
|
.word 24408
|
|
.word 23354
|
|
.word 22344
|
|
.word 21384
|
|
.word 20584
|
|
.word 19752
|
|
.word 19104
|
|
.word 18434
|
|
.word 17738
|
|
.word 17136
|
|
.word 16639
|