mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-23 04:19:48 +07:00
6bc9a3966f
This is the complete set of new files for the Score architecture port of Linux. The Score instruction set supports 16-bit, 32-bit and 64-bit instructions, and Score SoCs have been used in game machines and LCD TVs. Signed-off-by: Chen Liqin <liqin.chen@sunplusct.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
256 lines
5.4 KiB
ArmAsm
256 lines
5.4 KiB
ArmAsm
/*
|
|
* arch/score/lib/csum_partial.S
|
|
*
|
|
* Score Processor version.
|
|
*
|
|
* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
|
|
* Lennox Wu <lennox.wu@sunplusct.com>
|
|
* Chen Liqin <liqin.chen@sunplusct.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, see the file COPYING, or write
|
|
* to the Free Software Foundation, Inc.,
|
|
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
#include <linux/linkage.h>
|
|
|
|
/*
 * ADDC(sum, reg): sum += reg with end-around carry.
 *
 * After the add, the unsigned test "reg <= sum" holds exactly when the
 * addition did not wrap around; otherwise the lost carry is added back
 * into sum as 1.
 *
 * Side effects: cmp.c overwrites the condition flags, so flags set before
 * an ADDC cannot be tested after it.  Uses the local numeric label 9.
 */
#define ADDC(sum,reg)			\
	add	sum, sum, reg;		\
	cmp.c	reg, sum;		\
	bleu	9f;			\
	addi	sum, 0x1;		\
9:
|
|
|
|
/*
 * CSUM_BIGCHUNK(src, offset, sum): add the eight consecutive words at
 * [src + offset + 0x00] .. [src + offset + 0x1c] into sum, four at a time,
 * each with end-around carry (ADDC).
 *
 * src is not advanced; the caller steps it.
 * Clobbers r8, r9, r10, r11 and the condition flags.
 */
#define CSUM_BIGCHUNK(src, offset, sum)		\
	lw	r8, [src, offset + 0x00];	\
	lw	r9, [src, offset + 0x04];	\
	lw	r10, [src, offset + 0x08];	\
	lw	r11, [src, offset + 0x0c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);				\
	lw	r8, [src, offset + 0x10];	\
	lw	r9, [src, offset + 0x14];	\
	lw	r10, [src, offset + 0x18];	\
	lw	r11, [src, offset + 0x1c];	\
	ADDC(sum, r8);				\
	ADDC(sum, r9);				\
	ADDC(sum, r10);				\
	ADDC(sum, r11);
|
|
|
|
/* Register aliases used by the routines in this file. */
#define src	r4	/* current read address in the buffer */
#define dest	r5	/* NOTE(review): alias is never used here; r5 carries the length */
#define sum	r27	/* running sum */
|
|
|
|
/*
 * small_csumcpy - sum the last few bytes, fold the sum and return.
 *
 * Reached from csum_partial either directly, when the length is below 8,
 * or from its tail with the 0..3 bytes left over after the word loop.
 *
 * In:   r4 (src)  = next byte to sum
 *       r10       = number of bytes left (copied into r5 below)
 *       r6        = partial checksum passed by the caller
 *       r25       = 1 if csum_partial already consumed a leading odd byte,
 *                   0 otherwise
 *       r27 (sum) = running sum
 *       r3        = return address
 * Out:  r4        = sum folded to 16 bits, then combined with r6 (ADDC)
 * Uses: r5, r8, r9, r25, r26, sum, condition flags
 */
	.text
small_csumcpy:
	mv	r5, r10			/* r5 = bytes left */
	ldi	r9, 0x0
	cmpi.c	r25, 0x1
	beq	pass_small_set_t7	/* odd start already handled by csum_partial */
	andri.c	r25, r4, 0x1		/* r25 = src & 1: is src 2-byte aligned? */

pass_small_set_t7:
	/*
	 * The flags come from whichever of the two tests above ran last:
	 * "equal" means r25 was already 1 or src is even, so there is no
	 * leading odd byte to take here.
	 */
	beq	aligned
	cmpi.c	r5, 0x0
	beq	fold			/* nothing to sum */
	lbu	r9, [src]
	slli	r9, r9, 0x8		/* little endian: move the odd byte to bits 8..15 */
	ADDC(sum, r9)
	addi	src, 0x1
	subi.c	r5, 0x1

	/* the remaining length is decoded from bits 2, 1 and 0 of r5 */
aligned:
	andri.c	r8, r5, 0x4		/* bit 2 set: a full word (4 bytes) to go */
	beq	len_less_4bytes

	/* A full word to go: one lw if src is 4-byte aligned, else two lhu. */
	andri.c	r8, src, 0x3
	beq	four_byte_aligned
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	lhu	r9, [src]
	addi	src, 2
	ADDC(sum, r9)
	b	len_less_4bytes

four_byte_aligned:			/* full word to go and src 4-byte aligned */
	lw	r9, [src]
	addi	src, 4
	ADDC(sum, r9)

len_less_4bytes:			/* fewer than 4 bytes left */
	andri.c	r8, r5, 0x2		/* bit 1 set: a halfword to go */
	beq	len_less_2bytes
	lhu	r9, [src]
	addi	src, 0x2		/* src += 2 */
	ADDC(sum, r9)

len_less_2bytes:			/* at most 1 byte left */
	andri.c	r8, r5, 0x1
	beq	fold			/* bit 0 clear: nothing left, go fold */
	lbu	r9, [src]		/* trailing byte is added unshifted */

fold_ADDC:
	ADDC(sum, r9)
fold:
	/* fold checksum: high half + low half + carry, result in bits 0..15 */
	slli	r26, sum, 16
	add	sum, sum, r26
	cmp.c	r26, sum
	srli	sum, sum, 16		/* the bleu below still tests the cmp.c flags */
	bleu	1f			/* r26 <= sum: no carry out of the add */
	addi	sum, 0x1		/* r26 > sum: add the carry back */
1:
	/*
	 * Odd buffer alignment?  r25 is non-zero when a leading odd byte was
	 * consumed, either in csum_partial or above; swap the two bytes of
	 * the folded sum in that case.
	 */
	cmpi.c	r25, 0x0
	beq	1f
	slli	r26, sum, 8
	srli	sum, sum, 8
	or	sum, sum, r26
	andi	sum, 0xffff
1:
	.set	optimize
	/* Add the passed partial csum. */
	ADDC(sum, r6)
	mv	r4, sum			/* result is returned in r4 */
	br	r3
	.set	volatile
|
|
|
|
/*
 * csum_partial - sum a buffer with end-around carry.
 *
 * In:   r4 (src) = buffer address
 *       r5       = length in bytes
 *       r6       = partial checksum to combine with the result
 *       r3       = return address
 * Out:  r4       = result, produced by the epilogue of small_csumcpy
 * Uses: r5, r8-r11, r25, r26, sum (r27), condition flags
 *
 * Buffers shorter than 8 bytes go straight to small_csumcpy.  Longer ones
 * are first brought to 2- and 4-byte alignment; with 56 bytes or more left
 * they are aligned further and summed in 128/64/32-byte chunks.  Remaining
 * whole words are summed one at a time and the last 0..3 bytes are handed
 * to small_csumcpy, which also folds the sum and returns.
 *
 * The chunk stages do not decrement r5; each later stage only looks at
 * the bits of r5 that it is responsible for (0x40, 0x20, 0x1c, 0x3).
 */
	.align	5
ENTRY(csum_partial)
	ldi	sum, 0
	ldi	r25, 0
	mv	r10, r5			/* small_csumcpy takes its length in r10 */
	cmpi.c	r5, 0x8
	blt	small_csumcpy		/* < 8 (signed) bytes to sum */
	cmpi.c	r5, 0x0
	beq	out			/* NOTE(review): not reachable, r5 >= 8 here */
	andri.c	r25, src, 0x1		/* r25 = 1 for a buffer at an odd address */

	beq	word_align
hword_align:				/* take 1 byte to reach 2-byte alignment */
	lbu	r8, [src]
	subi	r5, 0x1
	slli	r8, r8, 8		/* odd byte goes to bits 8..15 */
	ADDC(sum, r8)
	addi	src, 0x1

word_align:				/* src is 2-byte aligned */
	andri.c	r8, src, 0x2		/* already 4-byte aligned? */
	beq	dword_align
	lhu	r8, [src]
	subi	r5, 0x2
	ADDC(sum, r8)
	addi	src, 0x2

dword_align:				/* src is 4-byte aligned */
	mv	r26, r5			/* byte count for the word loop; only used when len < 56 */
	ldi	r8, 56
	cmp.c	r8, r5
	bgtu	do_end_words		/* 56 > len (unsigned): skip the chunked path */
	andri.c	r26, src, 0x4
	beq	qword_align
	lw	r8, [src]
	subi	r5, 0x4
	ADDC(sum, r8)
	addi	src, 0x4

qword_align:				/* src is 8-byte aligned */
	andri.c	r26, src, 0x8
	beq	oword_align
	lw	r8, [src, 0x0]
	lw	r9, [src, 0x4]
	subi	r5, 0x8			/* len -= 0x8 */
	ADDC(sum, r8)
	ADDC(sum, r9)
	addi	src, 0x8

oword_align:				/* src is 16-byte aligned */
	andri.c	r26, src, 0x10
	beq	begin_movement
	lw	r10, [src, 0x08]
	lw	r11, [src, 0x0c]
	lw	r8, [src, 0x00]
	lw	r9, [src, 0x04]
	ADDC(sum, r10)
	ADDC(sum, r11)
	ADDC(sum, r8)
	ADDC(sum, r9)
	subi	r5, 0x10
	addi	src, 0x10

begin_movement:
	srli.c	r26, r5, 0x7		/* r26 = len / 128 */
	beq	1f			/* len < 128 */

	/* r26 counts the 128-byte chunks still to go */
move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	CSUM_BIGCHUNK(src, 0x40, sum)
	CSUM_BIGCHUNK(src, 0x60, sum)
	subi.c	r26, 0x01
	addi	src, 0x80		/* the bne below tests the subi.c result */
	bne	move_128bytes

1:	/* fewer than 128 bytes left: one 64-byte chunk if bit 6 of len is set */
	andri.c	r10, r5, 0x40
	beq	1f

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	CSUM_BIGCHUNK(src, 0x20, sum)
	addi	src, 0x40

1:	/* fewer than 64 bytes left */
	andri	r26, r5, 0x1c		/* r26 = bytes in whole words below 32 (0x1c = 28) */
	andri.c	r10, r5, 0x20
	beq	do_end_words		/* bit 5 clear: no 32-byte chunk */

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum)
	andri	r26, r5, 0x1c
	addri	src, src, 0x20

do_end_words:				/* fewer than 32 bytes left, or len < 56 on entry */
	/* r26 = bytes to sum as whole words (set in dword_align or just above) */
	cmpi.c	r26, 0x0
	beq	maybe_end_cruft
	srli	r26, r26, 0x2		/* bytes -> words */

end_words:
	lw	r8, [src]
	subi.c	r26, 0x1		/* one word (4 bytes) consumed */
	ADDC(sum, r8)			/* overwrites the flags from subi.c */
	addi	src, 0x4
	cmpi.c	r26, 0x0		/* so the counter is tested again here */
	bne	end_words		/* r26 != 0 */

maybe_end_cruft:			/* fewer than 4 bytes left */
	andri	r10, r5, 0x3

small_memcpy:
	mv	r5, r10
	j	small_csumcpy		/* sums the tail, folds and returns */

out:
	mv	r4, sum
	br	r3

END(csum_partial)
|