mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-16 17:56:39 +07:00
e9c4943a10
The generic csum_ipv6_magic() generates a pretty bad result 00000000 <csum_ipv6_magic>: (PPC32) 0: 81 23 00 00 lwz r9,0(r3) 4: 81 03 00 04 lwz r8,4(r3) 8: 7c e7 4a 14 add r7,r7,r9 c: 7d 29 38 10 subfc r9,r9,r7 10: 7d 4a 51 10 subfe r10,r10,r10 14: 7d 27 42 14 add r9,r7,r8 18: 7d 2a 48 50 subf r9,r10,r9 1c: 80 e3 00 08 lwz r7,8(r3) 20: 7d 08 48 10 subfc r8,r8,r9 24: 7d 4a 51 10 subfe r10,r10,r10 28: 7d 29 3a 14 add r9,r9,r7 2c: 81 03 00 0c lwz r8,12(r3) 30: 7d 2a 48 50 subf r9,r10,r9 34: 7c e7 48 10 subfc r7,r7,r9 38: 7d 4a 51 10 subfe r10,r10,r10 3c: 7d 29 42 14 add r9,r9,r8 40: 7d 2a 48 50 subf r9,r10,r9 44: 80 e4 00 00 lwz r7,0(r4) 48: 7d 08 48 10 subfc r8,r8,r9 4c: 7d 4a 51 10 subfe r10,r10,r10 50: 7d 29 3a 14 add r9,r9,r7 54: 7d 2a 48 50 subf r9,r10,r9 58: 81 04 00 04 lwz r8,4(r4) 5c: 7c e7 48 10 subfc r7,r7,r9 60: 7d 4a 51 10 subfe r10,r10,r10 64: 7d 29 42 14 add r9,r9,r8 68: 7d 2a 48 50 subf r9,r10,r9 6c: 80 e4 00 08 lwz r7,8(r4) 70: 7d 08 48 10 subfc r8,r8,r9 74: 7d 4a 51 10 subfe r10,r10,r10 78: 7d 29 3a 14 add r9,r9,r7 7c: 7d 2a 48 50 subf r9,r10,r9 80: 81 04 00 0c lwz r8,12(r4) 84: 7c e7 48 10 subfc r7,r7,r9 88: 7d 4a 51 10 subfe r10,r10,r10 8c: 7d 29 42 14 add r9,r9,r8 90: 7d 2a 48 50 subf r9,r10,r9 94: 7d 08 48 10 subfc r8,r8,r9 98: 7d 4a 51 10 subfe r10,r10,r10 9c: 7d 29 2a 14 add r9,r9,r5 a0: 7d 2a 48 50 subf r9,r10,r9 a4: 7c a5 48 10 subfc r5,r5,r9 a8: 7c 63 19 10 subfe r3,r3,r3 ac: 7d 29 32 14 add r9,r9,r6 b0: 7d 23 48 50 subf r9,r3,r9 b4: 7c c6 48 10 subfc r6,r6,r9 b8: 7c 63 19 10 subfe r3,r3,r3 bc: 7c 63 48 50 subf r3,r3,r9 c0: 54 6a 80 3e rotlwi r10,r3,16 c4: 7c 63 52 14 add r3,r3,r10 c8: 7c 63 18 f8 not r3,r3 cc: 54 63 84 3e rlwinm r3,r3,16,16,31 d0: 4e 80 00 20 blr 0000000000000000 <.csum_ipv6_magic>: (PPC64) 0: 81 23 00 00 lwz r9,0(r3) 4: 80 03 00 04 lwz r0,4(r3) 8: 81 63 00 08 lwz r11,8(r3) c: 7c e7 4a 14 add r7,r7,r9 10: 7f 89 38 40 cmplw cr7,r9,r7 14: 7d 47 02 14 add r10,r7,r0 18: 7d 30 10 26 mfocrf r9,1 1c: 55 29 f7 fe rlwinm 
r9,r9,30,31,31 20: 7d 4a 4a 14 add r10,r10,r9 24: 7f 80 50 40 cmplw cr7,r0,r10 28: 7d 2a 5a 14 add r9,r10,r11 2c: 80 03 00 0c lwz r0,12(r3) 30: 81 44 00 00 lwz r10,0(r4) 34: 7d 10 10 26 mfocrf r8,1 38: 55 08 f7 fe rlwinm r8,r8,30,31,31 3c: 7d 29 42 14 add r9,r9,r8 40: 81 04 00 04 lwz r8,4(r4) 44: 7f 8b 48 40 cmplw cr7,r11,r9 48: 7d 29 02 14 add r9,r9,r0 4c: 7d 70 10 26 mfocrf r11,1 50: 55 6b f7 fe rlwinm r11,r11,30,31,31 54: 7d 29 5a 14 add r9,r9,r11 58: 7f 80 48 40 cmplw cr7,r0,r9 5c: 7d 29 52 14 add r9,r9,r10 60: 7c 10 10 26 mfocrf r0,1 64: 54 00 f7 fe rlwinm r0,r0,30,31,31 68: 7d 69 02 14 add r11,r9,r0 6c: 7f 8a 58 40 cmplw cr7,r10,r11 70: 7c 0b 42 14 add r0,r11,r8 74: 81 44 00 08 lwz r10,8(r4) 78: 7c f0 10 26 mfocrf r7,1 7c: 54 e7 f7 fe rlwinm r7,r7,30,31,31 80: 7c 00 3a 14 add r0,r0,r7 84: 7f 88 00 40 cmplw cr7,r8,r0 88: 7d 20 52 14 add r9,r0,r10 8c: 80 04 00 0c lwz r0,12(r4) 90: 7d 70 10 26 mfocrf r11,1 94: 55 6b f7 fe rlwinm r11,r11,30,31,31 98: 7d 29 5a 14 add r9,r9,r11 9c: 7f 8a 48 40 cmplw cr7,r10,r9 a0: 7d 29 02 14 add r9,r9,r0 a4: 7d 70 10 26 mfocrf r11,1 a8: 55 6b f7 fe rlwinm r11,r11,30,31,31 ac: 7d 29 5a 14 add r9,r9,r11 b0: 7f 80 48 40 cmplw cr7,r0,r9 b4: 7d 29 2a 14 add r9,r9,r5 b8: 7c 10 10 26 mfocrf r0,1 bc: 54 00 f7 fe rlwinm r0,r0,30,31,31 c0: 7d 29 02 14 add r9,r9,r0 c4: 7f 85 48 40 cmplw cr7,r5,r9 c8: 7c 09 32 14 add r0,r9,r6 cc: 7d 50 10 26 mfocrf r10,1 d0: 55 4a f7 fe rlwinm r10,r10,30,31,31 d4: 7c 00 52 14 add r0,r0,r10 d8: 7f 80 30 40 cmplw cr7,r0,r6 dc: 7d 30 10 26 mfocrf r9,1 e0: 55 29 ef fe rlwinm r9,r9,29,31,31 e4: 7c 09 02 14 add r0,r9,r0 e8: 54 03 80 3e rotlwi r3,r0,16 ec: 7c 03 02 14 add r0,r3,r0 f0: 7c 03 00 f8 not r3,r0 f4: 78 63 84 22 rldicl r3,r3,48,48 f8: 4e 80 00 20 blr This patch implements it in assembly for both PPC32 and PPC64 Link: https://github.com/linuxppc/linux/issues/9 Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org> Signed-off-by: Michael 
Ellerman <mpe@ellerman.id.au>
223 lines
5.9 KiB
C
223 lines
5.9 KiB
C
#ifndef _ASM_POWERPC_CHECKSUM_H
|
|
#define _ASM_POWERPC_CHECKSUM_H
|
|
#ifdef __KERNEL__
|
|
|
|
/*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#ifdef CONFIG_GENERIC_CSUM
|
|
#include <asm-generic/checksum.h>
|
|
#else
|
|
#include <linux/bitops.h>
|
|
#include <linux/in6.h>
|
|
/*
|
|
* Computes the checksum of a memory block at src, length len,
|
|
* and adds in "sum" (32-bit), while copying the block to dst.
|
|
* If an access exception occurs on src or dst, it stores -EFAULT
|
|
* to *src_err or *dst_err respectively (if that pointer is not
|
|
* NULL), and, for an error on src, zeroes the rest of dst.
|
|
*
|
|
* Like csum_partial, this must be called with even lengths,
|
|
* except for the last fragment.
|
|
*/
|
|
extern __wsum csum_partial_copy_generic(const void *src, void *dst,
|
|
int len, __wsum sum,
|
|
int *src_err, int *dst_err);
|
|
|
|
#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
|
|
extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
|
|
int len, __wsum sum, int *err_ptr);
|
|
#define HAVE_CSUM_COPY_USER
|
|
extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
|
|
int len, __wsum sum, int *err_ptr);
|
|
|
|
/*
 * Checksum-and-copy for callers that need no fault reporting: forwards
 * to csum_partial_copy_generic() with both error pointers set to NULL.
 */
#define csum_partial_copy_nocheck(src, dst, len, sum)	\
	csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL)
|
|
|
|
|
|
/*
|
|
* turns a 32-bit partial checksum (e.g. from csum_partial) into a
|
|
* 1's complement 16-bit checksum.
|
|
*/
|
|
static inline __sum16 csum_fold(__wsum sum)
|
|
{
|
|
unsigned int tmp;
|
|
|
|
/* swap the two 16-bit halves of sum */
|
|
__asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum));
|
|
/* if there is a carry from adding the two 16-bit halves,
|
|
it will carry from the lower half into the upper half,
|
|
giving us the correct sum in the upper half. */
|
|
return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
|
|
}
|
|
|
|
/*
 * Fold a 64-bit accumulator into 32 bits.
 *
 * x is added to itself rotated by 32 bits, which sums the two 32-bit
 * halves in the upper word (including the carry coming out of the
 * lower-word addition); that upper word is the return value.
 */
static inline u32 from64to32(u64 x)
{
	u64 folded = x + ror64(x, 32);

	return folded >> 32;
}
|
|
|
|
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
|
|
__u8 proto, __wsum sum)
|
|
{
|
|
#ifdef __powerpc64__
|
|
u64 s = (__force u32)sum;
|
|
|
|
s += (__force u32)saddr;
|
|
s += (__force u32)daddr;
|
|
#ifdef __BIG_ENDIAN__
|
|
s += proto + len;
|
|
#else
|
|
s += (proto + len) << 8;
|
|
#endif
|
|
return (__force __wsum) from64to32(s);
|
|
#else
|
|
__asm__("\n\
|
|
addc %0,%0,%1 \n\
|
|
adde %0,%0,%2 \n\
|
|
adde %0,%0,%3 \n\
|
|
addze %0,%0 \n\
|
|
"
|
|
: "=r" (sum)
|
|
: "r" (daddr), "r"(saddr), "r"(proto + len), "0"(sum));
|
|
return sum;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* computes the checksum of the TCP/UDP pseudo-header
|
|
* returns a 16-bit checksum, already complemented
|
|
*/
|
|
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
|
|
__u8 proto, __wsum sum)
|
|
{
|
|
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
|
|
}
|
|
|
|
#define HAVE_ARCH_CSUM_ADD
|
|
static inline __wsum csum_add(__wsum csum, __wsum addend)
|
|
{
|
|
#ifdef __powerpc64__
|
|
u64 res = (__force u64)csum;
|
|
#endif
|
|
if (__builtin_constant_p(csum) && csum == 0)
|
|
return addend;
|
|
if (__builtin_constant_p(addend) && addend == 0)
|
|
return csum;
|
|
|
|
#ifdef __powerpc64__
|
|
res += (__force u64)addend;
|
|
return (__force __wsum)((u32)res + (res >> 32));
|
|
#else
|
|
asm("addc %0,%0,%1;"
|
|
"addze %0,%0;"
|
|
: "+r" (csum) : "r" (addend) : "xer");
|
|
return csum;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* This is a version of ip_compute_csum() optimized for IP headers,
|
|
* which always checksum on 4 octet boundaries. ihl is the number
|
|
* of 32-bit words and is always >= 5.
|
|
*/
|
|
static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
|
|
{
|
|
const u32 *ptr = (const u32 *)iph + 1;
|
|
#ifdef __powerpc64__
|
|
unsigned int i;
|
|
u64 s = *(const u32 *)iph;
|
|
|
|
for (i = 0; i < ihl - 1; i++, ptr++)
|
|
s += *ptr;
|
|
return (__force __wsum)from64to32(s);
|
|
#else
|
|
__wsum sum, tmp;
|
|
|
|
asm("mtctr %3;"
|
|
"addc %0,%4,%5;"
|
|
"1: lwzu %1, 4(%2);"
|
|
"adde %0,%0,%1;"
|
|
"bdnz 1b;"
|
|
"addze %0,%0;"
|
|
: "=r" (sum), "=r" (tmp), "+b" (ptr)
|
|
: "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
|
|
: "ctr", "xer", "memory");
|
|
|
|
return sum;
|
|
#endif
|
|
}
|
|
|
|
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
|
{
|
|
return csum_fold(ip_fast_csum_nofold(iph, ihl));
|
|
}
|
|
|
|
/*
|
|
* computes the checksum of a memory block at buff, length len,
|
|
* and adds in "sum" (32-bit)
|
|
*
|
|
* returns a 32-bit number suitable for feeding into itself
|
|
* or csum_tcpudp_magic
|
|
*
|
|
* this function must be called with even lengths, except
|
|
* for the last fragment, which may be odd
|
|
*
|
|
* it's best to have buff aligned on a 32-bit boundary
|
|
*/
|
|
/*
 * Out-of-line checksum routine; only declared in this header.  The inline
 * csum_partial() wrapper calls it for every length it does not handle
 * inline.
 */
__wsum __csum_partial(const void *buff, int len, __wsum sum);
|
|
|
|
/*
 * Inline front end for __csum_partial().
 *
 * Checksums the len bytes at buff and adds the result to the 32-bit
 * partial checksum "sum".  When len is a compile-time constant:
 *  - even lengths up to 16 bytes are summed inline, one 32-bit word at
 *    a time followed by a trailing 16-bit word when len is not a
 *    multiple of 4;
 *  - any other multiple of 4 goes through ip_fast_csum_nofold().
 * Every remaining case is handed to __csum_partial().
 */
static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
{
	const char *p = buff;

	if (!__builtin_constant_p(len))
		return __csum_partial(buff, len, sum);

	if (len > 16 || (len & 1) != 0) {
		if ((len & 3) == 0)
			return csum_add(sum,
					ip_fast_csum_nofold(buff, len >> 2));
		return __csum_partial(buff, len, sum);
	}

	/* Constant even len <= 16: add the pieces in increasing offset order. */
	if (len == 2)
		sum = csum_add(sum, (__force __wsum)*(const u16 *)p);
	if (len >= 4)
		sum = csum_add(sum, (__force __wsum)*(const u32 *)p);
	if (len == 6)
		sum = csum_add(sum, (__force __wsum)*(const u16 *)(p + 4));
	if (len >= 8)
		sum = csum_add(sum, (__force __wsum)*(const u32 *)(p + 4));
	if (len == 10)
		sum = csum_add(sum, (__force __wsum)*(const u16 *)(p + 8));
	if (len >= 12)
		sum = csum_add(sum, (__force __wsum)*(const u32 *)(p + 8));
	if (len == 14)
		sum = csum_add(sum, (__force __wsum)*(const u16 *)(p + 12));
	if (len >= 16)
		sum = csum_add(sum, (__force __wsum)*(const u32 *)(p + 12));

	return sum;
}
|
|
|
|
/*
|
|
* this routine is used for miscellaneous IP-like checksums, mainly
|
|
* in icmp.c
|
|
*/
|
|
static inline __sum16 ip_compute_csum(const void *buff, int len)
|
|
{
|
|
return csum_fold(csum_partial(buff, len, 0));
|
|
}
|
|
|
|
#define _HAVE_ARCH_IPV6_CSUM
|
|
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
|
|
const struct in6_addr *daddr,
|
|
__u32 len, __u8 proto, __wsum sum);
|
|
|
|
#endif
|
|
#endif /* __KERNEL__ */
|
|
#endif
|