mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-05 07:06:59 +07:00
5a0e3ad6af
percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. 
The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
466 lines
14 KiB
C
466 lines
14 KiB
C
/*
|
|
* Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com>
|
|
* Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks!
|
|
* Code was from the public domain, copyright abandoned. Code was
|
|
* subsequently included in the kernel, thus was re-licensed under the
|
|
* GNU GPL v2.
|
|
*
|
|
* Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com>
|
|
* Same crc32 function was used in 5 other places in the kernel.
|
|
* I made one version, and deleted the others.
|
|
* There are various incantations of crc32(). Some use a seed of 0 or ~0.
|
|
* Some xor at the end with ~0. The generic crc32() function takes
|
|
* seed as an argument, and doesn't xor at the end. Then individual
|
|
* users can do whatever they need.
|
|
* drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0.
|
|
* fs/jffs2 uses seed 0, doesn't xor with ~0.
|
|
* fs/partitions/efi.c uses seed ~0, xor's with ~0.
|
|
*
|
|
* This source code is licensed under the GNU General Public License,
|
|
* Version 2. See the file COPYING for more details.
|
|
*/
|
|
|
|
#include <linux/crc32.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/types.h>
|
|
#include <linux/init.h>
|
|
#include <asm/atomic.h>
|
|
#include "crc32defs.h"
|
|
/*
 * tole()/tobe(): byte-order wrappers for 32-bit constants.
 * Only the 8-bit configurations apply a conversion; every other width
 * passes the value through unchanged.
 * NOTE(review): presumably consumed by the table header included below --
 * confirm against crc32table.h.
 */
#if CRC_LE_BITS != 8
# define tole(x) (x)
#else
# define tole(x) __constant_cpu_to_le32(x)
#endif

#if CRC_BE_BITS != 8
# define tobe(x) (x)
#else
# define tobe(x) __constant_cpu_to_be32(x)
#endif
|
|
#include "crc32table.h"
|
|
|
|
MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
|
|
MODULE_DESCRIPTION("Ethernet CRC32 calculations");
|
|
MODULE_LICENSE("GPL");
|
|
|
|
#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
|
|
|
|
static inline u32
|
|
crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
|
|
{
|
|
# ifdef __LITTLE_ENDIAN
|
|
# define DO_CRC(x) crc = tab[(crc ^ (x)) & 255 ] ^ (crc >> 8)
|
|
# else
|
|
# define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
|
|
# endif
|
|
const u32 *b;
|
|
size_t rem_len;
|
|
|
|
/* Align it */
|
|
if (unlikely((long)buf & 3 && len)) {
|
|
do {
|
|
DO_CRC(*buf++);
|
|
} while ((--len) && ((long)buf)&3);
|
|
}
|
|
rem_len = len & 3;
|
|
/* load data 32 bits wide, xor data 32 bits wide. */
|
|
len = len >> 2;
|
|
b = (const u32 *)buf;
|
|
for (--b; len; --len) {
|
|
crc ^= *++b; /* use pre increment for speed */
|
|
DO_CRC(0);
|
|
DO_CRC(0);
|
|
DO_CRC(0);
|
|
DO_CRC(0);
|
|
}
|
|
len = rem_len;
|
|
/* And the last few bytes */
|
|
if (len) {
|
|
u8 *p = (u8 *)(b + 1) - 1;
|
|
do {
|
|
DO_CRC(*++p); /* use pre increment for speed */
|
|
} while (--len);
|
|
}
|
|
return crc;
|
|
#undef DO_CRC
|
|
}
|
|
#endif
|
|
/**
|
|
* crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
|
|
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
|
|
* other uses, or the previous crc32 value if computing incrementally.
|
|
* @p: pointer to buffer over which CRC is run
|
|
* @len: length of buffer @p
|
|
*/
|
|
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
|
|
|
|
#if CRC_LE_BITS == 1
|
|
/*
|
|
* In fact, the table-based code will work in this case, but it can be
|
|
* simplified by inlining the table in ?: form.
|
|
*/
|
|
|
|
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
{
	unsigned char const *end = p + len;
	int bit;

	for (; p != end; p++) {
		/* fold the next byte into the low end of the remainder */
		crc ^= *p;

		/* one shift per bit; xor in the polynomial if a 1 fell out */
		for (bit = 0; bit < 8; bit++) {
			u32 lsb = crc & 1;

			crc >>= 1;
			if (lsb)
				crc ^= CRCPOLY_LE;
		}
	}
	return crc;
}
|
|
#else /* Table-based approach */
|
|
|
|
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
{
# if CRC_LE_BITS == 8
	/*
	 * crc32_body() is handed the remainder after __cpu_to_le32(); the
	 * result is converted back with __le32_to_cpu() before returning.
	 */
	u32 le_crc = __cpu_to_le32(crc);

	le_crc = crc32_body(le_crc, p, len, crc32table_le);
	return __le32_to_cpu(le_crc);
# elif CRC_LE_BITS == 4
	unsigned char const *end = p + len;
	int step;

	/* two 4-bit table steps per input byte */
	for (; p != end; p++) {
		crc ^= *p;
		for (step = 0; step < 2; step++)
			crc = crc32table_le[crc & 15] ^ (crc >> 4);
	}
	return crc;
# elif CRC_LE_BITS == 2
	unsigned char const *end = p + len;
	int step;

	/* four 2-bit table steps per input byte */
	for (; p != end; p++) {
		crc ^= *p;
		for (step = 0; step < 4; step++)
			crc = crc32table_le[crc & 3] ^ (crc >> 2);
	}
	return crc;
# endif
}
|
|
#endif
|
|
|
|
/**
|
|
* crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
|
|
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
|
|
* other uses, or the previous crc32 value if computing incrementally.
|
|
* @p: pointer to buffer over which CRC is run
|
|
* @len: length of buffer @p
|
|
*/
|
|
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
|
|
|
|
#if CRC_BE_BITS == 1
|
|
/*
|
|
* In fact, the table-based code will work in this case, but it can be
|
|
* simplified by inlining the table in ?: form.
|
|
*/
|
|
|
|
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
	int i;

	while (len--) {
		/*
		 * Widen to u32 before shifting: *p promotes to (signed) int,
		 * and moving a byte >= 0x80 into bit 31 of an int is
		 * undefined behaviour.
		 */
		crc ^= (u32)*p++ << 24;

		/* one shift per bit; xor in the polynomial if bit 31 was set */
		for (i = 0; i < 8; i++)
			crc = (crc << 1) ^
			      ((crc & 0x80000000) ? CRCPOLY_BE : 0);
	}
	return crc;
}
|
|
|
|
#else /* Table-based approach */
|
|
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
{
# if CRC_BE_BITS == 8
	const u32 *tab = crc32table_be;

	/*
	 * crc32_body() is handed the remainder after __cpu_to_be32(); the
	 * result is converted back with __be32_to_cpu() before returning.
	 */
	crc = __cpu_to_be32(crc);
	crc = crc32_body(crc, p, len, tab);
	return __be32_to_cpu(crc);
# elif CRC_BE_BITS == 4
	while (len--) {
		/*
		 * Widen to u32 before shifting: *p promotes to (signed) int,
		 * and moving a byte >= 0x80 into bit 31 of an int is
		 * undefined behaviour.
		 */
		crc ^= (u32)*p++ << 24;
		/* two 4-bit table steps per input byte */
		crc = (crc << 4) ^ crc32table_be[crc >> 28];
		crc = (crc << 4) ^ crc32table_be[crc >> 28];
	}
	return crc;
# elif CRC_BE_BITS == 2
	while (len--) {
		/* same widening as above, to keep the shift in unsigned math */
		crc ^= (u32)*p++ << 24;
		/* four 2-bit table steps per input byte */
		crc = (crc << 2) ^ crc32table_be[crc >> 30];
		crc = (crc << 2) ^ crc32table_be[crc >> 30];
		crc = (crc << 2) ^ crc32table_be[crc >> 30];
		crc = (crc << 2) ^ crc32table_be[crc >> 30];
	}
	return crc;
# endif
}
|
|
#endif
|
|
|
|
EXPORT_SYMBOL(crc32_le);
|
|
EXPORT_SYMBOL(crc32_be);
|
|
|
|
/*
|
|
* A brief CRC tutorial.
|
|
*
|
|
* A CRC is a long-division remainder. You add the CRC to the message,
|
|
* and the whole thing (message+CRC) is a multiple of the given
|
|
* CRC polynomial. To check the CRC, you can either check that the
|
|
* CRC matches the recomputed value, *or* you can check that the
|
|
* remainder computed on the message+CRC is 0. This latter approach
|
|
* is used by a lot of hardware implementations, and is why so many
|
|
* protocols put the end-of-frame flag after the CRC.
|
|
*
|
|
* It's actually the same long division you learned in school, except that
|
|
* - We're working in binary, so the digits are only 0 and 1, and
|
|
* - When dividing polynomials, there are no carries. Rather than add and
|
|
* subtract, we just xor. Thus, we tend to get a bit sloppy about
|
|
* the difference between adding and subtracting.
|
|
*
|
|
* A 32-bit CRC polynomial is actually 33 bits long. But since it's
|
|
* 33 bits long, bit 32 is always going to be set, so usually the CRC
|
|
* is written in hex with the most significant bit omitted. (If you're
|
|
* familiar with the IEEE 754 floating-point format, it's the same idea.)
|
|
*
|
|
* Note that a CRC is computed over a string of *bits*, so you have
|
|
* to decide on the endianness of the bits within each byte. To get
|
|
* the best error-detecting properties, this should correspond to the
|
|
* order they're actually sent. For example, standard RS-232 serial is
|
|
* little-endian; the most significant bit (sometimes used for parity)
|
|
* is sent last. And when appending a CRC word to a message, you should
|
|
* do it in the right order, matching the endianness.
|
|
*
|
|
* Just like with ordinary division, the remainder is always smaller than
|
|
* the divisor (the CRC polynomial) you're dividing by. Each step of the
|
|
* division, you take one more digit (bit) of the dividend and append it
|
|
* to the current remainder. Then you figure out the appropriate multiple
|
|
* of the divisor to subtract to bring the remainder back into range.
|
|
* In binary, it's easy - it has to be either 0 or 1, and to make the
|
|
* XOR cancel, it's just a copy of bit 32 of the remainder.
|
|
*
|
|
* When computing a CRC, we don't care about the quotient, so we can
|
|
* throw the quotient bit away, but subtract the appropriate multiple of
|
|
* the polynomial from the remainder and we're back to where we started,
|
|
* ready to process the next bit.
|
|
*
|
|
* A big-endian CRC written this way would be coded like:
|
|
* for (i = 0; i < input_bits; i++) {
|
|
* multiple = remainder & 0x80000000 ? CRCPOLY : 0;
|
|
* remainder = (remainder << 1 | next_input_bit()) ^ multiple;
|
|
* }
|
|
* Notice how, to get at bit 32 of the shifted remainder, we look
|
|
* at bit 31 of the remainder *before* shifting it.
|
|
*
|
|
* But also notice how the next_input_bit() bits we're shifting into
|
|
* the remainder don't actually affect any decision-making until
|
|
* 32 bits later. Thus, the first 32 cycles of this are pretty boring.
|
|
* Also, to add the CRC to a message, we need a 32-bit-long hole for it at
|
|
* the end, so we have to add 32 extra cycles shifting in zeros at the
|
|
* end of every message.
|
|
*
|
|
* So the standard trick is to rearrange merging in the next_input_bit()
|
|
* until the moment it's needed. Then the first 32 cycles can be precomputed,
|
|
* and merging in the final 32 zero bits to make room for the CRC can be
|
|
* skipped entirely.
|
|
* This changes the code to:
|
|
* for (i = 0; i < input_bits; i++) {
|
|
* remainder ^= next_input_bit() << 31;
|
|
* multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
|
|
* remainder = (remainder << 1) ^ multiple;
|
|
* }
|
|
* With this optimization, the little-endian code is simpler:
|
|
* for (i = 0; i < input_bits; i++) {
|
|
* remainder ^= next_input_bit();
|
|
* multiple = (remainder & 1) ? CRCPOLY : 0;
|
|
* remainder = (remainder >> 1) ^ multiple;
|
|
* }
|
|
*
|
|
* Note that the other details of endianness have been hidden in CRCPOLY
|
|
* (which must be bit-reversed) and next_input_bit().
|
|
*
|
|
* However, as long as next_input_bit is returning the bits in a sensible
|
|
* order, we can actually do the merging 8 or more bits at a time rather
|
|
* than one bit at a time:
|
|
* for (i = 0; i < input_bytes; i++) {
|
|
* remainder ^= next_input_byte() << 24;
|
|
* for (j = 0; j < 8; j++) {
|
|
* multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
|
|
* remainder = (remainder << 1) ^ multiple;
|
|
* }
|
|
* }
|
|
* Or in little-endian:
|
|
* for (i = 0; i < input_bytes; i++) {
|
|
* remainder ^= next_input_byte();
|
|
* for (j = 0; j < 8; j++) {
|
|
* multiple = (remainder & 1) ? CRCPOLY : 0;
|
|
* remainder = (remainder >> 1) ^ multiple;
|
|
* }
|
|
* }
|
|
* If the input is a multiple of 32 bits, you can even XOR in a 32-bit
|
|
* word at a time and increase the inner loop count to 32.
|
|
*
|
|
* You can also mix and match the two loop styles, for example doing the
|
|
* bulk of a message byte-at-a-time and adding bit-at-a-time processing
|
|
* for any fractional bytes at the end.
|
|
*
|
|
* The only remaining optimization is to the byte-at-a-time table method.
|
|
* Here, rather than just shifting one bit of the remainder to decide
|
|
* on the correct multiple to subtract, we can shift a byte at a time.
|
|
* This produces a 40-bit (rather than a 33-bit) intermediate remainder,
|
|
* but again the multiple of the polynomial to subtract depends only on
|
|
* the high bits, the high 8 bits in this case.
|
|
*
|
|
* The multiple we need in that case is the low 32 bits of a 40-bit
|
|
* value whose high 8 bits are given, and which is a multiple of the
|
|
* generator polynomial. This is simply the CRC-32 of the given
|
|
* one-byte message.
|
|
*
|
|
* Two more details: normally, appending zero bits to a message which
|
|
* is already a multiple of a polynomial produces a larger multiple of that
|
|
* polynomial. To enable a CRC to detect this condition, it's common to
|
|
* invert the CRC before appending it. This makes the remainder of the
|
|
* message+crc come out not as zero, but some fixed non-zero value.
|
|
*
|
|
* The same problem applies to zero bits prepended to the message, and
|
|
* a similar solution is used. Instead of starting with a remainder of
|
|
* 0, an initial remainder of all ones is used. As long as you start
|
|
* the same way on decoding, it doesn't make a difference.
|
|
*/
|
|
|
|
#ifdef UNITTEST
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#if 0 /*Not used at present */
|
|
/* Write @prefix, then each of the @len bytes at @buf as " xx" hex, then '\n'. */
static void
buf_dump(char const *prefix, unsigned char const *buf, size_t len)
{
	size_t idx;

	fputs(prefix, stdout);
	for (idx = 0; idx < len; idx++)
		printf(" %02x", buf[idx]);
	putchar('\n');
}
|
|
#endif
|
|
|
|
/* Replace each of the @len bytes at @buf, in place, with its bitrev8() value. */
static void bytereverse(unsigned char *buf, size_t len)
{
	unsigned char *end = buf + len;

	for (; buf != end; buf++)
		*buf = bitrev8(*buf);
}
|
|
|
|
/* Fill the @len bytes at @buf with the low byte of successive random() calls. */
static void random_garbage(unsigned char *buf, size_t len)
{
	size_t idx;

	for (idx = 0; idx < len; idx++)
		buf[idx] = (unsigned char) random();
}
|
|
|
|
#if 0 /* Not used at present */
|
|
/* Store @x into buf[0..3], least significant byte first. */
static void store_le(u32 x, unsigned char *buf)
{
	int shift;

	for (shift = 0; shift < 32; shift += 8)
		*buf++ = (unsigned char) (x >> shift);
}
|
|
#endif
|
|
|
|
/* Store @x into buf[0..3], most significant byte first. */
static void store_be(u32 x, unsigned char *buf)
{
	int shift;

	for (shift = 24; shift >= 0; shift -= 8)
		*buf++ = (unsigned char) (x >> shift);
}
|
|
|
|
/*
|
|
* This checks that CRC(buf + CRC(buf)) = 0, and that
|
|
* CRC commutes with bit-reversal. This has the side effect
|
|
* of bytewise bit-reversing the input buffer, and returns
|
|
* the CRC of the reversed buffer.
|
|
*/
|
|
static u32 test_step(u32 init, unsigned char *buf, size_t len)
|
|
{
|
|
u32 crc1, crc2;
|
|
size_t i;
|
|
|
|
crc1 = crc32_be(init, buf, len);
|
|
store_be(crc1, buf + len);
|
|
crc2 = crc32_be(init, buf, len + 4);
|
|
if (crc2)
|
|
printf("\nCRC cancellation fail: 0x%08x should be 0\n",
|
|
crc2);
|
|
|
|
for (i = 0; i <= len + 4; i++) {
|
|
crc2 = crc32_be(init, buf, i);
|
|
crc2 = crc32_be(crc2, buf + i, len + 4 - i);
|
|
if (crc2)
|
|
printf("\nCRC split fail: 0x%08x\n", crc2);
|
|
}
|
|
|
|
/* Now swap it around for the other test */
|
|
|
|
bytereverse(buf, len + 4);
|
|
init = bitrev32(init);
|
|
crc2 = bitrev32(crc1);
|
|
if (crc1 != bitrev32(crc2))
|
|
printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n",
|
|
crc1, crc2, bitrev32(crc2));
|
|
crc1 = crc32_le(init, buf, len);
|
|
if (crc1 != crc2)
|
|
printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1,
|
|
crc2);
|
|
crc2 = crc32_le(init, buf, len + 4);
|
|
if (crc2)
|
|
printf("\nCRC cancellation fail: 0x%08x should be 0\n",
|
|
crc2);
|
|
|
|
for (i = 0; i <= len + 4; i++) {
|
|
crc2 = crc32_le(init, buf, i);
|
|
crc2 = crc32_le(crc2, buf + i, len + 4 - i);
|
|
if (crc2)
|
|
printf("\nCRC split fail: 0x%08x\n", crc2);
|
|
}
|
|
|
|
return crc1;
|
|
}
|
|
|
|
#define SIZE 64
|
|
#define INIT1 0
|
|
#define INIT2 0
|
|
|
|
int main(void)
|
|
{
|
|
unsigned char buf1[SIZE + 4];
|
|
unsigned char buf2[SIZE + 4];
|
|
unsigned char buf3[SIZE + 4];
|
|
int i, j;
|
|
u32 crc1, crc2, crc3;
|
|
|
|
for (i = 0; i <= SIZE; i++) {
|
|
printf("\rTesting length %d...", i);
|
|
fflush(stdout);
|
|
random_garbage(buf1, i);
|
|
random_garbage(buf2, i);
|
|
for (j = 0; j < i; j++)
|
|
buf3[j] = buf1[j] ^ buf2[j];
|
|
|
|
crc1 = test_step(INIT1, buf1, i);
|
|
crc2 = test_step(INIT2, buf2, i);
|
|
/* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */
|
|
crc3 = test_step(INIT1 ^ INIT2, buf3, i);
|
|
if (crc3 != (crc1 ^ crc2))
|
|
printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n",
|
|
crc3, crc1, crc2);
|
|
}
|
|
printf("\nAll test complete. No failures expected.\n");
|
|
return 0;
|
|
}
|
|
|
|
#endif /* UNITTEST */
|