mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-22 14:55:02 +07:00
e7cdb60fd2
Pull zstd support from Chris Mason: "Nick Terrell's patch series to add zstd support to the kernel has been floating around for a while. After talking with Dave Sterba, Herbert and Phillip, we decided to send the whole thing in as one pull request. zstd is a big win in speed over zlib and in compression ratio over lzo, and the compression team here at FB has gotten great results using it in production. Nick will continue to update the kernel side with new improvements from the open source zstd userland code. Nick has a number of benchmarks for the main zstd code in his lib/zstd commit: I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and a SSD. I benchmarked using `silesia.tar` [3], which is 211,988,480 B large. Run the following commands for the benchmark: sudo modprobe zstd_compress_test sudo mknod zstd_compress_test c 245 0 sudo cp silesia.tar zstd_compress_test The time is reported by the time of the userland `cp`. The MB/s is computed with 211,988,480 B / time(buffer size, hash) which includes the time to copy from userland. The Adjusted MB/s is computed with 211,988,480 B / (time(buffer size, hash) - time(buffer size, none)). The memory reported is the amount of memory the compressor requests. 
| Method | Size (B) | Time (s) | Ratio | MB/s | Adj MB/s | Mem (MB) | |----------|----------|----------|-------|---------|----------|----------| | none | 211988480 | 0.100 | 1 | 2119.88 | - | - | | zstd -1 | 73645762 | 1.044 | 2.878 | 203.05 | 224.56 | 1.23 | | zstd -3 | 66988878 | 1.761 | 3.165 | 120.38 | 127.63 | 2.47 | | zstd -5 | 65001259 | 2.563 | 3.261 | 82.71 | 86.07 | 2.86 | | zstd -10 | 60165346 | 13.242 | 3.523 | 16.01 | 16.13 | 13.22 | | zstd -15 | 58009756 | 47.601 | 3.654 | 4.45 | 4.46 | 21.61 | | zstd -19 | 54014593 | 102.835 | 3.925 | 2.06 | 2.06 | 60.15 | | zlib -1 | 77260026 | 2.895 | 2.744 | 73.23 | 75.85 | 0.27 | | zlib -3 | 72972206 | 4.116 | 2.905 | 51.50 | 52.79 | 0.27 | | zlib -6 | 68190360 | 9.633 | 3.109 | 22.01 | 22.24 | 0.27 | | zlib -9 | 67613382 | 22.554 | 3.135 | 9.40 | 9.44 | 0.27 | I benchmarked zstd decompression using the same method on the same machine. The benchmark file is located in the upstream zstd repo under `contrib/linux-kernel/zstd_decompress_test.c` [4]. The memory reported is the amount of memory required to decompress data compressed with the given compression level. If you know the maximum size of your input, you can reduce the memory usage of decompression irrespective of the compression level. 
| Method | Time (s) | MB/s | Adjusted MB/s | Memory (MB) | |----------|----------|---------|---------------|-------------| | none | 0.025 | 8479.54 | - | - | | zstd -1 | 0.358 | 592.15 | 636.60 | 0.84 | | zstd -3 | 0.396 | 535.32 | 571.40 | 1.46 | | zstd -5 | 0.396 | 535.32 | 571.40 | 1.46 | | zstd -10 | 0.374 | 566.81 | 607.42 | 2.51 | | zstd -15 | 0.379 | 559.34 | 598.84 | 4.61 | | zstd -19 | 0.412 | 514.54 | 547.77 | 8.80 | | zlib -1 | 0.940 | 225.52 | 231.68 | 0.04 | | zlib -3 | 0.883 | 240.08 | 247.07 | 0.04 | | zlib -6 | 0.844 | 251.17 | 258.84 | 0.04 | | zlib -9 | 0.837 | 253.27 | 261.07 | 0.04 | I ran a long series of tests and benchmarks on the btrfs side and the gains are very similar to the core benchmarks Nick ran" * 'zstd-minimal' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: squashfs: Add zstd support btrfs: Add zstd support lib: Add zstd modules lib: Add xxhash module
590 lines
13 KiB
Plaintext
590 lines
13 KiB
Plaintext
#
|
|
# Library configuration
|
|
#
|
|
|
|
config BINARY_PRINTF
|
|
def_bool n
|
|
|
|
menu "Library routines"
|
|
|
|
config RAID6_PQ
|
|
tristate
|
|
|
|
config BITREVERSE
|
|
tristate
|
|
|
|
config HAVE_ARCH_BITREVERSE
|
|
bool
|
|
default n
|
|
depends on BITREVERSE
|
|
help
|
|
This option enables the use of hardware bit-reversal instructions on
|
|
architectures which support such operations.
|
|
|
|
config RATIONAL
|
|
bool
|
|
|
|
config GENERIC_STRNCPY_FROM_USER
|
|
bool
|
|
|
|
config GENERIC_STRNLEN_USER
|
|
bool
|
|
|
|
config GENERIC_NET_UTILS
|
|
bool
|
|
|
|
config GENERIC_FIND_FIRST_BIT
|
|
bool
|
|
|
|
config NO_GENERIC_PCI_IOPORT_MAP
|
|
bool
|
|
|
|
config GENERIC_PCI_IOMAP
|
|
bool
|
|
|
|
config GENERIC_IOMAP
|
|
bool
|
|
select GENERIC_PCI_IOMAP
|
|
|
|
config GENERIC_IO
|
|
bool
|
|
default n
|
|
|
|
config STMP_DEVICE
|
|
bool
|
|
|
|
config ARCH_USE_CMPXCHG_LOCKREF
|
|
bool
|
|
|
|
config ARCH_HAS_FAST_MULTIPLIER
|
|
bool
|
|
|
|
config CRC_CCITT
|
|
tristate "CRC-CCITT functions"
|
|
help
|
|
This option is provided for the case where no in-kernel-tree
|
|
modules require CRC-CCITT functions, but a module built outside
|
|
the kernel tree does. Such modules that use library CRC-CCITT
|
|
functions require M here.
|
|
|
|
config CRC16
|
|
tristate "CRC16 functions"
|
|
help
|
|
This option is provided for the case where no in-kernel-tree
|
|
modules require CRC16 functions, but a module built outside
|
|
the kernel tree does. Such modules that use library CRC16
|
|
functions require M here.
|
|
|
|
config CRC_T10DIF
|
|
tristate "CRC calculation for the T10 Data Integrity Field"
|
|
select CRYPTO
|
|
select CRYPTO_CRCT10DIF
|
|
help
|
|
This option is only needed if a module that's not in the
|
|
kernel tree needs to calculate CRC checks for use with the
|
|
SCSI data integrity subsystem.
|
|
|
|
config CRC_ITU_T
|
|
tristate "CRC ITU-T V.41 functions"
|
|
help
|
|
This option is provided for the case where no in-kernel-tree
|
|
modules require CRC ITU-T V.41 functions, but a module built outside
|
|
the kernel tree does. Such modules that use library CRC ITU-T V.41
|
|
functions require M here.
|
|
|
|
config CRC32
|
|
tristate "CRC32/CRC32c functions"
|
|
default y
|
|
select BITREVERSE
|
|
help
|
|
This option is provided for the case where no in-kernel-tree
|
|
modules require CRC32/CRC32c functions, but a module built outside
|
|
the kernel tree does. Such modules that use library CRC32/CRC32c
|
|
functions require M here.
|
|
|
|
config CRC32_SELFTEST
|
|
tristate "CRC32 perform self test on init"
|
|
depends on CRC32
|
|
help
|
|
This option enables the CRC32 library functions to perform a
|
|
self test on initialization. The self test computes crc32_le
|
|
and crc32_be over byte strings with random alignment and length
|
|
and computes the total elapsed time and number of bytes processed.
|
|
|
|
choice
|
|
prompt "CRC32 implementation"
|
|
depends on CRC32
|
|
default CRC32_SLICEBY8
|
|
help
|
|
This option allows a kernel builder to override the default choice
|
|
of CRC32 algorithm. Choose the default ("slice by 8") unless you
|
|
know that you need one of the others.
|
|
|
|
config CRC32_SLICEBY8
|
|
bool "Slice by 8 bytes"
|
|
help
|
|
Calculate checksum 8 bytes at a time with a clever slicing algorithm.
|
|
This is the fastest algorithm, but comes with an 8KiB lookup table.
|
|
Most modern processors have enough cache to hold this table without
|
|
thrashing the cache.
|
|
|
|
This is the default implementation choice. Choose this one unless
|
|
you have a good reason not to.
|
|
|
|
config CRC32_SLICEBY4
|
|
bool "Slice by 4 bytes"
|
|
help
|
|
Calculate checksum 4 bytes at a time with a clever slicing algorithm.
|
|
This is a bit slower than slice by 8, but has a smaller 4KiB lookup
|
|
table.
|
|
|
|
Only choose this option if you know what you are doing.
|
|
|
|
config CRC32_SARWATE
|
|
bool "Sarwate's Algorithm (one byte at a time)"
|
|
help
|
|
Calculate checksum a byte at a time using Sarwate's algorithm. This
|
|
is not particularly fast, but has a small 256 byte lookup table.
|
|
|
|
Only choose this option if you know what you are doing.
|
|
|
|
config CRC32_BIT
|
|
bool "Classic Algorithm (one bit at a time)"
|
|
help
|
|
Calculate checksum one bit at a time. This is VERY slow, but has
|
|
no lookup table. This is provided as a debugging option.
|
|
|
|
Only choose this option if you are debugging crc32.
|
|
|
|
endchoice
|
|
|
|
config CRC4
|
|
tristate "CRC4 functions"
|
|
help
|
|
This option is provided for the case where no in-kernel-tree
|
|
modules require CRC4 functions, but a module built outside
|
|
the kernel tree does. Such modules that use library CRC4
|
|
functions require M here.
|
|
|
|
config CRC7
|
|
tristate "CRC7 functions"
|
|
help
|
|
This option is provided for the case where no in-kernel-tree
|
|
modules require CRC7 functions, but a module built outside
|
|
the kernel tree does. Such modules that use library CRC7
|
|
functions require M here.
|
|
|
|
config LIBCRC32C
|
|
tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check"
|
|
select CRYPTO
|
|
select CRYPTO_CRC32C
|
|
help
|
|
This option is provided for the case where no in-kernel-tree
|
|
modules require CRC32c functions, but a module built outside the
|
|
kernel tree does. Such modules that use library CRC32c functions
|
|
require M here. See Castagnoli93.
|
|
Module will be libcrc32c.
|
|
|
|
config CRC8
|
|
tristate "CRC8 function"
|
|
help
|
|
This option provides CRC8 function. Drivers may select this
|
|
when they need to do cyclic redundancy check according to the CRC8
|
|
algorithm. Module will be called crc8.
|
|
|
|
config XXHASH
|
|
tristate
|
|
|
|
config AUDIT_GENERIC
|
|
bool
|
|
depends on AUDIT && !AUDIT_ARCH
|
|
default y
|
|
|
|
config AUDIT_ARCH_COMPAT_GENERIC
|
|
bool
|
|
default n
|
|
|
|
config AUDIT_COMPAT_GENERIC
|
|
bool
|
|
depends on AUDIT_GENERIC && AUDIT_ARCH_COMPAT_GENERIC && COMPAT
|
|
default y
|
|
|
|
config RANDOM32_SELFTEST
|
|
bool "PRNG perform self test on init"
|
|
default n
|
|
help
|
|
This option enables the 32 bit PRNG library functions to perform a
|
|
self test on initialization.
|
|
|
|
#
|
|
# compression support is select'ed if needed
|
|
#
|
|
config 842_COMPRESS
|
|
select CRC32
|
|
tristate
|
|
|
|
config 842_DECOMPRESS
|
|
select CRC32
|
|
tristate
|
|
|
|
config ZLIB_INFLATE
|
|
tristate
|
|
|
|
config ZLIB_DEFLATE
|
|
tristate
|
|
select BITREVERSE
|
|
|
|
config LZO_COMPRESS
|
|
tristate
|
|
|
|
config LZO_DECOMPRESS
|
|
tristate
|
|
|
|
config LZ4_COMPRESS
|
|
tristate
|
|
|
|
config LZ4HC_COMPRESS
|
|
tristate
|
|
|
|
config LZ4_DECOMPRESS
|
|
tristate
|
|
|
|
config ZSTD_COMPRESS
|
|
select XXHASH
|
|
tristate
|
|
|
|
config ZSTD_DECOMPRESS
|
|
select XXHASH
|
|
tristate
|
|
|
|
source "lib/xz/Kconfig"
|
|
|
|
#
|
|
# These all provide a common interface (hence the apparent duplication with
|
|
# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.)
|
|
#
|
|
config DECOMPRESS_GZIP
|
|
select ZLIB_INFLATE
|
|
tristate
|
|
|
|
config DECOMPRESS_BZIP2
|
|
tristate
|
|
|
|
config DECOMPRESS_LZMA
|
|
tristate
|
|
|
|
config DECOMPRESS_XZ
|
|
select XZ_DEC
|
|
tristate
|
|
|
|
config DECOMPRESS_LZO
|
|
select LZO_DECOMPRESS
|
|
tristate
|
|
|
|
config DECOMPRESS_LZ4
|
|
select LZ4_DECOMPRESS
|
|
tristate
|
|
|
|
#
|
|
# Generic allocator support is selected if needed
|
|
#
|
|
config GENERIC_ALLOCATOR
|
|
bool
|
|
|
|
#
|
|
# reed solomon support is select'ed if needed
|
|
#
|
|
config REED_SOLOMON
|
|
tristate
|
|
|
|
config REED_SOLOMON_ENC8
|
|
bool
|
|
|
|
config REED_SOLOMON_DEC8
|
|
bool
|
|
|
|
config REED_SOLOMON_ENC16
|
|
bool
|
|
|
|
config REED_SOLOMON_DEC16
|
|
bool
|
|
|
|
#
|
|
# BCH support is selected if needed
|
|
#
|
|
config BCH
|
|
tristate
|
|
|
|
config BCH_CONST_PARAMS
|
|
bool
|
|
help
|
|
Drivers may select this option to force specific constant
|
|
values for parameters 'm' (Galois field order) and 't'
|
|
(error correction capability). Those specific values must
|
|
be set by declaring default values for symbols BCH_CONST_M
|
|
and BCH_CONST_T.
|
|
Doing so will enable extra compiler optimizations,
|
|
improving encoding and decoding performance up to 2x for
|
|
usual (m,t) values (typically such that m*t < 200).
|
|
When this option is selected, the BCH library supports
|
|
only a single (m,t) configuration. This is mainly useful
|
|
for NAND flash board drivers requiring known, fixed BCH
|
|
parameters.
|
|
|
|
config BCH_CONST_M
|
|
int
|
|
range 5 15
|
|
help
|
|
Constant value for Galois field order 'm'. If 'k' is the
|
|
number of data bits to protect, 'm' should be chosen such
|
|
that (k + m*t) <= 2**m - 1.
|
|
Drivers should declare a default value for this symbol if
|
|
they select option BCH_CONST_PARAMS.
|
|
|
|
config BCH_CONST_T
|
|
int
|
|
help
|
|
Constant value for error correction capability in bits 't'.
|
|
Drivers should declare a default value for this symbol if
|
|
they select option BCH_CONST_PARAMS.
|
|
|
|
#
|
|
# Textsearch support is select'ed if needed
|
|
#
|
|
config TEXTSEARCH
|
|
bool
|
|
|
|
config TEXTSEARCH_KMP
|
|
tristate
|
|
|
|
config TEXTSEARCH_BM
|
|
tristate
|
|
|
|
config TEXTSEARCH_FSM
|
|
tristate
|
|
|
|
config BTREE
|
|
bool
|
|
|
|
config INTERVAL_TREE
|
|
bool
|
|
help
|
|
Simple, embeddable, interval-tree. Can find the start of an
|
|
overlapping range in log(n) time and then iterate over all
|
|
overlapping nodes. The algorithm is implemented as an
|
|
augmented rbtree.
|
|
|
|
See:
|
|
|
|
Documentation/rbtree.txt
|
|
|
|
for more information.
|
|
|
|
config RADIX_TREE_MULTIORDER
|
|
bool
|
|
|
|
config ASSOCIATIVE_ARRAY
|
|
bool
|
|
help
|
|
Generic associative array. Can be searched and iterated over whilst
|
|
it is being modified. It is also reasonably quick to search and
|
|
modify. The algorithms are non-recursive, and the trees are highly
|
|
capacious.
|
|
|
|
See:
|
|
|
|
Documentation/assoc_array.txt
|
|
|
|
for more information.
|
|
|
|
config HAS_IOMEM
|
|
bool
|
|
depends on !NO_IOMEM
|
|
select GENERIC_IO
|
|
default y
|
|
|
|
config HAS_IOPORT_MAP
|
|
bool
|
|
depends on HAS_IOMEM && !NO_IOPORT_MAP
|
|
default y
|
|
|
|
config HAS_DMA
|
|
bool
|
|
depends on !NO_DMA
|
|
default y
|
|
|
|
config DMA_NOOP_OPS
|
|
bool
|
|
depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
|
|
default n
|
|
|
|
config DMA_VIRT_OPS
|
|
bool
|
|
depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
|
|
default n
|
|
|
|
config CHECK_SIGNATURE
|
|
bool
|
|
|
|
config CPUMASK_OFFSTACK
|
|
bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
|
|
help
|
|
Use dynamic allocation for cpumask_var_t, instead of putting
|
|
them on the stack. This is a bit more expensive, but avoids
|
|
stack overflow.
|
|
|
|
config CPU_RMAP
|
|
bool
|
|
depends on SMP
|
|
|
|
config DQL
|
|
bool
|
|
|
|
config GLOB
|
|
bool
|
|
# This actually supports modular compilation, but the module overhead
|
|
# is ridiculous for the amount of code involved. Until an out-of-tree
|
|
# driver asks for it, we'll just link it directly into the kernel
|
|
# when required. Since we're ignoring out-of-tree users, there's also
|
|
# no need to bother prompting for a manual decision:
|
|
# prompt "glob_match() function"
|
|
help
|
|
This option provides a glob_match function for performing
|
|
simple text pattern matching. It originated in the ATA code
|
|
to blacklist particular drive models, but other device drivers
|
|
may need similar functionality.
|
|
|
|
All drivers in the Linux kernel tree that require this function
|
|
should automatically select this option. Say N unless you
|
|
are compiling an out-of-tree driver which tells you that it
|
|
depends on this.
|
|
|
|
config GLOB_SELFTEST
|
|
tristate "glob self-test on init"
|
|
depends on GLOB
|
|
help
|
|
This option enables a simple self-test of the glob_match
|
|
function on startup. It is primarily useful for people
|
|
working on the code to ensure they haven't introduced any
|
|
regressions.
|
|
|
|
It only adds a little bit of code and slows kernel boot (or
|
|
module load) by a small amount, so you're welcome to play with
|
|
it, but you probably don't need it.
|
|
|
|
#
|
|
# Netlink attribute parsing support is select'ed if needed
|
|
#
|
|
config NLATTR
|
|
bool
|
|
|
|
#
|
|
# Generic 64-bit atomic support is selected if needed
|
|
#
|
|
config GENERIC_ATOMIC64
|
|
bool
|
|
|
|
config LRU_CACHE
|
|
tristate
|
|
|
|
config CLZ_TAB
|
|
bool
|
|
|
|
config CORDIC
|
|
tristate "CORDIC algorithm"
|
|
help
|
|
This option provides an implementation of the CORDIC algorithm;
|
|
calculations are in fixed point. Module will be called cordic.
|
|
|
|
config DDR
|
|
bool "JEDEC DDR data"
|
|
help
|
|
Data from JEDEC specs for DDR SDRAM memories,
|
|
particularly the AC timing parameters and addressing
|
|
information. This data is useful for drivers handling
|
|
DDR SDRAM controllers.
|
|
|
|
config IRQ_POLL
|
|
bool "IRQ polling library"
|
|
help
|
|
Helper library for interrupt mitigation using polling.
|
|
|
|
config MPILIB
|
|
tristate
|
|
select CLZ_TAB
|
|
help
|
|
Multiprecision maths library from GnuPG.
|
|
It is used to implement RSA digital signature verification,
|
|
which is used by IMA/EVM digital signature extension.
|
|
|
|
config SIGNATURE
|
|
tristate
|
|
depends on KEYS
|
|
select CRYPTO
|
|
select CRYPTO_SHA1
|
|
select MPILIB
|
|
help
|
|
Digital signature verification. Currently only RSA is supported.
|
|
Implementation is done using the GnuPG MPI library.
|
|
|
|
#
|
|
# libfdt files, only selected if needed.
|
|
#
|
|
config LIBFDT
|
|
bool
|
|
|
|
config OID_REGISTRY
|
|
tristate
|
|
help
|
|
Enable fast lookup object identifier registry.
|
|
|
|
config UCS2_STRING
|
|
tristate
|
|
|
|
source "lib/fonts/Kconfig"
|
|
|
|
config SG_SPLIT
|
|
def_bool n
|
|
help
|
|
Provides a helper to split scatterlists into chunks, each chunk being
|
|
a scatterlist. This should be selected by a driver or an API which
|
|
wishes to split a scatterlist amongst multiple DMA channels.
|
|
|
|
config SG_POOL
|
|
def_bool n
|
|
help
|
|
Provides a helper to allocate chained scatterlists. This should be
|
|
selected by a driver or an API which wishes to allocate chained
|
|
scatterlist.
|
|
|
|
#
|
|
# sg chaining option
|
|
#
|
|
|
|
config ARCH_HAS_SG_CHAIN
|
|
def_bool n
|
|
|
|
config ARCH_HAS_PMEM_API
|
|
bool
|
|
|
|
config ARCH_HAS_UACCESS_FLUSHCACHE
|
|
bool
|
|
|
|
config STACKDEPOT
|
|
bool
|
|
select STACKTRACE
|
|
|
|
config SBITMAP
|
|
bool
|
|
|
|
config PARMAN
|
|
tristate "parman" if COMPILE_TEST
|
|
|
|
config PRIME_NUMBERS
|
|
tristate
|
|
|
|
config STRING_SELFTEST
|
|
bool "Test string functions"
|
|
|
|
endmenu
|