mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-20 16:57:26 +07:00
5ef3a8b125
Add ZSTD to the list of supported compression algorithms. ZRAM fio perf test: LZO DEFLATE ZSTD #jobs1 WRITE: (2180MB/s) (77.2MB/s) (1429MB/s) WRITE: (1617MB/s) (77.7MB/s) (1202MB/s) READ: (426MB/s) (595MB/s) (1181MB/s) READ: (422MB/s) (572MB/s) (1020MB/s) READ: (318MB/s) (67.8MB/s) (563MB/s) WRITE: (318MB/s) (67.9MB/s) (564MB/s) READ: (336MB/s) (68.3MB/s) (583MB/s) WRITE: (335MB/s) (68.2MB/s) (582MB/s) #jobs2 WRITE: (3441MB/s) (152MB/s) (2141MB/s) WRITE: (2507MB/s) (147MB/s) (1888MB/s) READ: (801MB/s) (1146MB/s) (1890MB/s) READ: (767MB/s) (1096MB/s) (2073MB/s) READ: (621MB/s) (126MB/s) (1009MB/s) WRITE: (621MB/s) (126MB/s) (1009MB/s) READ: (656MB/s) (125MB/s) (1075MB/s) WRITE: (657MB/s) (126MB/s) (1077MB/s) #jobs3 WRITE: (4772MB/s) (225MB/s) (3394MB/s) WRITE: (3905MB/s) (211MB/s) (2939MB/s) READ: (1216MB/s) (1608MB/s) (3218MB/s) READ: (1159MB/s) (1431MB/s) (2981MB/s) READ: (906MB/s) (156MB/s) (1457MB/s) WRITE: (907MB/s) (156MB/s) (1458MB/s) READ: (953MB/s) (158MB/s) (1595MB/s) WRITE: (952MB/s) (157MB/s) (1593MB/s) #jobs4 WRITE: (6036MB/s) (265MB/s) (4469MB/s) WRITE: (5059MB/s) (263MB/s) (3951MB/s) READ: (1618MB/s) (2066MB/s) (4276MB/s) READ: (1573MB/s) (1942MB/s) (3830MB/s) READ: (1202MB/s) (227MB/s) (1971MB/s) WRITE: (1200MB/s) (227MB/s) (1968MB/s) READ: (1265MB/s) (226MB/s) (2116MB/s) WRITE: (1264MB/s) (226MB/s) (2114MB/s) #jobs5 WRITE: (5339MB/s) (233MB/s) (3781MB/s) WRITE: (4298MB/s) (234MB/s) (3276MB/s) READ: (1626MB/s) (2048MB/s) (4081MB/s) READ: (1567MB/s) (1929MB/s) (3758MB/s) READ: (1174MB/s) (205MB/s) (1747MB/s) WRITE: (1173MB/s) (204MB/s) (1746MB/s) READ: (1214MB/s) (208MB/s) (1890MB/s) WRITE: (1215MB/s) (208MB/s) (1892MB/s) #jobs6 WRITE: (5666MB/s) (270MB/s) (4338MB/s) WRITE: (4828MB/s) (267MB/s) (3772MB/s) READ: (1803MB/s) (2058MB/s) (4946MB/s) READ: (1805MB/s) (2156MB/s) (4711MB/s) READ: (1334MB/s) (235MB/s) (2135MB/s) WRITE: (1335MB/s) (235MB/s) (2137MB/s) READ: (1364MB/s) (236MB/s) (2268MB/s) WRITE: (1365MB/s) (237MB/s) (2270MB/s) #jobs7 WRITE: (5474MB/s) (270MB/s) (4300MB/s) WRITE: (4666MB/s) (266MB/s) (3817MB/s) READ: (2022MB/s) (2319MB/s) (5472MB/s) READ: (1924MB/s) (2260MB/s) (5031MB/s) READ: (1369MB/s) (242MB/s) (2153MB/s) WRITE: (1370MB/s) (242MB/s) (2155MB/s) READ: (1499MB/s) (246MB/s) (2310MB/s) WRITE: (1497MB/s) (246MB/s) (2307MB/s) #jobs8 WRITE: (5558MB/s) (273MB/s) (4439MB/s) WRITE: (4763MB/s) (271MB/s) (3918MB/s) READ: (2201MB/s) (2599MB/s) (6062MB/s) READ: (2105MB/s) (2463MB/s) (5413MB/s) READ: (1490MB/s) (252MB/s) (2238MB/s) WRITE: (1488MB/s) (252MB/s) (2236MB/s) READ: (1566MB/s) (254MB/s) (2434MB/s) WRITE: (1568MB/s) (254MB/s) (2437MB/s) #jobs9 WRITE: (5120MB/s) (264MB/s) (4035MB/s) WRITE: (4531MB/s) (267MB/s) (3740MB/s) READ: (1940MB/s) (2258MB/s) (4986MB/s) READ: (2024MB/s) (2387MB/s) (4871MB/s) READ: (1343MB/s) (246MB/s) (2038MB/s) WRITE: (1342MB/s) (246MB/s) (2037MB/s) READ: (1553MB/s) (238MB/s) (2243MB/s) WRITE: (1552MB/s) (238MB/s) (2242MB/s) #jobs10 WRITE: (5345MB/s) (271MB/s) (3988MB/s) WRITE: (4750MB/s) (254MB/s) (3668MB/s) READ: (1876MB/s) (2363MB/s) (5150MB/s) READ: (1990MB/s) (2256MB/s) (5080MB/s) READ: (1355MB/s) (250MB/s) (2019MB/s) WRITE: (1356MB/s) (251MB/s) (2020MB/s) READ: (1490MB/s) (252MB/s) (2202MB/s) WRITE: (1488MB/s) (252MB/s) (2199MB/s) jobs1 perfstat instructions 52,065,555,710 ( 0.79) 855,731,114,587 ( 2.64) 54,280,709,944 ( 1.40) branches 14,020,427,116 ( 725.847) 101,733,449,582 (1074.521) 11,170,591,067 ( 992.869) branch-misses 22,626,174 ( 0.16%) 274,197,885 ( 0.27%) 25,915,805 ( 0.23%) jobs2 perfstat instructions 103,633,110,402 ( 0.75) 1,710,822,100,914 ( 2.59) 107,879,874,104 ( 1.28) branches 27,931,237,282 ( 679.203) 203,298,267,479 (1037.326) 22,185,350,842 ( 884.427) branch-misses 46,103,811 ( 0.17%) 533,747,204 ( 0.26%) 49,682,483 ( 0.22%) jobs3 perfstat instructions 154,857,283,657 ( 0.76) 2,565,748,974,197 ( 2.57) 161,515,435,813 ( 1.31) branches 41,759,490,355 ( 670.529) 304,905,605,277 ( 978.765) 33,215,805,907 ( 888.003) branch-misses 74,263,293 ( 0.18%) 759,746,240 ( 0.25%) 76,841,196 ( 0.23%) jobs4 perfstat instructions 206,215,849,076 ( 0.75) 3,420,169,460,897 ( 2.60) 215,003,061,664 ( 1.31) branches 55,632,141,739 ( 666.501) 406,394,977,433 ( 927.241) 44,214,322,251 ( 883.532) branch-misses 102,287,788 ( 0.18%) 1,098,617,314 ( 0.27%) 103,891,040 ( 0.23%) jobs5 perfstat instructions 258,711,315,588 ( 0.67) 4,275,657,533,244 ( 2.23) 269,332,235,685 ( 1.08) branches 69,802,821,166 ( 588.823) 507,996,211,252 ( 797.036) 55,450,846,129 ( 735.095) branch-misses 129,217,214 ( 0.19%) 1,243,284,991 ( 0.24%) 173,512,278 ( 0.31%) jobs6 perfstat instructions 312,796,166,008 ( 0.61) 5,133,896,344,660 ( 2.02) 323,658,769,588 ( 1.04) branches 84,372,488,583 ( 520.541) 610,310,494,402 ( 697.642) 66,683,292,992 ( 693.939) branch-misses 159,438,978 ( 0.19%) 1,396,368,563 ( 0.23%) 174,406,934 ( 0.26%) jobs7 perfstat instructions 363,211,372,930 ( 0.56) 5,988,205,600,879 ( 1.75) 377,824,674,156 ( 0.93) branches 98,057,013,765 ( 463.117) 711,841,255,974 ( 598.762) 77,879,009,954 ( 600.443) branch-misses 199,513,153 ( 0.20%) 1,507,651,077 ( 0.21%) 248,203,369 ( 0.32%) jobs8 perfstat instructions 413,960,354,615 ( 0.52) 6,842,918,558,378 ( 1.45) 431,938,486,581 ( 0.83) branches 111,812,574,884 ( 414.224) 813,299,084,518 ( 491.173) 89,062,699,827 ( 517.795) branch-misses 233,584,845 ( 0.21%) 1,531,593,921 ( 0.19%) 286,818,489 ( 0.32%) jobs9 perfstat instructions 465,976,220,300 ( 0.53) 7,698,467,237,372 ( 1.47) 486,352,600,321 ( 0.84) branches 125,931,456,162 ( 424.063) 915,207,005,715 ( 498.192) 100,370,404,090 ( 517.439) branch-misses 256,992,445 ( 0.20%) 1,782,809,816 ( 0.19%) 345,239,380 ( 0.34%) jobs10 perfstat instructions 517,406,372,715 ( 0.53) 8,553,527,312,900 ( 1.48) 540,732,653,094 ( 0.84) branches 139,839,780,676 ( 427.732) 1,016,737,699,389 ( 503.172) 111,696,557,638 ( 516.750) branch-misses 259,595,561 ( 0.19%) 1,952,570,279 ( 0.19%) 357,818,661 ( 0.32%) seconds elapsed 20.630411534 96.084546565 12.743373571 seconds elapsed 22.292627625 100.984155001 14.407413560 seconds elapsed 22.396016966 110.344880848 14.032201392 seconds elapsed 22.517330949 113.351459170 14.243074935 seconds elapsed 28.548305104 156.515193765 19.159286861 seconds elapsed 30.453538116 164.559937678 19.362492717 seconds elapsed 33.467108086 188.486827481 21.492612173 seconds elapsed 35.617727591 209.602677783 23.256422492 seconds elapsed 42.584239509 243.959902566 28.458540338 seconds elapsed 47.683632526 269.635248851 31.542404137 Over all, ZSTD has slower WRITE, but much faster READ (perhaps a static compression buffer used during the test helped ZSTD a lot), which results in faster test results. Memory consumption (zram mm_stat file): zram LZO mm_stat mm_stat (jobs1): 2147483648 23068672 33558528 0 33558528 0 0 mm_stat (jobs2): 2147483648 23068672 33558528 0 33558528 0 0 mm_stat (jobs3): 2147483648 23068672 33558528 0 33562624 0 0 mm_stat (jobs4): 2147483648 23068672 33558528 0 33558528 0 0 mm_stat (jobs5): 2147483648 23068672 33558528 0 33558528 0 0 mm_stat (jobs6): 2147483648 23068672 33558528 0 33562624 0 0 mm_stat (jobs7): 2147483648 23068672 33558528 0 33566720 0 0 mm_stat (jobs8): 2147483648 23068672 33558528 0 33558528 0 0 mm_stat (jobs9): 2147483648 23068672 33558528 0 33558528 0 0 mm_stat (jobs10): 2147483648 23068672 33558528 0 33562624 0 0 zram DEFLATE mm_stat mm_stat (jobs1): 2147483648 16252928 25178112 0 25178112 0 0 mm_stat (jobs2): 2147483648 16252928 25178112 0 25178112 0 0 mm_stat (jobs3): 2147483648 16252928 25178112 0 25178112 0 0 mm_stat (jobs4): 2147483648 16252928 25178112 0 25178112 0 0 mm_stat (jobs5): 2147483648 16252928 25178112 0 25178112 0 0 mm_stat (jobs6): 2147483648 16252928 25178112 0 25178112 0 0 mm_stat (jobs7): 2147483648 16252928 25178112 0 25190400 0 0 mm_stat (jobs8): 2147483648 16252928 25178112 0 25190400 0 0 mm_stat (jobs9): 2147483648 16252928 25178112 0 25178112 0 0 mm_stat (jobs10): 2147483648 16252928 25178112 0 25178112 0 0 zram ZSTD mm_stat mm_stat (jobs1): 2147483648 11010048 16781312 0 16781312 0 0 mm_stat (jobs2): 2147483648 11010048 16781312 0 16781312 0 0 mm_stat (jobs3): 2147483648 11010048 16781312 0 16785408 0 0 mm_stat (jobs4): 2147483648 11010048 16781312 0 16781312 0 0 mm_stat (jobs5): 2147483648 11010048 16781312 0 16781312 0 0 mm_stat (jobs6): 2147483648 11010048 16781312 0 16781312 0 0 mm_stat (jobs7): 2147483648 11010048 16781312 0 16781312 0 0 mm_stat (jobs8): 2147483648 11010048 16781312 0 16781312 0 0 mm_stat (jobs9): 2147483648 11010048 16781312 0 16785408 0 0 mm_stat (jobs10): 2147483648 11010048 16781312 0 16781312 0 0 ================================================================================== Official benchmarks [1]: Compressor name Ratio Compression Decompress. zstd 1.1.3 -1 2.877 430 MB/s 1110 MB/s zlib 1.2.8 -1 2.743 110 MB/s 400 MB/s brotli 0.5.2 -0 2.708 400 MB/s 430 MB/s quicklz 1.5.0 -1 2.238 550 MB/s 710 MB/s lzo1x 2.09 -1 2.108 650 MB/s 830 MB/s lz4 1.7.5 2.101 720 MB/s 3600 MB/s snappy 1.1.3 2.091 500 MB/s 1650 MB/s lzf 3.6 -1 2.077 400 MB/s 860 MB/s Minchan said: : I did test with my sample data and compared zstd with deflate. zstd's : compress ratio is lower a little bit but compression speed is much faster : 3 times more and decompress speed is too 2 times more. With different : data, it is different but overall, zstd would be better for speed at the : cost of a little lower compress ratio(about 5%) so I believe it's worth to : replace deflate. [1] https://github.com/facebook/zstd Link: http://lkml.kernel.org/r/20170912050005.3247-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Acked-by: Minchan Kim <minchan@kernel.org> Tested-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
246 lines
5.9 KiB
C
246 lines
5.9 KiB
C
/*
|
|
* Copyright (C) 2014 Sergey Senozhatsky.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/string.h>
|
|
#include <linux/err.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/crypto.h>
|
|
|
|
#include "zcomp.h"
|
|
|
|
static const char * const backends[] = {
|
|
"lzo",
|
|
#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
|
|
"lz4",
|
|
#endif
|
|
#if IS_ENABLED(CONFIG_CRYPTO_DEFLATE)
|
|
"deflate",
|
|
#endif
|
|
#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC)
|
|
"lz4hc",
|
|
#endif
|
|
#if IS_ENABLED(CONFIG_CRYPTO_842)
|
|
"842",
|
|
#endif
|
|
#if IS_ENABLED(CONFIG_CRYPTO_ZSTD)
|
|
"zstd",
|
|
#endif
|
|
NULL
|
|
};
|
|
|
|
static void zcomp_strm_free(struct zcomp_strm *zstrm)
|
|
{
|
|
if (!IS_ERR_OR_NULL(zstrm->tfm))
|
|
crypto_free_comp(zstrm->tfm);
|
|
free_pages((unsigned long)zstrm->buffer, 1);
|
|
kfree(zstrm);
|
|
}
|
|
|
|
/*
|
|
* allocate new zcomp_strm structure with ->tfm initialized by
|
|
* backend, return NULL on error
|
|
*/
|
|
static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
|
|
{
|
|
struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
|
|
if (!zstrm)
|
|
return NULL;
|
|
|
|
zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0);
|
|
/*
|
|
* allocate 2 pages. 1 for compressed data, plus 1 extra for the
|
|
* case when compressed size is larger than the original one
|
|
*/
|
|
zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
|
|
if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) {
|
|
zcomp_strm_free(zstrm);
|
|
zstrm = NULL;
|
|
}
|
|
return zstrm;
|
|
}
|
|
|
|
bool zcomp_available_algorithm(const char *comp)
|
|
{
|
|
int i;
|
|
|
|
i = __sysfs_match_string(backends, -1, comp);
|
|
if (i >= 0)
|
|
return true;
|
|
|
|
/*
|
|
* Crypto does not ignore a trailing new line symbol,
|
|
* so make sure you don't supply a string containing
|
|
* one.
|
|
* This also means that we permit zcomp initialisation
|
|
* with any compressing algorithm known to crypto api.
|
|
*/
|
|
return crypto_has_comp(comp, 0, 0) == 1;
|
|
}
|
|
|
|
/* show available compressors */
|
|
ssize_t zcomp_available_show(const char *comp, char *buf)
|
|
{
|
|
bool known_algorithm = false;
|
|
ssize_t sz = 0;
|
|
int i = 0;
|
|
|
|
for (; backends[i]; i++) {
|
|
if (!strcmp(comp, backends[i])) {
|
|
known_algorithm = true;
|
|
sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
|
|
"[%s] ", backends[i]);
|
|
} else {
|
|
sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
|
|
"%s ", backends[i]);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Out-of-tree module known to crypto api or a missing
|
|
* entry in `backends'.
|
|
*/
|
|
if (!known_algorithm && crypto_has_comp(comp, 0, 0) == 1)
|
|
sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
|
|
"[%s] ", comp);
|
|
|
|
sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
|
|
return sz;
|
|
}
|
|
|
|
struct zcomp_strm *zcomp_stream_get(struct zcomp *comp)
|
|
{
|
|
return *get_cpu_ptr(comp->stream);
|
|
}
|
|
|
|
void zcomp_stream_put(struct zcomp *comp)
|
|
{
|
|
put_cpu_ptr(comp->stream);
|
|
}
|
|
|
|
int zcomp_compress(struct zcomp_strm *zstrm,
|
|
const void *src, unsigned int *dst_len)
|
|
{
|
|
/*
|
|
* Our dst memory (zstrm->buffer) is always `2 * PAGE_SIZE' sized
|
|
* because sometimes we can endup having a bigger compressed data
|
|
* due to various reasons: for example compression algorithms tend
|
|
* to add some padding to the compressed buffer. Speaking of padding,
|
|
* comp algorithm `842' pads the compressed length to multiple of 8
|
|
* and returns -ENOSP when the dst memory is not big enough, which
|
|
* is not something that ZRAM wants to see. We can handle the
|
|
* `compressed_size > PAGE_SIZE' case easily in ZRAM, but when we
|
|
* receive -ERRNO from the compressing backend we can't help it
|
|
* anymore. To make `842' happy we need to tell the exact size of
|
|
* the dst buffer, zram_drv will take care of the fact that
|
|
* compressed buffer is too big.
|
|
*/
|
|
*dst_len = PAGE_SIZE * 2;
|
|
|
|
return crypto_comp_compress(zstrm->tfm,
|
|
src, PAGE_SIZE,
|
|
zstrm->buffer, dst_len);
|
|
}
|
|
|
|
int zcomp_decompress(struct zcomp_strm *zstrm,
|
|
const void *src, unsigned int src_len, void *dst)
|
|
{
|
|
unsigned int dst_len = PAGE_SIZE;
|
|
|
|
return crypto_comp_decompress(zstrm->tfm,
|
|
src, src_len,
|
|
dst, &dst_len);
|
|
}
|
|
|
|
int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
|
|
{
|
|
struct zcomp *comp = hlist_entry(node, struct zcomp, node);
|
|
struct zcomp_strm *zstrm;
|
|
|
|
if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
|
|
return 0;
|
|
|
|
zstrm = zcomp_strm_alloc(comp);
|
|
if (IS_ERR_OR_NULL(zstrm)) {
|
|
pr_err("Can't allocate a compression stream\n");
|
|
return -ENOMEM;
|
|
}
|
|
*per_cpu_ptr(comp->stream, cpu) = zstrm;
|
|
return 0;
|
|
}
|
|
|
|
int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node)
|
|
{
|
|
struct zcomp *comp = hlist_entry(node, struct zcomp, node);
|
|
struct zcomp_strm *zstrm;
|
|
|
|
zstrm = *per_cpu_ptr(comp->stream, cpu);
|
|
if (!IS_ERR_OR_NULL(zstrm))
|
|
zcomp_strm_free(zstrm);
|
|
*per_cpu_ptr(comp->stream, cpu) = NULL;
|
|
return 0;
|
|
}
|
|
|
|
static int zcomp_init(struct zcomp *comp)
|
|
{
|
|
int ret;
|
|
|
|
comp->stream = alloc_percpu(struct zcomp_strm *);
|
|
if (!comp->stream)
|
|
return -ENOMEM;
|
|
|
|
ret = cpuhp_state_add_instance(CPUHP_ZCOMP_PREPARE, &comp->node);
|
|
if (ret < 0)
|
|
goto cleanup;
|
|
return 0;
|
|
|
|
cleanup:
|
|
free_percpu(comp->stream);
|
|
return ret;
|
|
}
|
|
|
|
void zcomp_destroy(struct zcomp *comp)
|
|
{
|
|
cpuhp_state_remove_instance(CPUHP_ZCOMP_PREPARE, &comp->node);
|
|
free_percpu(comp->stream);
|
|
kfree(comp);
|
|
}
|
|
|
|
/*
|
|
* search available compressors for requested algorithm.
|
|
* allocate new zcomp and initialize it. return compressing
|
|
* backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL)
|
|
* if requested algorithm is not supported, ERR_PTR(-ENOMEM) in
|
|
* case of allocation error, or any other error potentially
|
|
* returned by zcomp_init().
|
|
*/
|
|
struct zcomp *zcomp_create(const char *compress)
|
|
{
|
|
struct zcomp *comp;
|
|
int error;
|
|
|
|
if (!zcomp_available_algorithm(compress))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
|
|
if (!comp)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
comp->name = compress;
|
|
error = zcomp_init(comp);
|
|
if (error) {
|
|
kfree(comp);
|
|
return ERR_PTR(error);
|
|
}
|
|
return comp;
|
|
}
|