mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-24 14:58:10 +07:00
2209fda323
Update the LZ4 compression module based on LZ4 v1.8.3 so that the erofs file system can use the newest LZ4_decompress_safe_partial(), which can now decode exactly the number of bytes requested [1], in place of the open-coded hack in the erofs file system itself. Currently, apart from the erofs file system, no other users call LZ4_decompress_safe_partial(), so there is no need to worry about the interface change. In addition, LZ4 v1.8.x improves decompression speed compared to the current code, which is based on LZ4 v1.7.3, mainly due to a shortcut optimization for specific common LZ4 sequences [2]. lzbench test data (tested on kirin710, 8 cores, 4 big cores at 2189 MHz, 2 GB DDR RAM at 1622 MHz, with enwik8 test data [3]): Compressor name Compress. Decompress. Compr. size Ratio Filename memcpy 5004 MB/s 4924 MB/s 100000000 100.00 enwik8 lz4hc 1.7.3 -9 12 MB/s 653 MB/s 42203253 42.20 enwik8 lz4hc 1.8.0 -9 12 MB/s 908 MB/s 42203096 42.20 enwik8 lz4hc 1.8.3 -9 11 MB/s 965 MB/s 42203094 42.20 enwik8 [1] https://github.com/lz4/lz4/issues/566 (commit 08d347b5b2)
[2] v1.8.1 perf: slightly faster compression and decompression speed (commit a31b7058cb)
v1.8.2 perf: slightly faster HC compression and decompression speed (commits 45f8603aae,
1a191b3f8d)
[3] http://mattmahoney.net/dc/textdata.html http://mattmahoney.net/dc/enwik8.zip Link: http://lkml.kernel.org/r/1537181207-21932-1-git-send-email-gaoxiang25@huawei.com Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> Tested-by: Guo Xuenan <guoxuenan@huawei.com> Cc: Colin Ian King <colin.king@canonical.com> Cc: Yann Collet <yann.collet.73@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Fang Wei <fangwei1@huawei.com> Cc: Chao Yu <yuchao0@huawei.com> Cc: Miao Xie <miaoxie@huawei.com> Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Kyungsik Lee <kyungsik.lee@lge.com> Cc: <weidu.du@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
235 lines
6.0 KiB
C
235 lines
6.0 KiB
C
#ifndef __LZ4DEFS_H__
|
|
#define __LZ4DEFS_H__
|
|
|
|
/*
|
|
* lz4defs.h -- common and architecture specific defines for the kernel usage
|
|
|
|
* LZ4 - Fast LZ compression algorithm
|
|
* Copyright (C) 2011-2016, Yann Collet.
|
|
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following disclaimer
|
|
* in the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
* You can contact the author at :
|
|
* - LZ4 homepage : http://www.lz4.org
|
|
* - LZ4 source repository : https://github.com/lz4/lz4
|
|
*
|
|
* Changed for kernel usage by:
|
|
* Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
|
|
*/
|
|
|
|
#include <asm/unaligned.h>
|
|
#include <linux/string.h> /* memset, memcpy */
|
|
|
|
/* LZ4's upstream inlining annotation, mapped onto the kernel's __always_inline. */
#define FORCE_INLINE __always_inline
|
|
|
|
/*-************************************
|
|
* Basic Types
|
|
**************************************/
|
|
#include <linux/types.h>
|
|
|
|
/* Fixed-width unsigned integer aliases used throughout the LZ4 sources. */
typedef uint8_t		BYTE;
typedef uint16_t	U16;
typedef uint32_t	U32;
typedef uint64_t	U64;

/* Fixed-width signed 32-bit integer. */
typedef int32_t		S32;

/* Unsigned integer wide enough to hold a pointer value. */
typedef uintptr_t	uptrval;
|
|
|
|
/*-************************************
|
|
* Architecture specifics
|
|
**************************************/
|
|
/* LZ4_ARCH64 is 1 when the kernel is configured with CONFIG_64BIT, else 0. */
#ifdef CONFIG_64BIT
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif
|
|
|
|
/* LZ4_LITTLE_ENDIAN is 1 when __LITTLE_ENDIAN is defined, else 0. */
#ifdef __LITTLE_ENDIAN
#define LZ4_LITTLE_ENDIAN 1
#else
#define LZ4_LITTLE_ENDIAN 0
#endif
|
|
|
|
/*-************************************
|
|
* Constants
|
|
**************************************/
|
|
#define MINMATCH		4

#define WILDCOPYLENGTH		8
#define LASTLITERALS		5
#define MFLIMIT			(WILDCOPYLENGTH + MINMATCH)	/* 12 */
/*
 * ensure it's possible to write 2 x wildcopyLength
 * without overflowing output buffer
 */
#define MATCH_SAFEGUARD_DISTANCE	((2 * WILDCOPYLENGTH) - MINMATCH) /* 12 */

/* Increase this value ==> compression run slower on incompressible data */
#define LZ4_SKIPTRIGGER		6

#define HASH_UNIT		sizeof(size_t)

/* Size units; GB is unsigned, KB and MB are plain int constants. */
#define KB			(1 << 10)
#define MB			(1 << 20)
#define GB			(1U << 30)

#define MAXD_LOG		16
#define MAX_DISTANCE		((1 << MAXD_LOG) - 1)		/* 65535 */
#define STEPSIZE		sizeof(size_t)

/* ML_BITS + RUN_BITS == 8; both masks evaluate to 0xF. */
#define ML_BITS			4
#define ML_MASK			((1U << ML_BITS) - 1)
#define RUN_BITS		(8 - ML_BITS)
#define RUN_MASK		((1U << RUN_BITS) - 1)
|
|
|
|
/*-************************************
|
|
* Reading and writing into memory
|
|
**************************************/
|
|
/* Load a U16 from ptr via get_unaligned(), so ptr need not be aligned. */
static FORCE_INLINE U16 LZ4_read16(const void *ptr)
{
	const U16 *src = ptr;

	return get_unaligned(src);
}
|
|
|
|
/* Load a U32 from ptr via get_unaligned(), so ptr need not be aligned. */
static FORCE_INLINE U32 LZ4_read32(const void *ptr)
{
	const U32 *src = ptr;

	return get_unaligned(src);
}
|
|
|
|
/*
 * Load one size_t-sized word from ptr via get_unaligned(), so ptr need not
 * be aligned.
 */
static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
{
	const size_t *src = ptr;

	return get_unaligned(src);
}
|
|
|
|
/* Store a U16 at memPtr via put_unaligned(), so memPtr need not be aligned. */
static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
{
	U16 *dst = memPtr;

	put_unaligned(value, dst);
}
|
|
|
|
/* Store a U32 at memPtr via put_unaligned(), so memPtr need not be aligned. */
static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
{
	U32 *dst = memPtr;

	put_unaligned(value, dst);
}
|
|
|
|
/*
 * Read a 16-bit value from memPtr with get_unaligned_le16() and return
 * the result as a U16.
 */
static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
{
	const U16 value = get_unaligned_le16(memPtr);

	return value;
}
|
|
|
|
/*
 * Write value to memPtr with put_unaligned_le16().
 *
 * The function returns void, so the helper is called as a plain statement;
 * "return <expression>;" is not allowed in a void function in ISO C.
 */
static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
{
	put_unaligned_le16(value, memPtr);
}
|
|
|
|
static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
|
|
{
|
|
#if LZ4_ARCH64
|
|
U64 a = get_unaligned((const U64 *)src);
|
|
|
|
put_unaligned(a, (U64 *)dst);
|
|
#else
|
|
U32 a = get_unaligned((const U32 *)src);
|
|
U32 b = get_unaligned((const U32 *)src + 1);
|
|
|
|
put_unaligned(a, (U32 *)dst);
|
|
put_unaligned(b, (U32 *)dst + 1);
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* customized variant of memcpy,
|
|
* which can overwrite up to 7 bytes beyond dstEnd
|
|
*/
|
|
static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
|
|
const void *srcPtr, void *dstEnd)
|
|
{
|
|
BYTE *d = (BYTE *)dstPtr;
|
|
const BYTE *s = (const BYTE *)srcPtr;
|
|
BYTE *const e = (BYTE *)dstEnd;
|
|
|
|
do {
|
|
LZ4_copy8(d, s);
|
|
d += 8;
|
|
s += 8;
|
|
} while (d < e);
|
|
}
|
|
|
|
static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
|
|
{
|
|
#if LZ4_LITTLE_ENDIAN
|
|
return __ffs(val) >> 3;
|
|
#else
|
|
return (BITS_PER_LONG - 1 - __fls(val)) >> 3;
|
|
#endif
|
|
}
|
|
|
|
static FORCE_INLINE unsigned int LZ4_count(
|
|
const BYTE *pIn,
|
|
const BYTE *pMatch,
|
|
const BYTE *pInLimit)
|
|
{
|
|
const BYTE *const pStart = pIn;
|
|
|
|
while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
|
|
size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
|
|
|
|
if (!diff) {
|
|
pIn += STEPSIZE;
|
|
pMatch += STEPSIZE;
|
|
continue;
|
|
}
|
|
|
|
pIn += LZ4_NbCommonBytes(diff);
|
|
|
|
return (unsigned int)(pIn - pStart);
|
|
}
|
|
|
|
#if LZ4_ARCH64
|
|
if ((pIn < (pInLimit - 3))
|
|
&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
|
|
pIn += 4;
|
|
pMatch += 4;
|
|
}
|
|
#endif
|
|
|
|
if ((pIn < (pInLimit - 1))
|
|
&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
|
|
pIn += 2;
|
|
pMatch += 2;
|
|
}
|
|
|
|
if ((pIn < pInLimit) && (*pMatch == *pIn))
|
|
pIn++;
|
|
|
|
return (unsigned int)(pIn - pStart);
|
|
}
|
|
|
|
/*
 * Directive enums. Numeric values are spelled out explicitly and are
 * identical to the implicit ones (first enumerator 0, then +1 each).
 */
typedef enum {
	noLimit = 0,
	limitedOutput = 1
} limitedOutput_directive;

typedef enum {
	byPtr = 0,
	byU32 = 1,
	byU16 = 2
} tableType_t;

typedef enum {
	noDict = 0,
	withPrefix64k = 1,
	usingExtDict = 2
} dict_directive;

typedef enum {
	noDictIssue = 0,
	dictSmall = 1
} dictIssue_directive;

typedef enum {
	endOnOutputSize = 0,
	endOnInputSize = 1
} endCondition_directive;

typedef enum {
	decode_full_block = 0,
	partial_decode = 1
} earlyEnd_directive;
|
|
|
|
/*
 * Compile-time assertion on condition c, implemented by passing the
 * negated condition to the kernel's BUILD_BUG_ON().
 */
#define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c))
|
|
|
|
#endif
|