mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-23 05:20:08 +07:00
fe5cbc6e06
v3: s-o-b comment, explanation of performance and decision for the start/stop implementation. Implementing rmw functionality for RAID6 requires optimized syndrome calculation. Up to now we can only generate a complete syndrome. The target P/Q pages are always overwritten. With this patch we provide a framework for inplace P/Q modification. In the first place simply fill those functions with NULL values. xor_syndrome() has two additional parameters: start & stop. These will indicate the first and last page that are changing during a rmw run. That makes it possible to avoid several unnecessary loops and speed up calculation. The caller needs to implement the following logic to make the functions work. 1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source blocks inside P/Q between (and including) start and stop. 2) modify any block with start <= block <= stop 3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of source blocks into P/Q between (and including) start and stop. Pages between start and stop that won't be changed should be filled with a pointer to the kernel zero page. The reasons for not taking NULL pages are: 1) Algorithms cross the whole source data line by line. This avoids additional branches. 2) Having a NULL page avoids calculating the XOR P parity but still needs calculation steps for the Q parity. Depending on the algorithm unrolling that might be only a difference of 2 instructions per loop. The benchmark numbers of the gen_syndrome() functions are displayed in the kernel log. Do the same for the xor_syndrome() functions. This will help to analyze performance problems and give a rough estimate of how well the algorithm works. The choice of the fastest algorithm will still depend on the gen_syndrome() performance. With the start/stop page implementation the speed can vary a lot in real life. E.g. a change of page 0 & page 15 on a stripe will be harder to compute than the case where page 0 & page 1 are XOR candidates.
To avoid being too enthusiastic about the expected speeds, we run a worst-case test that simulates a change on the upper half of the stripe. So we do: 1) calculation of P/Q for the upper pages 2) continuation of Q for the lower (empty) pages Signed-off-by: Markus Stockhausen <stockhausen@collogia.de> Signed-off-by: NeilBrown <neilb@suse.de>
60 lines
1.7 KiB
C
60 lines
1.7 KiB
C
/*
|
|
* linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics
|
|
*
|
|
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/raid/pq.h>
|
|
|
|
/*
 * When __KERNEL__ is defined, <asm/neon.h> is included. In any other build
 * stub definitions are supplied instead: the begin/end markers expand to
 * nothing and cpu_has_neon() always evaluates to 1.
 */
#ifndef __KERNEL__
#define kernel_neon_begin()
#define kernel_neon_end()
#define cpu_has_neon()		(1)
#else
#include <asm/neon.h>
#endif
|
|
|
|
/*
|
|
* There are 2 reasons these wrappers are kept in a separate compilation unit
|
|
* from the actual implementations in neonN.c (generated from neon.uc by
|
|
* unroll.awk):
|
|
* - the actual implementations use NEON intrinsics, and the GCC support header
|
|
* (arm_neon.h) is not fully compatible (type wise) with the kernel;
|
|
* - the neonN.c files are compiled with -mfpu=neon and optimization enabled,
|
|
* and we have to make sure that we never use *any* NEON/VFP instructions
|
|
* outside a kernel_neon_begin()/kernel_neon_end() pair.
|
|
*/
|
|
|
|
/*
 * RAID6_NEON_WRAPPER(_n) - emit the glue for the NEON routine numbered _n.
 *
 * For a given _n the expansion consists of:
 *  - a prototype for raid6_neon<_n>_gen_syndrome_real(), which lives in a
 *    different compilation unit;
 *  - a static raid6_neon<_n>_gen_syndrome() that calls the real routine
 *    between kernel_neon_begin() and kernel_neon_end(), passing the byte
 *    count on as unsigned long;
 *  - the raid6_neonx<_n> descriptor, which lists that wrapper, a NULL XOR
 *    entry, raid6_have_neon and the name string "neonx<_n>".
 *
 * The expansion ends without a semicolon; the invocation supplies it.
 */
#define RAID6_NEON_WRAPPER(_n)						\
	void raid6_neon ## _n ## _gen_syndrome_real(int disks,		\
				unsigned long bytes, void **ptrs);	\
									\
	static void raid6_neon ## _n ## _gen_syndrome(int disks,	\
					size_t bytes, void **ptrs)	\
	{								\
		kernel_neon_begin();					\
		raid6_neon ## _n ## _gen_syndrome_real(disks,		\
					(unsigned long)bytes, ptrs);	\
		kernel_neon_end();					\
	}								\
									\
	struct raid6_calls const raid6_neonx ## _n = {			\
		raid6_neon ## _n ## _gen_syndrome,			\
		NULL,		/* XOR not yet implemented */		\
		raid6_have_neon,					\
		"neonx" #_n,						\
		0							\
	}
|
|
|
|
/*
 * Availability probe listed in the raid6_neonx* descriptors: returns
 * whatever cpu_has_neon() evaluates to.
 */
static int raid6_have_neon(void)
{
	int neon_available = cpu_has_neon();

	return neon_available;
}
|
|
|
|
/* Emit the wrapper and descriptor for each of the neon1/2/4/8 variants. */
RAID6_NEON_WRAPPER(1);
RAID6_NEON_WRAPPER(2);
RAID6_NEON_WRAPPER(4);
RAID6_NEON_WRAPPER(8);
|