linux_dsm_epyc7002/include/linux/raid/pq.h
Markus Stockhausen fe5cbc6e06 md/raid6 algorithms: delta syndrome functions
v3: s-o-b comment, explanation of performance and decision for
the start/stop implementation

Implementing rmw functionality for RAID6 requires optimized syndrome
calculation. Up to now we can only generate a complete syndrome. The
target P/Q pages are always overwritten. With this patch we provide
a framework for in-place P/Q modification. To begin with, those
function pointers are simply filled with NULL values.

xor_syndrome() has two additional parameters: start & stop. These
indicate the first and last page that change during an rmw run. That
makes it possible to avoid several unnecessary loops and speeds up the
calculation. The caller needs to implement the following logic to make
the functions work (a sketch of the sequence follows the list).

1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source
blocks inside P/Q between (and including) start and stop.

2) modify any block with start <= block <= stop

3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of
source blocks into P/Q between (and including) start and stop.
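
For illustration, the sequence could look like the sketch below. The
pointer array layout follows gen_syndrome(): ptrs[0..disks-3] are the
data pages, ptrs[disks-2] is P and ptrs[disks-1] is Q. rmw_update() and
modify_block() are made-up names for this sketch, not part of the patch.

static void rmw_update(int disks, size_t bytes, int start, int stop,
                       void **ptrs)
{
        int i;

        /* 1) subtract the old contents of pages start..stop from P/Q */
        raid6_call.xor_syndrome(disks, start, stop, bytes, ptrs);

        /* 2) write the new data into the affected pages */
        for (i = start; i <= stop; i++)
                modify_block(ptrs[i], bytes);

        /* 3) fold the new contents of pages start..stop back into P/Q */
        raid6_call.xor_syndrome(disks, start, stop, bytes, ptrs);
}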

Pages between start and stop that won't be changed should be filled
with a pointer to the kernel zero page (see the setup sketch after the
list of reasons below). The reasons for not taking NULL pages are:

1) The algorithms cross the whole source data line by line, so this
avoids additional branches.

2) A NULL page would only save the XOR for the P parity; the Q parity
still needs its calculation steps. Depending on the algorithm unrolling,
that might be a difference of only 2 instructions per loop.
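
For illustration, setting up the pointer array for such a partial update
might look like the sketch below. page_is_changing(), page_of(), p_page
and q_page are placeholders for whatever the caller uses to track its
stripe state; only raid6_empty_zero_page is provided by this code.

        void *ptrs[disks];
        int i;

        for (i = start; i <= stop; i++)
                /* unchanged slots point at the shared zero page: they
                   contribute nothing to P and only zeroes to Q */
                ptrs[i] = page_is_changing(i) ? page_of(i)
                                : (void *)raid6_empty_zero_page;

        ptrs[disks - 2] = p_page;       /* XOR parity page of the stripe */
        ptrs[disks - 1] = q_page;       /* RS syndrome page of the stripe */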

The benchmark numbers of the gen_syndrome() functions are displayed in
the kernel log. Do the same for the xor_syndrome() functions. This
will help to analyze performance problems and give a rough estimate of
how well the algorithm works. The choice of the fastest algorithm will
still depend on the gen_syndrome() performance.

With the start/stop page implementation the speed can vary a lot in real
life. E.g. a change of page 0 & page 15 on a stripe is harder to
compute than the case where page 0 & page 1 are the XOR candidates. To
avoid being overly enthusiastic about the expected speeds, we run a
worst-case test that simulates a change on the upper half of the
stripe (sketched after the list below). So we do:

1) calculation of P/Q for the upper pages

2) continuation of Q for the lower (empty) pages
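
Illustratively, the worst-case measurement boils down to a loop like the
one below, where algo is the candidate struct raid6_calls and dptrs the
prepared page pointers. The start/stop values and time_is_up() are
assumptions made for this sketch; the real benchmark lives in
lib/raid6/algos.c.

        /* simulate a change on the upper half of the data disks: full
           P/Q work for start..stop, plus the Q-only continuation down
           to disk 0 for the untouched lower half */
        int start = (disks >> 1) - 1;   /* assumed first changed data disk */
        int stop = disks - 3;           /* last data disk (P/Q excluded) */
        unsigned long iterations = 0;

        while (!time_is_up()) {
                algo->xor_syndrome(disks, start, stop, PAGE_SIZE, dptrs);
                iterations++;
        }
        /* throughput follows from the iteration count and elapsed time */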

Signed-off-by: Markus Stockhausen <stockhausen@collogia.de>
Signed-off-by: NeilBrown <neilb@suse.de>
2015-04-22 08:00:41 +10:00

/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2003 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */
#ifndef LINUX_RAID_RAID6_H
#define LINUX_RAID_RAID6_H
#ifdef __KERNEL__
/* Set to 1 to use kernel-wide empty_zero_page */
#define RAID6_USE_EMPTY_ZERO_PAGE 0
#include <linux/blkdev.h>
/* We need a pre-zeroed page... if we don't want to use the kernel-provided
   one define it here */
#if RAID6_USE_EMPTY_ZERO_PAGE
# define raid6_empty_zero_page empty_zero_page
#else
extern const char raid6_empty_zero_page[PAGE_SIZE];
#endif
#else /* ! __KERNEL__ */
/* Used for testing in user space */
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/types.h>
/* Not standard, but glibc defines it */
#define BITS_PER_LONG __WORDSIZE
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
#ifndef PAGE_SIZE
# define PAGE_SIZE 4096
#endif
extern const char raid6_empty_zero_page[PAGE_SIZE];
#define __init
#define __exit
#define __attribute_const__ __attribute__((const))
#define noinline __attribute__((noinline))
#define preempt_enable()
#define preempt_disable()
#define cpu_has_feature(x) 1
#define enable_kernel_altivec()
#define disable_kernel_altivec()
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
#define MODULE_LICENSE(licence)
#define MODULE_DESCRIPTION(desc)
#define subsys_initcall(x)
#define module_exit(x)
#endif /* __KERNEL__ */
/* Routine choices */
struct raid6_calls {
        /* generate a complete syndrome: (disks, bytes, ptrs) */
        void (*gen_syndrome)(int, size_t, void **);
        /* partial in-place P/Q update: (disks, start, stop, bytes, ptrs) */
        void (*xor_syndrome)(int, int, int, size_t, void **);
        int (*valid)(void);     /* Returns 1 if this routine set is usable */
        const char *name;       /* Name of this routine set */
        int prefer;             /* Has special performance attribute */
};
/* Selected algorithm */
extern struct raid6_calls raid6_call;
/* Various routine sets */
extern const struct raid6_calls raid6_intx1;
extern const struct raid6_calls raid6_intx2;
extern const struct raid6_calls raid6_intx4;
extern const struct raid6_calls raid6_intx8;
extern const struct raid6_calls raid6_intx16;
extern const struct raid6_calls raid6_intx32;
extern const struct raid6_calls raid6_mmxx1;
extern const struct raid6_calls raid6_mmxx2;
extern const struct raid6_calls raid6_sse1x1;
extern const struct raid6_calls raid6_sse1x2;
extern const struct raid6_calls raid6_sse2x1;
extern const struct raid6_calls raid6_sse2x2;
extern const struct raid6_calls raid6_sse2x4;
extern const struct raid6_calls raid6_altivec1;
extern const struct raid6_calls raid6_altivec2;
extern const struct raid6_calls raid6_altivec4;
extern const struct raid6_calls raid6_altivec8;
extern const struct raid6_calls raid6_avx2x1;
extern const struct raid6_calls raid6_avx2x2;
extern const struct raid6_calls raid6_avx2x4;
extern const struct raid6_calls raid6_tilegx8;
struct raid6_recov_calls {
        /* recover two data blocks: (disks, bytes, faila, failb, ptrs) */
        void (*data2)(int, size_t, int, int, void **);
        /* recover one data block plus P: (disks, bytes, faila, ptrs) */
        void (*datap)(int, size_t, int, void **);
        int (*valid)(void);
        const char *name;
        int priority;
};
extern const struct raid6_recov_calls raid6_recov_intx1;
extern const struct raid6_recov_calls raid6_recov_ssse3;
extern const struct raid6_recov_calls raid6_recov_avx2;
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
extern const struct raid6_calls raid6_neonx4;
extern const struct raid6_calls raid6_neonx8;
/* Algorithm list */
extern const struct raid6_calls * const raid6_algos[];
extern const struct raid6_recov_calls *const raid6_recov_algos[];
int raid6_select_algo(void);
/* Return values from chk_syndrome */
#define RAID6_OK 0
#define RAID6_P_BAD 1
#define RAID6_Q_BAD 2
#define RAID6_PQ_BAD 3
/* Galois field tables */
extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256)));
extern const u8 raid6_gfexp[256] __attribute__((aligned(256)));
extern const u8 raid6_gfinv[256] __attribute__((aligned(256)));
extern const u8 raid6_gfexi[256] __attribute__((aligned(256)));
/* Recovery routines */
extern void (*raid6_2data_recov)(int disks, size_t bytes, int faila, int failb,
                                 void **ptrs);
extern void (*raid6_datap_recov)(int disks, size_t bytes, int faila,
                                 void **ptrs);
void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
                      void **ptrs);
/* Some definitions to allow code to be compiled for testing in userspace */
#ifndef __KERNEL__
# define jiffies raid6_jiffies()
# define printk printf
# define GFP_KERNEL 0
# define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
                                                     PROT_READ|PROT_WRITE,   \
                                                     MAP_PRIVATE|MAP_ANONYMOUS,\
                                                     0, 0))
# define free_pages(x, y)       munmap((void *)(x), PAGE_SIZE << (y))
static inline void cpu_relax(void)
{
        /* Nothing */
}
#undef HZ
#define HZ 1000
static inline uint32_t raid6_jiffies(void)
{
        struct timeval tv;
        gettimeofday(&tv, NULL);
        return tv.tv_sec*1000 + tv.tv_usec/1000;
}
#endif /* ! __KERNEL__ */
#endif /* LINUX_RAID_RAID6_H */