mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 12:30:53 +07:00
cab2e8e599
commit ceaf2966ab082bbc4d26516f97b3ca8a676e2af8 upstream.
Sharing one page between stripes is now supported when PAGE_SIZE is not
equal to the stripe size. To support this, the xor value must be calculated
with a different offset for each r5dev, so an offset array is used to record
those offsets.
In RMW mode, the parity page is used as a source page, and
ops_run_prexor5 sets ASYNC_TX_XOR_DROP_DST before calculating the xor value.
When the destination is dropped from the source list, both src_list and
src_offs need to be advanced at the same time, but currently only src_list
is advanced. As a result the calculated xor value is wrong, which can cause
data corruption.
I can reproduce this problem 100% on a POWER8 machine. The steps are:
mdadm -CR /dev/md0 -l5 -n3 /dev/sdb1 /dev/sdc1 /dev/sdd1 --size=3G
mkfs.xfs /dev/md0
mount /dev/md0 /mnt/test
mount: /mnt/test: mount(2) system call failed: Structure needs cleaning.
Fixes: 29bcff787a
("md/raid5: add new xor function to support different page offset")
Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
420 lines
12 KiB
C
420 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* xor offload engine api
|
|
*
|
|
* Copyright © 2006, Intel Corporation.
|
|
*
|
|
* Dan Williams <dan.j.williams@intel.com>
|
|
*
|
|
* with architecture considerations by:
|
|
* Neil Brown <neilb@suse.de>
|
|
* Jeff Garzik <jeff@garzik.org>
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/raid/xor.h>
|
|
#include <linux/async_tx.h>
|
|
|
|
/* do_async_xor - dma map the pages and perform the xor with an engine */
|
|
static __async_inline struct dma_async_tx_descriptor *
|
|
do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
|
|
struct async_submit_ctl *submit)
|
|
{
|
|
struct dma_device *dma = chan->device;
|
|
struct dma_async_tx_descriptor *tx = NULL;
|
|
dma_async_tx_callback cb_fn_orig = submit->cb_fn;
|
|
void *cb_param_orig = submit->cb_param;
|
|
enum async_tx_flags flags_orig = submit->flags;
|
|
enum dma_ctrl_flags dma_flags = 0;
|
|
int src_cnt = unmap->to_cnt;
|
|
int xor_src_cnt;
|
|
dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
|
|
dma_addr_t *src_list = unmap->addr;
|
|
|
|
while (src_cnt) {
|
|
dma_addr_t tmp;
|
|
|
|
submit->flags = flags_orig;
|
|
xor_src_cnt = min(src_cnt, (int)dma->max_xor);
|
|
/* if we are submitting additional xors, leave the chain open
|
|
* and clear the callback parameters
|
|
*/
|
|
if (src_cnt > xor_src_cnt) {
|
|
submit->flags &= ~ASYNC_TX_ACK;
|
|
submit->flags |= ASYNC_TX_FENCE;
|
|
submit->cb_fn = NULL;
|
|
submit->cb_param = NULL;
|
|
} else {
|
|
submit->cb_fn = cb_fn_orig;
|
|
submit->cb_param = cb_param_orig;
|
|
}
|
|
if (submit->cb_fn)
|
|
dma_flags |= DMA_PREP_INTERRUPT;
|
|
if (submit->flags & ASYNC_TX_FENCE)
|
|
dma_flags |= DMA_PREP_FENCE;
|
|
|
|
/* Drivers force forward progress in case they can not provide a
|
|
* descriptor
|
|
*/
|
|
tmp = src_list[0];
|
|
if (src_list > unmap->addr)
|
|
src_list[0] = dma_dest;
|
|
tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
|
|
xor_src_cnt, unmap->len,
|
|
dma_flags);
|
|
|
|
if (unlikely(!tx))
|
|
async_tx_quiesce(&submit->depend_tx);
|
|
|
|
/* spin wait for the preceding transactions to complete */
|
|
while (unlikely(!tx)) {
|
|
dma_async_issue_pending(chan);
|
|
tx = dma->device_prep_dma_xor(chan, dma_dest,
|
|
src_list,
|
|
xor_src_cnt, unmap->len,
|
|
dma_flags);
|
|
}
|
|
src_list[0] = tmp;
|
|
|
|
dma_set_unmap(tx, unmap);
|
|
async_tx_submit(chan, tx, submit);
|
|
submit->depend_tx = tx;
|
|
|
|
if (src_cnt > xor_src_cnt) {
|
|
/* drop completed sources */
|
|
src_cnt -= xor_src_cnt;
|
|
/* use the intermediate result a source */
|
|
src_cnt++;
|
|
src_list += xor_src_cnt - 1;
|
|
} else
|
|
break;
|
|
}
|
|
|
|
return tx;
|
|
}
|
|
|
|
static void
|
|
do_sync_xor_offs(struct page *dest, unsigned int offset,
|
|
struct page **src_list, unsigned int *src_offs,
|
|
int src_cnt, size_t len, struct async_submit_ctl *submit)
|
|
{
|
|
int i;
|
|
int xor_src_cnt = 0;
|
|
int src_off = 0;
|
|
void *dest_buf;
|
|
void **srcs;
|
|
|
|
if (submit->scribble)
|
|
srcs = submit->scribble;
|
|
else
|
|
srcs = (void **) src_list;
|
|
|
|
/* convert to buffer pointers */
|
|
for (i = 0; i < src_cnt; i++)
|
|
if (src_list[i])
|
|
srcs[xor_src_cnt++] = page_address(src_list[i]) +
|
|
(src_offs ? src_offs[i] : offset);
|
|
src_cnt = xor_src_cnt;
|
|
/* set destination address */
|
|
dest_buf = page_address(dest) + offset;
|
|
|
|
if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
|
|
memset(dest_buf, 0, len);
|
|
|
|
while (src_cnt > 0) {
|
|
/* process up to 'MAX_XOR_BLOCKS' sources */
|
|
xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
|
|
xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]);
|
|
|
|
/* drop completed sources */
|
|
src_cnt -= xor_src_cnt;
|
|
src_off += xor_src_cnt;
|
|
}
|
|
|
|
async_tx_sync_epilog(submit);
|
|
}
|
|
|
|
static inline bool
|
|
dma_xor_aligned_offsets(struct dma_device *device, unsigned int offset,
|
|
unsigned int *src_offs, int src_cnt, int len)
|
|
{
|
|
int i;
|
|
|
|
if (!is_dma_xor_aligned(device, offset, 0, len))
|
|
return false;
|
|
|
|
if (!src_offs)
|
|
return true;
|
|
|
|
for (i = 0; i < src_cnt; i++) {
|
|
if (!is_dma_xor_aligned(device, src_offs[i], 0, len))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* async_xor_offs - attempt to xor a set of blocks with a dma engine.
|
|
* @dest: destination page
|
|
* @offset: dst offset to start transaction
|
|
* @src_list: array of source pages
|
|
* @src_offs: array of source pages offset, NULL means common src/dst offset
|
|
* @src_cnt: number of source pages
|
|
* @len: length in bytes
|
|
* @submit: submission / completion modifiers
|
|
*
|
|
* honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
|
|
*
|
|
* xor_blocks always uses the dest as a source so the
|
|
* ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
|
|
* the calculation. The assumption with dma eninges is that they only
|
|
* use the destination buffer as a source when it is explicity specified
|
|
* in the source list.
|
|
*
|
|
* src_list note: if the dest is also a source it must be at index zero.
|
|
* The contents of this array will be overwritten if a scribble region
|
|
* is not specified.
|
|
*/
|
|
struct dma_async_tx_descriptor *
|
|
async_xor_offs(struct page *dest, unsigned int offset,
|
|
struct page **src_list, unsigned int *src_offs,
|
|
int src_cnt, size_t len, struct async_submit_ctl *submit)
|
|
{
|
|
struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
|
|
&dest, 1, src_list,
|
|
src_cnt, len);
|
|
struct dma_device *device = chan ? chan->device : NULL;
|
|
struct dmaengine_unmap_data *unmap = NULL;
|
|
|
|
BUG_ON(src_cnt <= 1);
|
|
|
|
if (device)
|
|
unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT);
|
|
|
|
if (unmap && dma_xor_aligned_offsets(device, offset,
|
|
src_offs, src_cnt, len)) {
|
|
struct dma_async_tx_descriptor *tx;
|
|
int i, j;
|
|
|
|
/* run the xor asynchronously */
|
|
pr_debug("%s (async): len: %zu\n", __func__, len);
|
|
|
|
unmap->len = len;
|
|
for (i = 0, j = 0; i < src_cnt; i++) {
|
|
if (!src_list[i])
|
|
continue;
|
|
unmap->to_cnt++;
|
|
unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
|
|
src_offs ? src_offs[i] : offset,
|
|
len, DMA_TO_DEVICE);
|
|
}
|
|
|
|
/* map it bidirectional as it may be re-used as a source */
|
|
unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
|
|
DMA_BIDIRECTIONAL);
|
|
unmap->bidi_cnt = 1;
|
|
|
|
tx = do_async_xor(chan, unmap, submit);
|
|
dmaengine_unmap_put(unmap);
|
|
return tx;
|
|
} else {
|
|
dmaengine_unmap_put(unmap);
|
|
/* run the xor synchronously */
|
|
pr_debug("%s (sync): len: %zu\n", __func__, len);
|
|
WARN_ONCE(chan, "%s: no space for dma address conversion\n",
|
|
__func__);
|
|
|
|
/* in the sync case the dest is an implied source
|
|
* (assumes the dest is the first source)
|
|
*/
|
|
if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
|
|
src_cnt--;
|
|
src_list++;
|
|
src_offs++;
|
|
}
|
|
|
|
/* wait for any prerequisite operations */
|
|
async_tx_quiesce(&submit->depend_tx);
|
|
|
|
do_sync_xor_offs(dest, offset, src_list, src_offs,
|
|
src_cnt, len, submit);
|
|
|
|
return NULL;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(async_xor_offs);
|
|
|
|
/**
|
|
* async_xor - attempt to xor a set of blocks with a dma engine.
|
|
* @dest: destination page
|
|
* @src_list: array of source pages
|
|
* @offset: common src/dst offset to start transaction
|
|
* @src_cnt: number of source pages
|
|
* @len: length in bytes
|
|
* @submit: submission / completion modifiers
|
|
*
|
|
* honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
|
|
*
|
|
* xor_blocks always uses the dest as a source so the
|
|
* ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
|
|
* the calculation. The assumption with dma eninges is that they only
|
|
* use the destination buffer as a source when it is explicity specified
|
|
* in the source list.
|
|
*
|
|
* src_list note: if the dest is also a source it must be at index zero.
|
|
* The contents of this array will be overwritten if a scribble region
|
|
* is not specified.
|
|
*/
|
|
struct dma_async_tx_descriptor *
|
|
async_xor(struct page *dest, struct page **src_list, unsigned int offset,
|
|
int src_cnt, size_t len, struct async_submit_ctl *submit)
|
|
{
|
|
return async_xor_offs(dest, offset, src_list, NULL,
|
|
src_cnt, len, submit);
|
|
}
|
|
EXPORT_SYMBOL_GPL(async_xor);
|
|
|
|
/*
 * page_is_zero - test whether @len bytes of page @p, starting at @offset,
 * are all zero.  Returns 1 if they are, 0 otherwise.
 */
static int page_is_zero(struct page *p, unsigned int offset, size_t len)
{
	const void *start = page_address(p) + offset;

	/* memchr_inv() yields NULL when no byte differs from 0 */
	return memchr_inv(start, 0, len) == NULL;
}
|
|
|
|
static inline struct dma_chan *
|
|
xor_val_chan(struct async_submit_ctl *submit, struct page *dest,
|
|
struct page **src_list, int src_cnt, size_t len)
|
|
{
|
|
#ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
|
|
return NULL;
|
|
#endif
|
|
return async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list,
|
|
src_cnt, len);
|
|
}
|
|
|
|
/**
|
|
* async_xor_val_offs - attempt a xor parity check with a dma engine.
|
|
* @dest: destination page used if the xor is performed synchronously
|
|
* @offset: des offset in pages to start transaction
|
|
* @src_list: array of source pages
|
|
* @src_offs: array of source pages offset, NULL means common src/det offset
|
|
* @src_cnt: number of source pages
|
|
* @len: length in bytes
|
|
* @result: 0 if sum == 0 else non-zero
|
|
* @submit: submission / completion modifiers
|
|
*
|
|
* honored flags: ASYNC_TX_ACK
|
|
*
|
|
* src_list note: if the dest is also a source it must be at index zero.
|
|
* The contents of this array will be overwritten if a scribble region
|
|
* is not specified.
|
|
*/
|
|
struct dma_async_tx_descriptor *
|
|
async_xor_val_offs(struct page *dest, unsigned int offset,
|
|
struct page **src_list, unsigned int *src_offs,
|
|
int src_cnt, size_t len, enum sum_check_flags *result,
|
|
struct async_submit_ctl *submit)
|
|
{
|
|
struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
|
|
struct dma_device *device = chan ? chan->device : NULL;
|
|
struct dma_async_tx_descriptor *tx = NULL;
|
|
struct dmaengine_unmap_data *unmap = NULL;
|
|
|
|
BUG_ON(src_cnt <= 1);
|
|
|
|
if (device)
|
|
unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOWAIT);
|
|
|
|
if (unmap && src_cnt <= device->max_xor &&
|
|
dma_xor_aligned_offsets(device, offset, src_offs, src_cnt, len)) {
|
|
unsigned long dma_prep_flags = 0;
|
|
int i;
|
|
|
|
pr_debug("%s: (async) len: %zu\n", __func__, len);
|
|
|
|
if (submit->cb_fn)
|
|
dma_prep_flags |= DMA_PREP_INTERRUPT;
|
|
if (submit->flags & ASYNC_TX_FENCE)
|
|
dma_prep_flags |= DMA_PREP_FENCE;
|
|
|
|
for (i = 0; i < src_cnt; i++) {
|
|
unmap->addr[i] = dma_map_page(device->dev, src_list[i],
|
|
src_offs ? src_offs[i] : offset,
|
|
len, DMA_TO_DEVICE);
|
|
unmap->to_cnt++;
|
|
}
|
|
unmap->len = len;
|
|
|
|
tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
|
|
len, result,
|
|
dma_prep_flags);
|
|
if (unlikely(!tx)) {
|
|
async_tx_quiesce(&submit->depend_tx);
|
|
|
|
while (!tx) {
|
|
dma_async_issue_pending(chan);
|
|
tx = device->device_prep_dma_xor_val(chan,
|
|
unmap->addr, src_cnt, len, result,
|
|
dma_prep_flags);
|
|
}
|
|
}
|
|
dma_set_unmap(tx, unmap);
|
|
async_tx_submit(chan, tx, submit);
|
|
} else {
|
|
enum async_tx_flags flags_orig = submit->flags;
|
|
|
|
pr_debug("%s: (sync) len: %zu\n", __func__, len);
|
|
WARN_ONCE(device && src_cnt <= device->max_xor,
|
|
"%s: no space for dma address conversion\n",
|
|
__func__);
|
|
|
|
submit->flags |= ASYNC_TX_XOR_DROP_DST;
|
|
submit->flags &= ~ASYNC_TX_ACK;
|
|
|
|
tx = async_xor_offs(dest, offset, src_list, src_offs,
|
|
src_cnt, len, submit);
|
|
|
|
async_tx_quiesce(&tx);
|
|
|
|
*result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
|
|
|
|
async_tx_sync_epilog(submit);
|
|
submit->flags = flags_orig;
|
|
}
|
|
dmaengine_unmap_put(unmap);
|
|
|
|
return tx;
|
|
}
|
|
EXPORT_SYMBOL_GPL(async_xor_val_offs);
|
|
|
|
/**
|
|
* async_xor_val - attempt a xor parity check with a dma engine.
|
|
* @dest: destination page used if the xor is performed synchronously
|
|
* @src_list: array of source pages
|
|
* @offset: offset in pages to start transaction
|
|
* @src_cnt: number of source pages
|
|
* @len: length in bytes
|
|
* @result: 0 if sum == 0 else non-zero
|
|
* @submit: submission / completion modifiers
|
|
*
|
|
* honored flags: ASYNC_TX_ACK
|
|
*
|
|
* src_list note: if the dest is also a source it must be at index zero.
|
|
* The contents of this array will be overwritten if a scribble region
|
|
* is not specified.
|
|
*/
|
|
struct dma_async_tx_descriptor *
|
|
async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
|
|
int src_cnt, size_t len, enum sum_check_flags *result,
|
|
struct async_submit_ctl *submit)
|
|
{
|
|
return async_xor_val_offs(dest, offset, src_list, NULL, src_cnt,
|
|
len, result, submit);
|
|
}
|
|
EXPORT_SYMBOL_GPL(async_xor_val);
|
|
|
|
/* module metadata */
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
MODULE_LICENSE("GPL");
|