From b819ed4b2afcaf86a370a004d7b2252f981f0c50 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jan 2016 14:48:49 -0500 Subject: [PATCH 1/3] pNFS/flexfiles: Improve merging of errors in LAYOUTRETURN When we hit 22 errors, we start to overflow the memory buffers allocated to the LAYOUTRETURN errors. The issue is that currently, RPC call reply ordering determines how successful we are in merging errors that refer to contiguous READ or WRITE requests. Fix is to use an insertion sort to help detect contiguity. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 97 +++++++++-------------- 1 file changed, 39 insertions(+), 58 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e125e55de86d..bcf79aad86fc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, err->length = end - err->offset; } -static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset, - u64 length, int status, enum nfs_opnum4 opnum, - nfs4_stateid *stateid, - struct nfs4_deviceid *deviceid) +static int +ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, + const struct nfs4_ff_layout_ds_err *e2) { - return err->status == status && err->opnum == opnum && - nfs4_stateid_match(&err->stateid, stateid) && - !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) && - end_offset(err->offset, err->length) >= offset && - err->offset <= end_offset(offset, length); + int ret; + + if (e1->opnum != e2->opnum) + return e1->opnum < e2->opnum ? -1 : 1; + if (e1->status != e2->status) + return e1->status < e2->status ? -1 : 1; + ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid)); + if (ret != 0) + return ret; + ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); + if (ret != 0) + return ret; + if (end_offset(e1->offset, e1->length) < e2->offset) + return -1; + if (e1->offset > end_offset(e2->offset, e2->length)) + return 1; + /* If ranges overlap or are contiguous, they are the same */ + return 0; } -static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old, - struct nfs4_ff_layout_ds_err *new) -{ - if (!ds_error_can_merge(old, new->offset, new->length, new->status, - new->opnum, &new->stateid, &new->deviceid)) - return false; - - extend_ds_error(old, new->offset, new->length); - return true; -} - -static bool +static void ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, struct nfs4_ff_layout_ds_err *dserr) { - struct nfs4_ff_layout_ds_err *err; + struct nfs4_ff_layout_ds_err *err, *tmp; + struct list_head *head = &flo->error_list; + int match; - list_for_each_entry(err, &flo->error_list, list) { - if (merge_ds_error(err, dserr)) { - return true; - } - } - - list_add(&dserr->list, &flo->error_list); - return false; -} - -static bool -ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset, - u64 length, int status, enum nfs_opnum4 opnum, - nfs4_stateid *stateid, struct nfs4_deviceid *deviceid) -{ - bool found = false; - struct nfs4_ff_layout_ds_err *err; - - list_for_each_entry(err, &flo->error_list, list) { - if (ds_error_can_merge(err, offset, length, status, opnum, - stateid, deviceid)) { - found = true; - extend_ds_error(err, offset, length); + /* Do insertion sort w/ merges */ + list_for_each_entry_safe(err, tmp, &flo->error_list, list) { + match = ff_ds_error_match(err, dserr); + if (match < 0) + continue; + if (match > 0) { + /* Add entry "dserr" _before_ entry "err" */ + head = &err->list; break; } + /* Entries match, so merge "err" into "dserr" */ + extend_ds_error(dserr, err->offset, err->length); + list_del(&err->list); + kfree(err); } - return found; + list_add_tail(&dserr->list, head); } int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, @@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, gfp_t gfp_flags) { struct nfs4_ff_layout_ds_err *dserr; - bool needfree; if (status == 0) return 0; @@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, if (mirror->mirror_ds == NULL) return -EINVAL; - spin_lock(&flo->generic_hdr.plh_inode->i_lock); - if (ff_layout_update_ds_error(flo, offset, length, status, opnum, - &mirror->stateid, - &mirror->mirror_ds->id_node.deviceid)) { - spin_unlock(&flo->generic_hdr.plh_inode->i_lock); - return 0; - } - spin_unlock(&flo->generic_hdr.plh_inode->i_lock); dserr = kmalloc(sizeof(*dserr), gfp_flags); if (!dserr) return -ENOMEM; @@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, NFS4_DEVICEID4_SIZE); spin_lock(&flo->generic_hdr.plh_inode->i_lock); - needfree = ff_layout_add_ds_error_locked(flo, dserr); + ff_layout_add_ds_error_locked(flo, dserr); spin_unlock(&flo->generic_hdr.plh_inode->i_lock); - if (needfree) - kfree(dserr); return 0; } From 6272dcc6beebbc2d8cf4165b628169e878f143e0 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 15 Jan 2016 16:54:15 -0500 Subject: [PATCH 2/3] NFS: Simplify nfs_request_add_commit_list() arguments I noticed that all the callers of this function pass cinfo->mds->list as an argument in addition to the cinfo structure itself. Let's get rid of the extra argument, since it doesn't seem to be adding anything. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 2 +- fs/nfs/internal.h | 2 +- fs/nfs/write.c | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index bb1f4e7a3270..3384dc8e6683 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -971,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req, u32 i, j; if (fl->commit_through_mds) { - nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); + nfs_request_add_commit_list(req, cinfo); } else { /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4e8cc942336c..9a547aa3ec8e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -484,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list, struct nfs_commit_info *cinfo, u32 ds_commit_idx); void nfs_commitdata_release(struct nfs_commit_data *data); -void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, +void nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo); void nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce43cd6d88c6..5754835a2886 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked); * holding the nfs_page lock. */ void -nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, - struct nfs_commit_info *cinfo) +nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) { spin_lock(cinfo->lock); - nfs_request_add_commit_list_locked(req, dst, cinfo); + nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); spin_unlock(cinfo->lock); nfs_mark_page_unstable(req->wb_page, cinfo); } @@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, { if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) return; - nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); + nfs_request_add_commit_list(req, cinfo); } static void From 082fa37d1351a41afc491d44a1d095cb8d919aa2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jan 2016 15:39:40 -0500 Subject: [PATCH 3/3] pNFS/flexfiles: Fix an XDR encoding bug in layoutreturn We must not skip encoding the statistics, or the server will see an XDR encoding error. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # 4.0+ --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 405f46ba490e..2a2e2d8ddee5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1859,11 +1859,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, start = xdr_reserve_space(xdr, 4); BUG_ON(!start); - if (ff_layout_encode_ioerr(flo, xdr, args)) - goto out; - + ff_layout_encode_ioerr(flo, xdr, args); ff_layout_encode_iostats(flo, xdr, args); -out: + *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); }