From 00a7a00e2d2fc2ebbaf2c3ef25427ae13e0f94da Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Tue, 3 Mar 2020 18:30:52 +0800 Subject: [PATCH 01/27] NFS: move dprintk after nfs_alloc_fattr in nfs3_proc_lookup In nfs3_proc_lookup, if nfs_alloc_fattr fails, will only print "NFS call lookup". This may be confusing, move dprintk after nfs_alloc_fattr. Signed-off-by: Xu Wang Signed-off-by: Anna Schumaker --- fs/nfs/nfs3proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index a46d1d5d16d8..2397ceedba8a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -179,11 +179,11 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, if (nfs_lookup_is_soft_revalidate(dentry)) task_flags |= RPC_TASK_TIMEOUT; - dprintk("NFS call lookup %pd2\n", dentry); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; + dprintk("NFS call lookup %pd2\n", dentry); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); nfs_refresh_inode(dir, res.dir_attr); From 118917d696dc59fd3e1741012c2f9db2294bed6f Mon Sep 17 00:00:00 2001 From: Fedor Tokarev Date: Sat, 28 Mar 2020 14:56:55 +0300 Subject: [PATCH 02/27] net: sunrpc: Fix off-by-one issues in 'rpc_ntop6' Fix off-by-one issues in 'rpc_ntop6': - 'snprintf' returns the number of characters which would have been written if enough space had been available, excluding the terminating null byte. Thus, a return value of 'sizeof(scopebuf)' means that the last character was dropped. - 'strcat' adds a terminating null byte to the string, thus if len == buflen, the null byte is written past the end of the buffer. Signed-off-by: Fedor Tokarev Signed-off-by: Anna Schumaker --- net/sunrpc/addr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 8b4d72b1a066..010dcb876f9d 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -82,11 +82,11 @@ static size_t rpc_ntop6(const struct sockaddr *sap, rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); - if (unlikely((size_t)rc > sizeof(scopebuf))) + if (unlikely((size_t)rc >= sizeof(scopebuf))) return 0; len += rc; - if (unlikely(len > buflen)) + if (unlikely(len >= buflen)) return 0; strcat(buf, scopebuf); From d04659db7b8089531ccbee364c52909fe4670408 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Fri, 24 Apr 2020 16:17:23 +0800 Subject: [PATCH 03/27] nfs4: Remove unneeded semicolon Fixes coccicheck warning: include/linux/nfs4.h:298:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: Zheng Bin Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 82d8fb422092..4a51db7363a5 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -295,7 +295,7 @@ static inline bool seqid_mutating_err(u32 err) case NFS4ERR_NOFILEHANDLE: case NFS4ERR_MOVED: return false; - }; + } return true; } From 1f9b0f3afb4f804f26d60c373e589eb9b2d247af Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 25 Apr 2020 18:57:58 +0530 Subject: [PATCH 04/27] NFS: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header file related to NFS Client support. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used). Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: Anna Schumaker --- fs/nfs/sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index f1b27411dcc0..ebcbdc40483b 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2019 Hammerspace Inc */ From 1c709b766e73e54d64b1dde1b7cfbcf25bcb15b9 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Sun, 26 Apr 2020 11:30:00 -0400 Subject: [PATCH 05/27] NFSv4.1 fix rpc_call_done assignment for BIND_CONN_TO_SESSION Fixes: 02a95dee8cf0 ("NFS add callback_ops to nfs4_proc_bind_conn_to_session_callback") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9056f3dd380e..e32717fd1169 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7909,7 +7909,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) } static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { - .rpc_call_done = &nfs4_bind_one_conn_to_session_done, + .rpc_call_done = nfs4_bind_one_conn_to_session_done, }; /* From 1ad8dd939a9826ff6f8c69ac13e4e1dbba076703 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 14:02:23 -0500 Subject: [PATCH 06/27] NFS: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 2 +- include/linux/nfs_xdr.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 4a51db7363a5..4dba3c948932 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -38,7 +38,7 @@ struct nfs4_ace { struct nfs4_acl { uint32_t naces; - struct nfs4_ace aces[0]; + struct nfs4_ace aces[]; }; #define NFS4_MAXLABELLEN 2048 diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e5f3e7d8d3d5..5fd0a9ef425f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1227,7 +1227,7 @@ struct nfs4_secinfo4 { struct nfs4_secinfo_flavors { unsigned int num_flavors; - struct nfs4_secinfo4 flavors[0]; + struct nfs4_secinfo4 flavors[]; }; struct nfs4_secinfo_arg { From ef31d878b2e7856cb5d2f76f14b37f4b834eb0f3 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Fri, 8 May 2020 20:22:41 +0800 Subject: [PATCH 07/27] NFS: remove duplicate headers Remove duplicate headers which are included twice. Signed-off-by: Chen Zhou Signed-off-by: Anna Schumaker --- fs/nfs/dns_resolve.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 963800037609..e87d500ad95a 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -39,7 +39,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include #include #include -#include #include #include #include From 53bc19f17f21738735706fabcae3070f16c833db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:01 -0400 Subject: [PATCH 08/27] SUNRPC: receive buffer size estimation values almost never change Avoid unnecessary cache sloshing by placing the buffer size estimation update logic behind an atomic bit flag. The size of GSS information included in each wrapped Reply does not change during the lifetime of a GSS context. Therefore, the au_rslack and au_ralign fields need to be updated only once after establishing a fresh GSS credential. Thus a slack size update must occur after a cred is created, duplicated, renewed, or expires. I'm not sure I have this exactly right. A trace point is introduced to track updates to these variables to enable troubleshooting the problem if I missed a spot. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 5 ++-- include/trace/events/rpcgss.h | 34 ++++++++++++++++++++++++++ net/sunrpc/auth_gss/auth_gss.c | 44 +++++++++++++++++++++++----------- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4f6b28487f28..98da816b5fc2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -76,7 +76,7 @@ struct rpc_auth { unsigned int au_verfsize; /* size of reply verifier */ unsigned int au_ralign; /* words before UL header */ - unsigned int au_flags; + unsigned long au_flags; const struct rpc_authops *au_ops; rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in @@ -89,7 +89,8 @@ struct rpc_auth { }; /* rpc_auth au_flags */ -#define RPCAUTH_AUTH_DATATOUCH 0x00000002 +#define RPCAUTH_AUTH_DATATOUCH (1) +#define RPCAUTH_AUTH_UPDATE_SLACK (2) struct rpc_auth_create_args { rpc_authflavor_t pseudoflavor; diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index 32d88c4fb063..421b14db87ae 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -291,6 +291,40 @@ TRACE_EVENT(rpcgss_need_reencode, __entry->ret ? "" : "un") ); +TRACE_EVENT(rpcgss_update_slack, + TP_PROTO( + const struct rpc_task *task, + const struct rpc_auth *auth + ), + + TP_ARGS(task, auth), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(const void *, auth) + __field(unsigned int, rslack) + __field(unsigned int, ralign) + __field(unsigned int, verfsize) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __entry->auth = auth; + __entry->rslack = auth->au_rslack; + __entry->ralign = auth->au_ralign; + __entry->verfsize = auth->au_verfsize; + ), + + TP_printk("task:%u@%u xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->auth, __entry->rslack, __entry->ralign, + __entry->verfsize) +); + DECLARE_EVENT_CLASS(rpcgss_svc_seqno_class, TP_PROTO( __be32 xid, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ac5cac0dd24b..5f097c8cacd1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1054,11 +1054,11 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2; auth->au_verfsize = GSS_VERF_SLACK >> 2; auth->au_ralign = GSS_VERF_SLACK >> 2; - auth->au_flags = 0; + __set_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags); auth->au_ops = &authgss_ops; auth->au_flavor = flavor; if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor)) - auth->au_flags |= RPCAUTH_AUTH_DATATOUCH; + __set_bit(RPCAUTH_AUTH_DATATOUCH, &auth->au_flags); refcount_set(&auth->au_count, 1); kref_init(&gss_auth->kref); @@ -1613,6 +1613,7 @@ static int gss_renew_cred(struct rpc_task *task) new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(new)) return PTR_ERR(new); + task->tk_rqstp->rq_cred = new; put_rpccred(oldcred); return 0; @@ -1709,7 +1710,8 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ - cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; + if (test_bit(RPCAUTH_AUTH_UPDATE_SLACK, &cred->cr_auth->au_flags)) + cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; status = 0; out: gss_put_ctx(ctx); @@ -1927,13 +1929,30 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) return status; } -static int -gss_unwrap_resp_auth(struct rpc_cred *cred) +/** + * gss_update_rslack - Possibly update RPC receive buffer size estimates + * @task: rpc_task for incoming RPC Reply being unwrapped + * @cred: controlling rpc_cred for @task + * @before: XDR words needed before each RPC Reply message + * @after: XDR words needed following each RPC Reply message + * + */ +static void gss_update_rslack(struct rpc_task *task, struct rpc_cred *cred, + unsigned int before, unsigned int after) { struct rpc_auth *auth = cred->cr_auth; - auth->au_rslack = auth->au_verfsize; - auth->au_ralign = auth->au_verfsize; + if (test_and_clear_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags)) { + auth->au_ralign = auth->au_verfsize + before; + auth->au_rslack = auth->au_verfsize + after; + trace_rpcgss_update_slack(task, auth); + } +} + +static int +gss_unwrap_resp_auth(struct rpc_task *task, struct rpc_cred *cred) +{ + gss_update_rslack(task, cred, 0, 0); return 0; } @@ -1956,7 +1975,6 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, struct xdr_stream *xdr) { struct xdr_buf gss_data, *rcv_buf = &rqstp->rq_rcv_buf; - struct rpc_auth *auth = cred->cr_auth; u32 len, offset, seqno, maj_stat; struct xdr_netobj mic; int ret; @@ -2005,8 +2023,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, if (maj_stat != GSS_S_COMPLETE) goto bad_mic; - auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len); - auth->au_ralign = auth->au_verfsize + 2; + gss_update_rslack(task, cred, 2, 2 + 1 + XDR_QUADLEN(mic.len)); ret = 0; out: @@ -2031,7 +2048,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; - struct rpc_auth *auth = cred->cr_auth; u32 offset, opaque_len, maj_stat; __be32 *p; @@ -2058,8 +2074,8 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, */ xdr_init_decode(xdr, rcv_buf, p, rqstp); - auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack; - auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align; + gss_update_rslack(task, cred, 2 + ctx->gc_gss_ctx->align, + 2 + ctx->gc_gss_ctx->slack); return 0; unwrap_failed: @@ -2130,7 +2146,7 @@ gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) goto out_decode; switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: - status = gss_unwrap_resp_auth(cred); + status = gss_unwrap_resp_auth(task, cred); break; case RPC_GSS_SVC_INTEGRITY: status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 3c627dc685cc..2081c8fbfa48 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -892,8 +892,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) * or privacy, direct data placement of individual data items * is not allowed. */ - ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags & - RPCAUTH_AUTH_DATATOUCH); + ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH, + &rqst->rq_cred->cr_auth->au_flags); /* * Chunks needed for results? From 74fb8fecee99f61415dcb6e22dbc24f42988a1d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:07 -0400 Subject: [PATCH 09/27] SUNRPC: Trace GSS context lifetimes Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcgss.h | 55 +++++++++++++++++++++++++++++++--- net/sunrpc/auth_gss/auth_gss.c | 10 ++++--- net/sunrpc/auth_gss/trace.c | 1 + 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index 421b14db87ae..b9b51a4b1db1 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -17,6 +17,16 @@ ** GSS-API related trace events **/ +TRACE_DEFINE_ENUM(RPC_GSS_SVC_NONE); +TRACE_DEFINE_ENUM(RPC_GSS_SVC_INTEGRITY); +TRACE_DEFINE_ENUM(RPC_GSS_SVC_PRIVACY); + +#define show_gss_service(x) \ + __print_symbolic(x, \ + { RPC_GSS_SVC_NONE, "none" }, \ + { RPC_GSS_SVC_INTEGRITY, "integrity" }, \ + { RPC_GSS_SVC_PRIVACY, "privacy" }) + TRACE_DEFINE_ENUM(GSS_S_BAD_MECH); TRACE_DEFINE_ENUM(GSS_S_BAD_NAME); TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE); @@ -126,6 +136,40 @@ DEFINE_GSSAPI_EVENT(verify_mic); DEFINE_GSSAPI_EVENT(wrap); DEFINE_GSSAPI_EVENT(unwrap); +DECLARE_EVENT_CLASS(rpcgss_ctx_class, + TP_PROTO( + const struct gss_cred *gc + ), + + TP_ARGS(gc), + + TP_STRUCT__entry( + __field(const void *, cred) + __field(unsigned long, service) + __string(principal, gc->gc_principal) + ), + + TP_fast_assign( + __entry->cred = gc; + __entry->service = gc->gc_service; + __assign_str(principal, gc->gc_principal) + ), + + TP_printk("cred=%p service=%s principal='%s'", + __entry->cred, show_gss_service(__entry->service), + __get_str(principal)) +); + +#define DEFINE_CTX_EVENT(name) \ + DEFINE_EVENT(rpcgss_ctx_class, rpcgss_ctx_##name, \ + TP_PROTO( \ + const struct gss_cred *gc \ + ), \ + TP_ARGS(gc)) + +DEFINE_CTX_EVENT(init); +DEFINE_CTX_EVENT(destroy); + TRACE_EVENT(rpcgss_svc_accept_upcall, TP_PROTO( __be32 xid, @@ -405,6 +449,7 @@ TRACE_EVENT(rpcgss_upcall_result, TRACE_EVENT(rpcgss_context, TP_PROTO( + u32 window_size, unsigned long expiry, unsigned long now, unsigned int timeout, @@ -412,12 +457,13 @@ TRACE_EVENT(rpcgss_context, const u8 *data ), - TP_ARGS(expiry, now, timeout, len, data), + TP_ARGS(window_size, expiry, now, timeout, len, data), TP_STRUCT__entry( __field(unsigned long, expiry) __field(unsigned long, now) __field(unsigned int, timeout) + __field(u32, window_size) __field(int, len) __string(acceptor, data) ), @@ -426,13 +472,14 @@ TRACE_EVENT(rpcgss_context, __entry->expiry = expiry; __entry->now = now; __entry->timeout = timeout; + __entry->window_size = window_size; __entry->len = len; strncpy(__get_str(acceptor), data, len); ), - TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s", - __entry->expiry, __entry->now, __entry->timeout, - __entry->len, __get_str(acceptor)) + TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s", + __entry->window_size, __entry->expiry, __entry->now, + __entry->timeout, __entry->len, __get_str(acceptor)) ); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5f097c8cacd1..429f17459ae3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -254,7 +254,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct if (IS_ERR(p)) goto err; done: - trace_rpcgss_context(ctx->gc_expiry, now, timeout, + trace_rpcgss_context(window_size, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, ctx->gc_acceptor.data); err: return p; @@ -697,10 +697,12 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) } schedule(); } - if (gss_msg->ctx) + if (gss_msg->ctx) { + trace_rpcgss_ctx_init(gss_cred); gss_cred_set_ctx(cred, gss_msg->ctx); - else + } else { err = gss_msg->msg.errno; + } spin_unlock(&pipe->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); @@ -1284,6 +1286,7 @@ gss_send_destroy_context(struct rpc_cred *cred) if (new) { ctx->gc_proc = RPC_GSS_PROC_DESTROY; + trace_rpcgss_ctx_destroy(gss_cred); task = rpc_call_null(gss_auth->client, &new->gc_base, RPC_TASK_ASYNC|RPC_TASK_SOFT); if (!IS_ERR(task)) @@ -1349,7 +1352,6 @@ gss_destroy_nullcred(struct rpc_cred *cred) static void gss_destroy_cred(struct rpc_cred *cred) { - if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) gss_send_destroy_context(cred); gss_destroy_nullcred(cred); diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c index 5576f1e66de9..49fa583d7f91 100644 --- a/net/sunrpc/auth_gss/trace.c +++ b/net/sunrpc/auth_gss/trace.c @@ -6,6 +6,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include From 7a34c8e0c38b5fd21a764031b1f8664c28b595af Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:12 -0400 Subject: [PATCH 10/27] SUNRPC: Update the rpc_show_task_flags() macro Recent additions to the RPC_TASK flags neglected to update the tracepoint ENUM definitions. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ffd2215950dc..7d64aea7489e 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -142,29 +142,35 @@ TRACE_EVENT(rpc_request, TRACE_DEFINE_ENUM(RPC_TASK_ASYNC); TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER); +TRACE_DEFINE_ENUM(RPC_TASK_NULLCREDS); TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN); TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS); TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC); +TRACE_DEFINE_ENUM(RPC_TASK_NO_ROUND_ROBIN); TRACE_DEFINE_ENUM(RPC_TASK_SOFT); TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN); TRACE_DEFINE_ENUM(RPC_TASK_SENT); TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT); TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT); TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT); +TRACE_DEFINE_ENUM(RPC_TASK_CRED_NOREF); #define rpc_show_task_flags(flags) \ __print_flags(flags, "|", \ { RPC_TASK_ASYNC, "ASYNC" }, \ { RPC_TASK_SWAPPER, "SWAPPER" }, \ + { RPC_TASK_NULLCREDS, "NULLCREDS" }, \ { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ + { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN" }, \ { RPC_TASK_SOFT, "SOFT" }, \ { RPC_TASK_SOFTCONN, "SOFTCONN" }, \ { RPC_TASK_SENT, "SENT" }, \ { RPC_TASK_TIMEOUT, "TIMEOUT" }, \ { RPC_TASK_NOCONNECT, "NOCONNECT" }, \ - { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" }) + { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" }, \ + { RPC_TASK_CRED_NOREF, "CRED_NOREF" }) TRACE_DEFINE_ENUM(RPC_TASK_RUNNING); TRACE_DEFINE_ENUM(RPC_TASK_QUEUED); From 82909dc546461c6ea060879e9b6fbe5eeac37cea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:18 -0400 Subject: [PATCH 11/27] SUNRPC: Update the RPC_SHOW_SOCKET() macro Clean up: remove unnecessary commas, and fix a white-space nit. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 7d64aea7489e..f6896bcfd97f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -581,9 +581,9 @@ TRACE_EVENT(rpc_reply_pages, #define RPC_SHOW_SOCKET \ EM( SS_FREE, "FREE" ) \ EM( SS_UNCONNECTED, "UNCONNECTED" ) \ - EM( SS_CONNECTING, "CONNECTING," ) \ - EM( SS_CONNECTED, "CONNECTED," ) \ - EMe(SS_DISCONNECTING, "DISCONNECTING" ) + EM( SS_CONNECTING, "CONNECTING" ) \ + EM( SS_CONNECTED, "CONNECTED" ) \ + EMe( SS_DISCONNECTING, "DISCONNECTING" ) #define rpc_show_socket_state(state) \ __print_symbolic(state, RPC_SHOW_SOCKET) From 0125ecbb526745930c63585314fa4e1c45432979 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:23 -0400 Subject: [PATCH 12/27] SUNRPC: Add tracepoint to rpc_call_rpcerror() Add a tracepoint in another common exit point for failing RPCs. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 28 ++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 1 + 2 files changed, 29 insertions(+) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f6896bcfd97f..f1fd3fae5b0f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -365,6 +365,34 @@ DEFINE_RPC_REPLY_EVENT(stale_creds); DEFINE_RPC_REPLY_EVENT(bad_creds); DEFINE_RPC_REPLY_EVENT(auth_tooweak); +TRACE_EVENT(rpc_call_rpcerror, + TP_PROTO( + const struct rpc_task *task, + int tk_status, + int rpc_status + ), + + TP_ARGS(task, tk_status, rpc_status), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, tk_status) + __field(int, rpc_status) + ), + + TP_fast_assign( + __entry->client_id = task->tk_client->cl_clid; + __entry->task_id = task->tk_pid; + __entry->tk_status = tk_status; + __entry->rpc_status = rpc_status; + ), + + TP_printk("task:%u@%u tk_status=%d rpc_status=%d", + __entry->task_id, __entry->client_id, + __entry->tk_status, __entry->rpc_status) +); + TRACE_EVENT(rpc_stats_latency, TP_PROTO( diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 61b21dafd7c0..2621258bd8fc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1624,6 +1624,7 @@ const char static void __rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status) { + trace_rpc_call_rpcerror(task, tk_status, rpc_status); task->tk_rpc_status = rpc_status; rpc_exit(task, tk_status); } From c509f15a5801605652126831d965751e8d602471 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:28 -0400 Subject: [PATCH 13/27] SUNRPC: Split the xdr_buf event class To help tie the recorded xdr_buf to a particular RPC transaction, the client side version of this class should display task ID information and the server side one should show the request's XID. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 113 ++++++++++++++++++++-------------- net/sunrpc/clnt.c | 4 +- net/sunrpc/svc_xprt.c | 4 +- net/sunrpc/xprt.c | 2 +- 4 files changed, 71 insertions(+), 52 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f1fd3fae5b0f..4747803b370e 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,14 +14,17 @@ #include #include -DECLARE_EVENT_CLASS(xdr_buf_class, +DECLARE_EVENT_CLASS(rpc_xdr_buf_class, TP_PROTO( + const struct rpc_task *task, const struct xdr_buf *xdr ), - TP_ARGS(xdr), + TP_ARGS(task, xdr), TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(const void *, head_base) __field(size_t, head_len) __field(const void *, tail_base) @@ -31,6 +34,8 @@ DECLARE_EVENT_CLASS(xdr_buf_class, ), TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; __entry->head_base = xdr->head[0].iov_base; __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; @@ -39,23 +44,26 @@ DECLARE_EVENT_CLASS(xdr_buf_class, __entry->msg_len = xdr->len; ), - TP_printk("head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + TP_printk("task:%u@%u head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + __entry->task_id, __entry->client_id, __entry->head_base, __entry->head_len, __entry->page_len, __entry->tail_base, __entry->tail_len, __entry->msg_len ) ); -#define DEFINE_XDRBUF_EVENT(name) \ - DEFINE_EVENT(xdr_buf_class, name, \ +#define DEFINE_RPCXDRBUF_EVENT(name) \ + DEFINE_EVENT(rpc_xdr_buf_class, \ + rpc_xdr_##name, \ TP_PROTO( \ + const struct rpc_task *task, \ const struct xdr_buf *xdr \ ), \ - TP_ARGS(xdr)) + TP_ARGS(task, xdr)) + +DEFINE_RPCXDRBUF_EVENT(sendto); +DEFINE_RPCXDRBUF_EVENT(recvfrom); +DEFINE_RPCXDRBUF_EVENT(reply_pages); -DEFINE_XDRBUF_EVENT(xprt_sendto); -DEFINE_XDRBUF_EVENT(xprt_recvfrom); -DEFINE_XDRBUF_EVENT(svc_recvfrom); -DEFINE_XDRBUF_EVENT(svc_sendto); TRACE_DEFINE_ENUM(RPC_AUTH_OK); TRACE_DEFINE_ENUM(RPC_AUTH_BADCRED); @@ -560,43 +568,6 @@ TRACE_EVENT(rpc_xdr_alignment, ) ); -TRACE_EVENT(rpc_reply_pages, - TP_PROTO( - const struct rpc_rqst *req - ), - - TP_ARGS(req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, head_base) - __field(size_t, head_len) - __field(const void *, tail_base) - __field(size_t, tail_len) - __field(unsigned int, page_len) - ), - - TP_fast_assign( - __entry->task_id = req->rq_task->tk_pid; - __entry->client_id = req->rq_task->tk_client->cl_clid; - - __entry->head_base = req->rq_rcv_buf.head[0].iov_base; - __entry->head_len = req->rq_rcv_buf.head[0].iov_len; - __entry->page_len = req->rq_rcv_buf.page_len; - __entry->tail_base = req->rq_rcv_buf.tail[0].iov_base; - __entry->tail_len = req->rq_rcv_buf.tail[0].iov_len; - ), - - TP_printk( - "task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n", - __entry->task_id, __entry->client_id, - __entry->head_base, __entry->head_len, - __entry->page_len, - __entry->tail_base, __entry->tail_len - ) -); - /* * First define the enums in the below macros to be exported to userspace * via TRACE_DEFINE_ENUM(). @@ -1024,6 +995,54 @@ TRACE_EVENT(xs_stream_read_request, __entry->copied, __entry->reclen, __entry->offset) ); + +DECLARE_EVENT_CLASS(svc_xdr_buf_class, + TP_PROTO( + const struct svc_rqst *rqst, + const struct xdr_buf *xdr + ), + + TP_ARGS(rqst, xdr), + + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + __field(unsigned int, msg_len) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->head_base = xdr->head[0].iov_base; + __entry->head_len = xdr->head[0].iov_len; + __entry->tail_base = xdr->tail[0].iov_base; + __entry->tail_len = xdr->tail[0].iov_len; + __entry->page_len = xdr->page_len; + __entry->msg_len = xdr->len; + ), + + TP_printk("xid=0x%08x head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + __entry->xid, + __entry->head_base, __entry->head_len, __entry->page_len, + __entry->tail_base, __entry->tail_len, __entry->msg_len + ) +); + +#define DEFINE_SVCXDRBUF_EVENT(name) \ + DEFINE_EVENT(svc_xdr_buf_class, \ + svc_xdr_##name, \ + TP_PROTO( \ + const struct svc_rqst *rqst, \ + const struct xdr_buf *xdr \ + ), \ + TP_ARGS(rqst, xdr)) + +DEFINE_SVCXDRBUF_EVENT(recvfrom); +DEFINE_SVCXDRBUF_EVENT(sendto); + #define show_rqstp_flags(flags) \ __print_flags(flags, "|", \ { (1UL << RQ_SECURE), "RQ_SECURE"}, \ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2621258bd8fc..0e684d93c9b1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1270,7 +1270,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1; xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len); - trace_rpc_reply_pages(req); + trace_rpc_xdr_reply_pages(req->rq_task, &req->rq_rcv_buf); } EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages); @@ -2532,7 +2532,7 @@ call_decode(struct rpc_task *task) goto out; req->rq_rcv_buf.len = req->rq_private_buf.len; - trace_xprt_recvfrom(&req->rq_rcv_buf); + trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf); /* Check that the softirq receive buffer is valid */ WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2284ff038dad..8ef44275c255 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -812,7 +812,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) else len = xprt->xpt_ops->xpo_recvfrom(rqstp); if (len > 0) - trace_svc_recvfrom(&rqstp->rq_arg); + trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg); rqstp->rq_stime = ktime_get(); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); @@ -913,7 +913,7 @@ int svc_send(struct svc_rqst *rqstp) xb->len = xb->head[0].iov_len + xb->page_len + xb->tail[0].iov_len; - trace_svc_sendto(xb); + trace_svc_xdr_sendto(rqstp, xb); /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 493a30a296fc..053de053a024 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1460,7 +1460,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) */ req->rq_ntrans++; - trace_xprt_sendto(&req->rq_snd_buf); + trace_rpc_xdr_sendto(task, &req->rq_snd_buf); connect_cookie = xprt->connect_cookie; status = xprt->ops->send_request(req); if (status != 0) { From 911813d7a15221362d1655d246f3502c0f898c14 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:34 -0400 Subject: [PATCH 14/27] SUNRPC: Trace transport lifetime events Refactor: Hoist create/destroy/disconnect tracepoints out of xprtrdma and into the generic RPC client. Some benefits include: - Enable tracing of xprt lifetime events for the socket transport types - Expose the different types of disconnect to help run down issues with lingering connections Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 4 ---- include/trace/events/sunrpc.h | 35 +++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 21 ++++++++++---------- net/sunrpc/xprtrdma/transport.c | 8 -------- net/sunrpc/xprtrdma/verbs.c | 1 - 5 files changed, 46 insertions(+), 23 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 132c3c778a43..edb55eab4762 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -380,12 +380,8 @@ TRACE_EVENT(xprtrdma_inline_thresh, DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); -DEFINE_CONN_EVENT(flush_dct); -DEFINE_RXPRT_EVENT(xprtrdma_create); -DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_close); DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 4747803b370e..fc8a969ba306 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -724,6 +724,41 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, + TP_PROTO( + const struct rpc_xprt *xprt + ), + + TP_ARGS(xprt), + + TP_STRUCT__entry( + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk("peer=[%s]:%s", __get_str(addr), __get_str(port)) +); + +#define DEFINE_RPC_XPRT_LIFETIME_EVENT(name) \ + DEFINE_EVENT(rpc_xprt_lifetime_class, \ + xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt \ + ), \ + TP_ARGS(xprt)) + +DEFINE_RPC_XPRT_LIFETIME_EVENT(create); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); +DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); + DECLARE_EVENT_CLASS(rpc_xprt_event, TP_PROTO( const struct rpc_xprt *xprt, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 053de053a024..d5cc5db9dbf3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -663,6 +663,7 @@ static void xprt_autoclose(struct work_struct *work) container_of(work, struct rpc_xprt, task_cleanup); unsigned int pflags = memalloc_nofs_save(); + trace_xprt_disconnect_auto(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt->ops->close(xprt); xprt_release_write(xprt, NULL); @@ -677,7 +678,7 @@ static void xprt_autoclose(struct work_struct *work) */ void xprt_disconnect_done(struct rpc_xprt *xprt) { - dprintk("RPC: disconnected transport %p\n", xprt); + trace_xprt_disconnect_done(xprt); spin_lock(&xprt->transport_lock); xprt_clear_connected(xprt); xprt_clear_write_space_locked(xprt); @@ -694,6 +695,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done); */ void xprt_force_disconnect(struct rpc_xprt *xprt) { + trace_xprt_disconnect_force(xprt); + /* Don't race with the test_bit() in xprt_clear_locked() */ spin_lock(&xprt->transport_lock); set_bit(XPRT_CLOSE_WAIT, &xprt->state); @@ -832,8 +835,10 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) + if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) { + trace_xprt_disconnect_cleanup(xprt); xprt->ops->close(xprt); + } if (!xprt_connected(xprt)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; @@ -1903,11 +1908,8 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) found: xprt = t->setup(args); - if (IS_ERR(xprt)) { - dprintk("RPC: xprt_create_transport: failed, %ld\n", - -PTR_ERR(xprt)); + if (IS_ERR(xprt)) goto out; - } if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT) xprt->idle_timeout = 0; INIT_WORK(&xprt->task_cleanup, xprt_autoclose); @@ -1928,8 +1930,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) rpc_xprt_debugfs_register(xprt); - dprintk("RPC: created transport %p with %u slots\n", xprt, - xprt->max_reqs); + trace_xprt_create(xprt); out: return xprt; } @@ -1939,6 +1940,8 @@ static void xprt_destroy_cb(struct work_struct *work) struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); + trace_xprt_destroy(xprt); + rpc_xprt_debugfs_unregister(xprt); rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); @@ -1963,8 +1966,6 @@ static void xprt_destroy_cb(struct work_struct *work) */ static void xprt_destroy(struct rpc_xprt *xprt) { - dprintk("RPC: destroying transport %p\n", xprt); - /* * Exclude transport connect/disconnect handlers and autoclose */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 659da37020a4..048c2fd85728 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -281,8 +281,6 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - trace_xprtrdma_op_destroy(r_xprt); - cancel_delayed_work_sync(&r_xprt->rx_connect_worker); rpcrdma_xprt_disconnect(r_xprt); @@ -365,10 +363,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; - dprintk("RPC: %s: %s:%s\n", __func__, - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); - trace_xprtrdma_create(new_xprt); return xprt; } @@ -385,8 +379,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - trace_xprtrdma_op_close(r_xprt); - rpcrdma_xprt_disconnect(r_xprt); xprt->reestablish_timeout = 0; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 05c4d3a9cda2..2ae348377806 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -141,7 +141,6 @@ void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS && r_xprt->rx_ep->re_connect_status == 1) { r_xprt->rx_ep->re_connect_status = -ECONNABORTED; - trace_xprtrdma_flush_dct(r_xprt, wc->status); xprt_force_disconnect(xprt); } } From 42aad0d7f9486b72155892c689bea2ff9793d8a8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:39 -0400 Subject: [PATCH 15/27] SUNRPC: trace RPC client lifetime events The "create" tracepoint records parts of the rpc_create arguments, and the shutdown tracepoint records when the rpc_clnt is about to signal pending tasks and destroy auths. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 111 ++++++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 39 +++++------- 2 files changed, 126 insertions(+), 24 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index fc8a969ba306..098c84750fb7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -65,6 +65,117 @@ DEFINE_RPCXDRBUF_EVENT(recvfrom); DEFINE_RPCXDRBUF_EVENT(reply_pages); +DECLARE_EVENT_CLASS(rpc_clnt_class, + TP_PROTO( + const struct rpc_clnt *clnt + ), + + TP_ARGS(clnt), + + TP_STRUCT__entry( + __field(unsigned int, client_id) + ), + + TP_fast_assign( + __entry->client_id = clnt->cl_clid; + ), + + TP_printk("clid=%u", __entry->client_id) +); + +#define DEFINE_RPC_CLNT_EVENT(name) \ + DEFINE_EVENT(rpc_clnt_class, \ + rpc_clnt_##name, \ + TP_PROTO( \ + const struct rpc_clnt *clnt \ + ), \ + TP_ARGS(clnt)) + +DEFINE_RPC_CLNT_EVENT(free); +DEFINE_RPC_CLNT_EVENT(killall); +DEFINE_RPC_CLNT_EVENT(shutdown); +DEFINE_RPC_CLNT_EVENT(release); +DEFINE_RPC_CLNT_EVENT(replace_xprt); +DEFINE_RPC_CLNT_EVENT(replace_xprt_err); + +TRACE_EVENT(rpc_clnt_new, + TP_PROTO( + const struct rpc_clnt *clnt, + const struct rpc_xprt *xprt, + const char *program, + const char *server + ), + + TP_ARGS(clnt, xprt, program, server), + + TP_STRUCT__entry( + __field(unsigned int, client_id) + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) + __string(program, program) + __string(server, server) + ), + + TP_fast_assign( + __entry->client_id = clnt->cl_clid; + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); + __assign_str(program, program) + __assign_str(server, server) + ), + + TP_printk("client=%u peer=[%s]:%s program=%s server=%s", + __entry->client_id, __get_str(addr), __get_str(port), + __get_str(program), __get_str(server)) +); + +TRACE_EVENT(rpc_clnt_new_err, + TP_PROTO( + const char *program, + const char *server, + int error + ), + + TP_ARGS(program, server, error), + + TP_STRUCT__entry( + __field(int, error) + __string(program, program) + __string(server, server) + ), + + TP_fast_assign( + __entry->error = error; + __assign_str(program, program) + __assign_str(server, server) + ), + + TP_printk("program=%s server=%s error=%d", + __get_str(program), __get_str(server), __entry->error) +); + +TRACE_EVENT(rpc_clnt_clone_err, + TP_PROTO( + const struct rpc_clnt *clnt, + int error + ), + + TP_ARGS(clnt, error), + + TP_STRUCT__entry( + __field(unsigned int, client_id) + __field(int, error) + ), + + TP_fast_assign( + __entry->client_id = clnt->cl_clid; + __entry->error = error; + ), + + TP_printk("client=%u error=%d", __entry->client_id, __entry->error) +); + + TRACE_DEFINE_ENUM(RPC_AUTH_OK); TRACE_DEFINE_ENUM(RPC_AUTH_BADCRED); TRACE_DEFINE_ENUM(RPC_AUTH_REJECTEDCRED); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0e684d93c9b1..4614a6eebfe1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -370,10 +370,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, const char *nodename = args->nodename; int err; - /* sanity check the name before trying to print it */ - dprintk("RPC: creating %s client for %s (xprt %p)\n", - program->name, args->servername, xprt); - err = rpciod_up(); if (err) goto out_no_rpciod; @@ -436,6 +432,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, goto out_no_path; if (parent) atomic_inc(&parent->cl_count); + + trace_rpc_clnt_new(clnt, xprt, program->name, args->servername); return clnt; out_no_path: @@ -450,6 +448,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, out_no_rpciod: xprt_switch_put(xps); xprt_put(xprt); + trace_rpc_clnt_new_err(program->name, args->servername, err); return ERR_PTR(err); } @@ -634,10 +633,8 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, args->nodename = clnt->cl_nodename; new = rpc_new_client(args, xps, xprt, clnt); - if (IS_ERR(new)) { - err = PTR_ERR(new); - goto out_err; - } + if (IS_ERR(new)) + return new; /* Turn off autobind on clones */ new->cl_autobind = 0; @@ -650,7 +647,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, return new; out_err: - dprintk("RPC: %s: returned error %d\n", __func__, err); + trace_rpc_clnt_clone_err(clnt, err); return ERR_PTR(err); } @@ -723,11 +720,8 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, int err; xprt = xprt_create_transport(args); - if (IS_ERR(xprt)) { - dprintk("RPC: failed to create new xprt for clnt %p\n", - clnt); + if (IS_ERR(xprt)) return PTR_ERR(xprt); - } xps = xprt_switch_alloc(xprt, GFP_KERNEL); if (xps == NULL) { @@ -767,7 +761,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, rpc_release_client(parent); xprt_switch_put(oldxps); xprt_put(old); - dprintk("RPC: replaced xprt for clnt %p\n", clnt); + trace_rpc_clnt_replace_xprt(clnt); return 0; out_revert: @@ -777,7 +771,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, rpc_client_register(clnt, pseudoflavor, NULL); xprt_switch_put(xps); xprt_put(xprt); - dprintk("RPC: failed to switch xprt for clnt %p\n", clnt); + trace_rpc_clnt_replace_xprt_err(clnt); return err; } EXPORT_SYMBOL_GPL(rpc_switch_client_transport); @@ -844,10 +838,11 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) return; - dprintk("RPC: killing all tasks for client %p\n", clnt); + /* * Spin lock all_tasks to prevent changes... */ + trace_rpc_clnt_killall(clnt); spin_lock(&clnt->cl_lock); list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) rpc_signal_task(rovr); @@ -863,9 +858,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt) { might_sleep(); - dprintk_rcu("RPC: shutting down %s client for %s\n", - clnt->cl_program->name, - rcu_dereference(clnt->cl_xprt)->servername); + trace_rpc_clnt_shutdown(clnt); while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); @@ -884,6 +877,8 @@ static void rpc_free_client_work(struct work_struct *work) { struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work); + trace_rpc_clnt_free(clnt); + /* These might block on processes that might allocate memory, * so they cannot be called in rpciod, so they are handled separately * here. @@ -901,9 +896,7 @@ rpc_free_client(struct rpc_clnt *clnt) { struct rpc_clnt *parent = NULL; - dprintk_rcu("RPC: destroying %s client for %s\n", - clnt->cl_program->name, - rcu_dereference(clnt->cl_xprt)->servername); + trace_rpc_clnt_release(clnt); if (clnt->cl_parent != clnt) parent = clnt->cl_parent; rpc_unregister_client(clnt); @@ -945,8 +938,6 @@ rpc_free_auth(struct rpc_clnt *clnt) void rpc_release_client(struct rpc_clnt *clnt) { - dprintk("RPC: rpc_release_client(%p)\n", clnt); - do { if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); From eefc536dbdf1c55777bb192dfac2faf0993d8e10 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:44 -0400 Subject: [PATCH 16/27] SUNRPC: rpc_call_null_helper() already sets RPC_TASK_NULLCREDS Clean up. Commit a52458b48af1 ("NFS/NFSD/SUNRPC: replace generic creds with 'struct cred'.") made rpc_call_null_helper() set RPC_TASK_NULLCREDS unconditionally. Therefore there's no need for rpc_call_null_helper()'s call sites to set RPC_TASK_NULLCREDS. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4614a6eebfe1..db2eaac1c1ef 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2816,7 +2816,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, } task = rpc_call_null_helper(clnt, xprt, NULL, - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, + RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC, &rpc_cb_add_xprt_call_ops, data); rpc_put_task(task); @@ -2860,7 +2860,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, /* Test the connection */ task = rpc_call_null_helper(clnt, xprt, NULL, - RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, + RPC_TASK_SOFT | RPC_TASK_SOFTCONN, NULL, NULL); if (IS_ERR(task)) { status = PTR_ERR(task); From 6fc3737aac19c81181d5ec3f9c86e324e85e733a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:50 -0400 Subject: [PATCH 17/27] SUNRPC: rpc_call_null_helper() should set RPC_TASK_SOFT Clean up. All of rpc_call_null_helper() call sites assert RPC_TASK_SOFT, so move that setting into rpc_call_null_helper() itself. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/clnt.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 429f17459ae3..4ecc2a959567 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1288,7 +1288,7 @@ gss_send_destroy_context(struct rpc_cred *cred) trace_rpcgss_ctx_destroy(gss_cred); task = rpc_call_null(gss_auth->client, &new->gc_base, - RPC_TASK_ASYNC|RPC_TASK_SOFT); + RPC_TASK_ASYNC); if (!IS_ERR(task)) rpc_put_task(task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index db2eaac1c1ef..056606839f52 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2752,7 +2752,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, .rpc_op_cred = cred, .callback_ops = (ops != NULL) ? ops : &rpc_default_ops, .callback_data = data, - .flags = flags | RPC_TASK_NULLCREDS, + .flags = flags | RPC_TASK_SOFT | RPC_TASK_NULLCREDS, }; return rpc_run_task(&task_setup_data); @@ -2816,7 +2816,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, } task = rpc_call_null_helper(clnt, xprt, NULL, - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC, + RPC_TASK_SOFTCONN|RPC_TASK_ASYNC, &rpc_cb_add_xprt_call_ops, data); rpc_put_task(task); @@ -2859,8 +2859,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, goto out_err; /* Test the connection */ - task = rpc_call_null_helper(clnt, xprt, NULL, - RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_SOFTCONN, NULL, NULL); if (IS_ERR(task)) { status = PTR_ERR(task); From 841a2ed9a13d0d54ec7f961c54e7b52ffa97c8bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:13:55 -0400 Subject: [PATCH 18/27] SUNRPC: Set SOFTCONN when destroying GSS contexts Move the RPC_TASK_SOFTCONN flag into rpc_call_null_helper(). The only minor behavior change is that it is now also set when destroying GSS contexts. This gives a better guarantee that gss_send_destroy_context() will not hang for long if a connection cannot be established. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 056606839f52..a91d1cdad9d7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2752,7 +2752,8 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, .rpc_op_cred = cred, .callback_ops = (ops != NULL) ? ops : &rpc_default_ops, .callback_data = data, - .flags = flags | RPC_TASK_SOFT | RPC_TASK_NULLCREDS, + .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN | + RPC_TASK_NULLCREDS, }; return rpc_run_task(&task_setup_data); @@ -2815,8 +2816,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, goto success; } - task = rpc_call_null_helper(clnt, xprt, NULL, - RPC_TASK_SOFTCONN|RPC_TASK_ASYNC, + task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC, &rpc_cb_add_xprt_call_ops, data); rpc_put_task(task); @@ -2859,8 +2859,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, goto out_err; /* Test the connection */ - task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_SOFTCONN, - NULL, NULL); + task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL); if (IS_ERR(task)) { status = PTR_ERR(task); goto out_err; From 5be5945864ea143fda628e8179c8474457af1f43 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:14:00 -0400 Subject: [PATCH 19/27] NFS: nfs_xdr_status should record the procedure name When sunrpc trace points are not enabled, the recorded task ID information alone is not helpful. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 7e7a97ae21ed..e90651431804 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1312,7 +1312,12 @@ TRACE_EVENT(nfs_xdr_status, __field(unsigned int, task_id) __field(unsigned int, client_id) __field(u32, xid) + __field(int, version) __field(unsigned long, error) + __string(program, + xdr->rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) ), TP_fast_assign( @@ -1322,13 +1327,19 @@ TRACE_EVENT(nfs_xdr_status, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->version = task->tk_client->cl_vers; __entry->error = error; + __assign_str(program, + task->tk_client->cl_program->name) + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) ), TP_printk( - "task:%u@%d xid=0x%08x error=%ld (%s)", + "task:%u@%d xid=0x%08x %sv%d %s error=%ld (%s)", __entry->task_id, __entry->client_id, __entry->xid, - -__entry->error, nfs_show_status(__entry->error) + __get_str(program), __entry->version, + __get_str(procedure), -__entry->error, + nfs_show_status(__entry->error) ) ); From fd2b6121415539c394f7c610da5ffa6df919db46 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:14:05 -0400 Subject: [PATCH 20/27] NFS: Trace short NFS READs A short read can generate an -EIO error without there being an error on the wire. This tracepoint acts as an eyecatcher when there is no obvious I/O error. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/read.c | 2 ++ 2 files changed, 49 insertions(+) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index e90651431804..b35998c5c9ca 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -961,6 +961,53 @@ TRACE_EVENT(nfs_readpage_done, ) ); +TRACE_EVENT(nfs_readpage_short, + TP_PROTO( + const struct rpc_task *task, + const struct nfs_pgio_header *hdr + ), + + TP_ARGS(task, hdr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(bool, eof) + __field(int, status) + ), + + TP_fast_assign( + const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; + + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->eof = hdr->res.eof; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u status=%d%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, + __entry->eof ? " eof" : "" + ) +); + TRACE_DEFINE_ENUM(NFS_UNSTABLE); TRACE_DEFINE_ENUM(NFS_DATA_SYNC); TRACE_DEFINE_ENUM(NFS_FILE_SYNC); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 13b22e898116..eb854f1f86e2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -264,6 +264,8 @@ static void nfs_readpage_retry(struct rpc_task *task, /* This is a short read! */ nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD); + trace_nfs_readpage_short(task, hdr); + /* Has the server at least made some progress? */ if (resp->count == 0) { nfs_set_pgio_error(hdr, -EIO, argp->offset); From cd2ed9bdc0794464d57a5ae2d979b1543a75694b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 12 May 2020 17:14:11 -0400 Subject: [PATCH 21/27] NFS: Add a tracepoint in nfs_set_pgio_error() Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pagelist.c | 2 ++ 2 files changed, 46 insertions(+) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index b35998c5c9ca..547cec79899f 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1008,6 +1008,50 @@ TRACE_EVENT(nfs_readpage_short, ) ); +TRACE_EVENT(nfs_pgio_error, + TP_PROTO( + const struct nfs_pgio_header *hdr, + int error, + loff_t pos + ), + + TP_ARGS(hdr, error, pos), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(loff_t, pos) + __field(int, status) + ), + + TP_fast_assign( + const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; + + __entry->status = error; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); + ), + + TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u pos=%llu status=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, __entry->fhandle, + (long long)__entry->offset, __entry->arg_count, __entry->res_count, + __entry->pos, __entry->status + ) +); + TRACE_DEFINE_ENUM(NFS_UNSTABLE); TRACE_DEFINE_ENUM(NFS_DATA_SYNC); TRACE_DEFINE_ENUM(NFS_FILE_SYNC); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6ca421cbe19c..6ea4cac41e46 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -24,6 +24,7 @@ #include "internal.h" #include "pnfs.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -64,6 +65,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { unsigned int new = pos - hdr->io_start; + trace_nfs_pgio_error(hdr, error, pos); if (hdr->good_bytes > new) { hdr->good_bytes = new; clear_bit(NFS_IOHDR_EOF, &hdr->flags); From 2ac3ddc723469c66846262e1d7954199ef95d6dc Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 8 May 2020 09:33:00 +0800 Subject: [PATCH 22/27] sunrpc: add missing newline when printing parameter 'auth_hashtable_size' by sysfs When I cat parameter '/sys/module/sunrpc/parameters/auth_hashtable_size', it displays as follows. It is better to add a newline for easy reading. [root@hulk-202 ~]# cat /sys/module/sunrpc/parameters/auth_hashtable_size 16[root@hulk-202 ~]# Signed-off-by: Xiongfeng Wang Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 5748ad0ba1bd..a9f0d17fdb0d 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -81,7 +81,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp) unsigned int nbits; nbits = *(unsigned int *)kp->arg; - return sprintf(buffer, "%u", 1U << nbits); + return sprintf(buffer, "%u\n", 1U << nbits); } #define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); From 86b936672e55388bd5674e50e5ff4ef95f3477b8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 May 2020 13:56:11 +0100 Subject: [PATCH 23/27] NFS: remove redundant initialization of variable result The variable result is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a57e7c72c7f4..bb63e2b93ff7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -446,7 +446,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - ssize_t result = -EINVAL, requested; + ssize_t result, requested; size_t count = iov_iter_count(iter); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); From 3a39e778690500066b31fe982d18e2e394d3bce2 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Thu, 21 May 2020 17:17:21 +0800 Subject: [PATCH 24/27] nfs: set invalid blocks after NFSv4 writes Use the following command to test nfsv4(size of file1M is 1MB): mount -t nfs -o vers=4.0,actimeo=60 127.0.0.1/dir1 /mnt cp file1M /mnt du -h /mnt/file1M -->0 within 60s, then 1M When write is done(cp file1M /mnt), will call this: nfs_writeback_done nfs4_write_done nfs4_write_done_cb nfs_writeback_update_inode nfs_post_op_update_inode_force_wcc_locked(change, ctime, mtime nfs_post_op_update_inode_force_wcc_locked nfs_set_cache_invalid nfs_refresh_inode_locked nfs_update_inode nfsd write response contains change, ctime, mtime, the flag will be clear after nfs_update_inode. Howerver, write response does not contain space_used, previous open response contains space_used whose value is 0, so inode->i_blocks is still 0. nfs_getattr -->called by "du -h" do_update |= force_sync || nfs_attribute_cache_expired -->false in 60s cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) do_update |= cache_validity & (NFS_INO_INVALID_ATTR -->false if (do_update) { __nfs_revalidate_inode } Within 60s, does not send getattr request to nfsd, thus "du -h /mnt/file1M" is 0. Add a NFS_INO_INVALID_BLOCKS flag, set it when nfsv4 write is done. Fixes: 16e143751727 ("NFS: More fine grained attribute tracking") Signed-off-by: Zheng Bin Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 14 +++++++++++--- include/linux/nfs_fs.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b9d0921cb4fe..0bf1f835de01 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -833,6 +833,8 @@ int nfs_getattr(const struct path *path, struct kstat *stat, do_update |= cache_validity & NFS_INO_INVALID_ATIME; if (request_mask & (STATX_CTIME|STATX_MTIME)) do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE; + if (request_mask & STATX_BLOCKS) + do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; if (do_update) { /* Update the attribute cache */ if (!(server->flags & NFS_MOUNT_NOAC)) @@ -1764,7 +1766,8 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_MTIME); + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_BLOCKS); return status; } @@ -1871,7 +1874,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED - | NFS_INO_REVAL_PAGECACHE); + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_INVALID_BLOCKS); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -2033,8 +2037,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; - else + else { + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_BLOCKS + | NFS_INO_REVAL_FORCED); cache_revalidated = false; + } /* Update attrtimeo value if we're out of the unstable period */ if (attr_changed) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 73eda45f1cfd..6ee9119acc5d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -230,6 +230,7 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ #define NFS_INO_DATA_INVAL_DEFER \ BIT(13) /* Deferred cache invalidation */ +#define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ From 5bffb00621b121c9f4726f1e755ad4246cf4b800 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 23 Apr 2020 15:10:02 +0800 Subject: [PATCH 25/27] xprtrdma: Make xprt_rdma_slot_table_entries static Fix the following sparse warning: net/sunrpc/xprtrdma/transport.c:71:14: warning: symbol 'xprt_rdma_slot_table_entries' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Zou Wei Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 048c2fd85728..0c4af7f5e241 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -68,7 +68,7 @@ * tunables */ -unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; +static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; From 94afd9c489fe6720ea3f49084125344e22610880 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 May 2020 10:13:02 -0400 Subject: [PATCH 26/27] SUNRPC: rpc_xprt lifetime events should record xprt->state Help troubleshoot the logic that uses these flags. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 098c84750fb7..73193c79fcaa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -835,6 +835,30 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +TRACE_DEFINE_ENUM(XPRT_LOCKED); +TRACE_DEFINE_ENUM(XPRT_CONNECTED); +TRACE_DEFINE_ENUM(XPRT_CONNECTING); +TRACE_DEFINE_ENUM(XPRT_CLOSE_WAIT); +TRACE_DEFINE_ENUM(XPRT_BOUND); +TRACE_DEFINE_ENUM(XPRT_BINDING); +TRACE_DEFINE_ENUM(XPRT_CLOSING); +TRACE_DEFINE_ENUM(XPRT_CONGESTED); +TRACE_DEFINE_ENUM(XPRT_CWND_WAIT); +TRACE_DEFINE_ENUM(XPRT_WRITE_SPACE); + +#define rpc_show_xprt_state(x) \ + __print_flags(x, "|", \ + { (1UL << XPRT_LOCKED), "LOCKED"}, \ + { (1UL << XPRT_CONNECTED), "CONNECTED"}, \ + { (1UL << XPRT_CONNECTING), "CONNECTING"}, \ + { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \ + { (1UL << XPRT_BOUND), "BOUND"}, \ + { (1UL << XPRT_BINDING), "BINDING"}, \ + { (1UL << XPRT_CLOSING), "CLOSING"}, \ + { (1UL << XPRT_CONGESTED), "CONGESTED"}, \ + { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \ + { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"}) + DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_PROTO( const struct rpc_xprt *xprt @@ -843,16 +867,20 @@ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_ARGS(xprt), TP_STRUCT__entry( + __field(unsigned long, state) __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) ), TP_fast_assign( + __entry->state = xprt->state; __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); ), - TP_printk("peer=[%s]:%s", __get_str(addr), __get_str(port)) + TP_printk("peer=[%s]:%s state=%s", + __get_str(addr), __get_str(port), + rpc_show_xprt_state(__entry->state)) ); #define DEFINE_RPC_XPRT_LIFETIME_EVENT(name) \ From ba838a75e73f55a780f1ee896b8e3ecb032dba0f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 29 May 2020 14:14:40 -0400 Subject: [PATCH 27/27] NFS: Fix direct WRITE throughput regression I measured a 50% throughput regression for large direct writes. The observed on-the-wire behavior is that the client sends every NFS WRITE twice: once as an UNSTABLE WRITE plus a COMMIT, and once as a FILE_SYNC WRITE. This is because the nfs_write_match_verf() check in nfs_direct_commit_complete() fails for every WRITE. Buffered writes use nfs_write_completion(), which sets req->wb_verf correctly. Direct writes use nfs_direct_write_completion(), which does not set req->wb_verf at all. This leaves req->wb_verf set to all zeroes for every direct WRITE, and thus nfs_direct_commit_completion() always sets NFS_ODIRECT_RESCHED_WRITES. This fix appears to restore nearly all of the lost performance. Fixes: 1f28476dcb98 ("NFS: Fix O_DIRECT commit verifier handling") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bb63e2b93ff7..1b79dd5cf661 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -731,6 +731,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (request_commit) { kref_get(&req->wb_kref); + memcpy(&req->wb_verf, &hdr->verf.verifier, + sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, hdr->ds_commit_idx); }