linux_dsm_epyc7002/include/trace/events/sunrpc.h

606 lines
15 KiB
C
Raw Normal View History

#undef TRACE_SYSTEM
#define TRACE_SYSTEM sunrpc
#if !defined(_TRACE_SUNRPC_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SUNRPC_H
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/xprtsock.h>
#include <linux/sunrpc/svc_xprt.h>
#include <net/tcp_states.h>
#include <linux/net.h>
#include <linux/tracepoint.h>
DECLARE_EVENT_CLASS(rpc_task_status,
TP_PROTO(struct rpc_task *task),
TP_ARGS(task),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(int, status)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->status = task->tk_status;
),
TP_printk("task:%u@%u, status %d",
__entry->task_id, __entry->client_id,
__entry->status)
);
DEFINE_EVENT(rpc_task_status, rpc_call_status,
TP_PROTO(struct rpc_task *task),
TP_ARGS(task)
);
DEFINE_EVENT(rpc_task_status, rpc_bind_status,
TP_PROTO(struct rpc_task *task),
TP_ARGS(task)
);
TRACE_EVENT(rpc_connect_status,
TP_PROTO(struct rpc_task *task, int status),
TP_ARGS(task, status),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(int, status)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->status = status;
),
TP_printk("task:%u@%u, status %d",
__entry->task_id, __entry->client_id,
__entry->status)
);
DECLARE_EVENT_CLASS(rpc_task_running,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
TP_ARGS(clnt, task, action),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, action)
__field(unsigned long, runstate)
__field(int, status)
__field(unsigned short, flags)
),
TP_fast_assign(
SUNRPC: Fix oops when trace sunrpc_task events in nfs client When tracking sunrpc_task events in nfs client, the clnt pointer may be NULL. [ 139.269266] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 139.269915] IP: [<ffffffffa026f216>] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] PGD 1d293067 PUD 1d294067 PMD 0 [ 139.269915] Oops: 0000 [#1] SMP [ 139.269915] Modules linked in: nfsv4 dns_resolver nfs lockd sunrpc fscache sg ppdev e1000 serio_raw pcspkr parport_pc parport i2c_piix4 i2c_core microcode xfs libcrc32c sd_mod sr_mod cdrom ata_generic crc_t10dif crct10dif_common pata_acpi ahci libahci ata_piix libata dm_mirror dm_region_hash dm_log dm_mod [ 139.269915] CPU: 0 PID: 59 Comm: kworker/0:2 Not tainted 3.10.0-84.el7.x86_64 #1 [ 139.269915] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 139.269915] Workqueue: rpciod rpc_async_schedule [sunrpc] [ 139.269915] task: ffff88001b598000 ti: ffff88001b632000 task.ti: ffff88001b632000 [ 139.269915] RIP: 0010:[<ffffffffa026f216>] [<ffffffffa026f216>] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP: 0018:ffff88001b633d70 EFLAGS: 00010206 [ 139.269915] RAX: ffff88001dfc5338 RBX: ffff88001cc37a00 RCX: ffff88001dfc5334 [ 139.269915] RDX: ffff88001dfc5338 RSI: 0000000000000000 RDI: ffff88001dfc533c [ 139.269915] RBP: ffff88001b633db0 R08: 000000000000002c R09: 000000000000000a [ 139.269915] R10: 0000000000062180 R11: 00000020759fb9dc R12: ffffffffa0292c20 [ 139.269915] R13: ffff88001dfc5334 R14: 0000000000000000 R15: 0000000000000000 [ 139.269915] FS: 0000000000000000(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 139.269915] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 139.269915] CR2: 0000000000000004 CR3: 000000001d290000 CR4: 00000000000006f0 [ 139.269915] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 139.269915] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 139.269915] Stack: [ 139.269915] 000000001b633d98 0000000000000246 ffff88001df1dc00 ffff88001cc37a00 [ 139.269915] ffff88001bc35e60 0000000000000000 ffff88001ffa0a48 ffff88001bc35ee0 [ 139.269915] ffff88001b633e08 ffffffffa02704b5 0000000000010000 ffff88001cc37a70 [ 139.269915] Call Trace: [ 139.269915] [<ffffffffa02704b5>] __rpc_execute+0x1d5/0x400 [sunrpc] [ 139.269915] [<ffffffffa0270706>] rpc_async_schedule+0x26/0x30 [sunrpc] [ 139.269915] [<ffffffff8107867b>] process_one_work+0x17b/0x460 [ 139.269915] [<ffffffff8107942b>] worker_thread+0x11b/0x400 [ 139.269915] [<ffffffff81079310>] ? rescuer_thread+0x3e0/0x3e0 [ 139.269915] [<ffffffff8107fc80>] kthread+0xc0/0xd0 [ 139.269915] [<ffffffff8107fbc0>] ? kthread_create_on_node+0x110/0x110 [ 139.269915] [<ffffffff815d122c>] ret_from_fork+0x7c/0xb0 [ 139.269915] [<ffffffff8107fbc0>] ? kthread_create_on_node+0x110/0x110 [ 139.269915] Code: 4c 8b 45 c8 48 8d 7d d0 89 4d c4 41 89 c9 b9 28 00 00 00 e8 9d b4 e9 e0 48 85 c0 49 89 c5 74 a2 48 89 c7 e8 9d 3f e9 e0 48 89 c2 <41> 8b 46 04 48 8b 7d d0 4c 89 e9 4c 89 e6 89 42 0c 0f b7 83 d4 [ 139.269915] RIP [<ffffffffa026f216>] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP <ffff88001b633d70> [ 139.269915] CR2: 0000000000000004 [ 140.946406] ---[ end trace ba486328b98d7622 ]--- Signed-off-by: Ditang Chen <chendt.fnst@cn.fujitsu.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-03-07 12:27:57 +07:00
__entry->client_id = clnt ? clnt->cl_clid : -1;
__entry->task_id = task->tk_pid;
__entry->action = action;
__entry->runstate = task->tk_runstate;
__entry->status = task->tk_status;
__entry->flags = task->tk_flags;
),
SUNRPC: Fix oops when trace sunrpc_task events in nfs client When tracking sunrpc_task events in nfs client, the clnt pointer may be NULL. [ 139.269266] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 139.269915] IP: [<ffffffffa026f216>] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] PGD 1d293067 PUD 1d294067 PMD 0 [ 139.269915] Oops: 0000 [#1] SMP [ 139.269915] Modules linked in: nfsv4 dns_resolver nfs lockd sunrpc fscache sg ppdev e1000 serio_raw pcspkr parport_pc parport i2c_piix4 i2c_core microcode xfs libcrc32c sd_mod sr_mod cdrom ata_generic crc_t10dif crct10dif_common pata_acpi ahci libahci ata_piix libata dm_mirror dm_region_hash dm_log dm_mod [ 139.269915] CPU: 0 PID: 59 Comm: kworker/0:2 Not tainted 3.10.0-84.el7.x86_64 #1 [ 139.269915] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 139.269915] Workqueue: rpciod rpc_async_schedule [sunrpc] [ 139.269915] task: ffff88001b598000 ti: ffff88001b632000 task.ti: ffff88001b632000 [ 139.269915] RIP: 0010:[<ffffffffa026f216>] [<ffffffffa026f216>] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP: 0018:ffff88001b633d70 EFLAGS: 00010206 [ 139.269915] RAX: ffff88001dfc5338 RBX: ffff88001cc37a00 RCX: ffff88001dfc5334 [ 139.269915] RDX: ffff88001dfc5338 RSI: 0000000000000000 RDI: ffff88001dfc533c [ 139.269915] RBP: ffff88001b633db0 R08: 000000000000002c R09: 000000000000000a [ 139.269915] R10: 0000000000062180 R11: 00000020759fb9dc R12: ffffffffa0292c20 [ 139.269915] R13: ffff88001dfc5334 R14: 0000000000000000 R15: 0000000000000000 [ 139.269915] FS: 0000000000000000(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 139.269915] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 139.269915] CR2: 0000000000000004 CR3: 000000001d290000 CR4: 00000000000006f0 [ 139.269915] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 139.269915] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 139.269915] Stack: [ 139.269915] 000000001b633d98 0000000000000246 ffff88001df1dc00 ffff88001cc37a00 [ 139.269915] ffff88001bc35e60 0000000000000000 ffff88001ffa0a48 ffff88001bc35ee0 [ 139.269915] ffff88001b633e08 ffffffffa02704b5 0000000000010000 ffff88001cc37a70 [ 139.269915] Call Trace: [ 139.269915] [<ffffffffa02704b5>] __rpc_execute+0x1d5/0x400 [sunrpc] [ 139.269915] [<ffffffffa0270706>] rpc_async_schedule+0x26/0x30 [sunrpc] [ 139.269915] [<ffffffff8107867b>] process_one_work+0x17b/0x460 [ 139.269915] [<ffffffff8107942b>] worker_thread+0x11b/0x400 [ 139.269915] [<ffffffff81079310>] ? rescuer_thread+0x3e0/0x3e0 [ 139.269915] [<ffffffff8107fc80>] kthread+0xc0/0xd0 [ 139.269915] [<ffffffff8107fbc0>] ? kthread_create_on_node+0x110/0x110 [ 139.269915] [<ffffffff815d122c>] ret_from_fork+0x7c/0xb0 [ 139.269915] [<ffffffff8107fbc0>] ? kthread_create_on_node+0x110/0x110 [ 139.269915] Code: 4c 8b 45 c8 48 8d 7d d0 89 4d c4 41 89 c9 b9 28 00 00 00 e8 9d b4 e9 e0 48 85 c0 49 89 c5 74 a2 48 89 c7 e8 9d 3f e9 e0 48 89 c2 <41> 8b 46 04 48 8b 7d d0 4c 89 e9 4c 89 e6 89 42 0c 0f b7 83 d4 [ 139.269915] RIP [<ffffffffa026f216>] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP <ffff88001b633d70> [ 139.269915] CR2: 0000000000000004 [ 140.946406] ---[ end trace ba486328b98d7622 ]--- Signed-off-by: Ditang Chen <chendt.fnst@cn.fujitsu.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-03-07 12:27:57 +07:00
TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf",
__entry->task_id, __entry->client_id,
__entry->flags,
__entry->runstate,
__entry->status,
__entry->action
)
);
DEFINE_EVENT(rpc_task_running, rpc_task_begin,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
TP_ARGS(clnt, task, action)
);
DEFINE_EVENT(rpc_task_running, rpc_task_run_action,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
TP_ARGS(clnt, task, action)
);
DEFINE_EVENT(rpc_task_running, rpc_task_complete,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
TP_ARGS(clnt, task, action)
);
DECLARE_EVENT_CLASS(rpc_task_queued,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
TP_ARGS(clnt, task, q),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(unsigned long, timeout)
__field(unsigned long, runstate)
__field(int, status)
__field(unsigned short, flags)
__string(q_name, rpc_qname(q))
),
TP_fast_assign(
__entry->client_id = clnt->cl_clid;
__entry->task_id = task->tk_pid;
__entry->timeout = task->tk_timeout;
__entry->runstate = task->tk_runstate;
__entry->status = task->tk_status;
__entry->flags = task->tk_flags;
__assign_str(q_name, rpc_qname(q));
),
TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
__entry->task_id, __entry->client_id,
__entry->flags,
__entry->runstate,
__entry->status,
__entry->timeout,
__get_str(q_name)
)
);
DEFINE_EVENT(rpc_task_queued, rpc_task_sleep,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
TP_ARGS(clnt, task, q)
);
DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup,
TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
TP_ARGS(clnt, task, q)
);
/*
* First define the enums in the below macros to be exported to userspace
* via TRACE_DEFINE_ENUM().
*/
#undef EM
#undef EMe
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
#define RPC_SHOW_SOCKET \
EM( SS_FREE, "FREE" ) \
EM( SS_UNCONNECTED, "UNCONNECTED" ) \
EM( SS_CONNECTING, "CONNECTING," ) \
EM( SS_CONNECTED, "CONNECTED," ) \
EMe(SS_DISCONNECTING, "DISCONNECTING" )
#define rpc_show_socket_state(state) \
__print_symbolic(state, RPC_SHOW_SOCKET)
RPC_SHOW_SOCKET
#define RPC_SHOW_SOCK \
EM( TCP_ESTABLISHED, "ESTABLISHED" ) \
EM( TCP_SYN_SENT, "SYN_SENT" ) \
EM( TCP_SYN_RECV, "SYN_RECV" ) \
EM( TCP_FIN_WAIT1, "FIN_WAIT1" ) \
EM( TCP_FIN_WAIT2, "FIN_WAIT2" ) \
EM( TCP_TIME_WAIT, "TIME_WAIT" ) \
EM( TCP_CLOSE, "CLOSE" ) \
EM( TCP_CLOSE_WAIT, "CLOSE_WAIT" ) \
EM( TCP_LAST_ACK, "LAST_ACK" ) \
EM( TCP_LISTEN, "LISTEN" ) \
EMe( TCP_CLOSING, "CLOSING" )
#define rpc_show_sock_state(state) \
__print_symbolic(state, RPC_SHOW_SOCK)
RPC_SHOW_SOCK
/*
* Now redefine the EM() and EMe() macros to map the enums to the strings
* that will be printed in the output.
*/
#undef EM
#undef EMe
#define EM(a, b) {a, b},
#define EMe(a, b) {a, b}
DECLARE_EVENT_CLASS(xs_socket_event,
TP_PROTO(
struct rpc_xprt *xprt,
struct socket *socket
),
TP_ARGS(xprt, socket),
TP_STRUCT__entry(
__field(unsigned int, socket_state)
__field(unsigned int, sock_state)
__field(unsigned long long, ino)
__string(dstaddr,
xprt->address_strings[RPC_DISPLAY_ADDR])
__string(dstport,
xprt->address_strings[RPC_DISPLAY_PORT])
),
TP_fast_assign(
struct inode *inode = SOCK_INODE(socket);
__entry->socket_state = socket->state;
__entry->sock_state = socket->sk->sk_state;
__entry->ino = (unsigned long long)inode->i_ino;
__assign_str(dstaddr,
xprt->address_strings[RPC_DISPLAY_ADDR]);
__assign_str(dstport,
xprt->address_strings[RPC_DISPLAY_PORT]);
),
TP_printk(
"socket:[%llu] dstaddr=%s/%s "
"state=%u (%s) sk_state=%u (%s)",
__entry->ino, __get_str(dstaddr), __get_str(dstport),
__entry->socket_state,
rpc_show_socket_state(__entry->socket_state),
__entry->sock_state,
rpc_show_sock_state(__entry->sock_state)
)
);
#define DEFINE_RPC_SOCKET_EVENT(name) \
DEFINE_EVENT(xs_socket_event, name, \
TP_PROTO( \
struct rpc_xprt *xprt, \
struct socket *socket \
), \
TP_ARGS(xprt, socket))
DECLARE_EVENT_CLASS(xs_socket_event_done,
TP_PROTO(
struct rpc_xprt *xprt,
struct socket *socket,
int error
),
TP_ARGS(xprt, socket, error),
TP_STRUCT__entry(
__field(int, error)
__field(unsigned int, socket_state)
__field(unsigned int, sock_state)
__field(unsigned long long, ino)
__string(dstaddr,
xprt->address_strings[RPC_DISPLAY_ADDR])
__string(dstport,
xprt->address_strings[RPC_DISPLAY_PORT])
),
TP_fast_assign(
struct inode *inode = SOCK_INODE(socket);
__entry->socket_state = socket->state;
__entry->sock_state = socket->sk->sk_state;
__entry->ino = (unsigned long long)inode->i_ino;
__entry->error = error;
__assign_str(dstaddr,
xprt->address_strings[RPC_DISPLAY_ADDR]);
__assign_str(dstport,
xprt->address_strings[RPC_DISPLAY_PORT]);
),
TP_printk(
"error=%d socket:[%llu] dstaddr=%s/%s "
"state=%u (%s) sk_state=%u (%s)",
__entry->error,
__entry->ino, __get_str(dstaddr), __get_str(dstport),
__entry->socket_state,
rpc_show_socket_state(__entry->socket_state),
__entry->sock_state,
rpc_show_sock_state(__entry->sock_state)
)
);
#define DEFINE_RPC_SOCKET_EVENT_DONE(name) \
DEFINE_EVENT(xs_socket_event_done, name, \
TP_PROTO( \
struct rpc_xprt *xprt, \
struct socket *socket, \
int error \
), \
TP_ARGS(xprt, socket, error))
DEFINE_RPC_SOCKET_EVENT(rpc_socket_state_change);
DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_connect);
DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_error);
DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection);
DEFINE_RPC_SOCKET_EVENT(rpc_socket_close);
DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown);
DECLARE_EVENT_CLASS(rpc_xprt_event,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status),
TP_STRUCT__entry(
__field(__be32, xid)
__field(int, status)
__string(addr, xprt->address_strings[RPC_DISPLAY_ADDR])
__string(port, xprt->address_strings[RPC_DISPLAY_PORT])
),
TP_fast_assign(
__entry->xid = xid;
__entry->status = status;
__assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]);
__assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]);
),
TP_printk("peer=[%s]:%s xid=0x%x status=%d", __get_str(addr),
__get_str(port), be32_to_cpu(__entry->xid),
__entry->status)
);
DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status));
DEFINE_EVENT(rpc_xprt_event, xprt_transmit,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status));
DEFINE_EVENT(rpc_xprt_event, xprt_complete_rqst,
TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
TP_ARGS(xprt, xid, status));
TRACE_EVENT(xs_tcp_data_ready,
TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total),
TP_ARGS(xprt, err, total),
TP_STRUCT__entry(
__field(int, err)
__field(unsigned int, total)
__string(addr, xprt ? xprt->address_strings[RPC_DISPLAY_ADDR] :
"(null)")
__string(port, xprt ? xprt->address_strings[RPC_DISPLAY_PORT] :
"(null)")
),
TP_fast_assign(
__entry->err = err;
__entry->total = total;
__assign_str(addr, xprt ?
xprt->address_strings[RPC_DISPLAY_ADDR] : "(null)");
__assign_str(port, xprt ?
xprt->address_strings[RPC_DISPLAY_PORT] : "(null)");
),
TP_printk("peer=[%s]:%s err=%d total=%u", __get_str(addr),
__get_str(port), __entry->err, __entry->total)
);
#define rpc_show_sock_xprt_flags(flags) \
__print_flags(flags, "|", \
{ TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \
{ TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \
{ TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \
{ TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \
{ TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \
{ TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \
{ TCP_RPC_REPLY, "TCP_RPC_REPLY" })
TRACE_EVENT(xs_tcp_data_recv,
TP_PROTO(struct sock_xprt *xs),
TP_ARGS(xs),
TP_STRUCT__entry(
__string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR])
__string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT])
__field(__be32, xid)
__field(unsigned long, flags)
__field(unsigned long, copied)
__field(unsigned int, reclen)
__field(unsigned long, offset)
),
TP_fast_assign(
__assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
__assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
__entry->xid = xs->tcp_xid;
__entry->flags = xs->tcp_flags;
__entry->copied = xs->tcp_copied;
__entry->reclen = xs->tcp_reclen;
__entry->offset = xs->tcp_offset;
),
TP_printk("peer=[%s]:%s xid=0x%x flags=%s copied=%lu reclen=%u offset=%lu",
__get_str(addr), __get_str(port), be32_to_cpu(__entry->xid),
rpc_show_sock_xprt_flags(__entry->flags),
__entry->copied, __entry->reclen, __entry->offset)
);
#define show_rqstp_flags(flags) \
__print_flags(flags, "|", \
{ (1UL << RQ_SECURE), "RQ_SECURE"}, \
{ (1UL << RQ_LOCAL), "RQ_LOCAL"}, \
{ (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}, \
{ (1UL << RQ_DROPME), "RQ_DROPME"}, \
{ (1UL << RQ_SPLICE_OK), "RQ_SPLICE_OK"}, \
sunrpc: convert to lockless lookup of queued server threads Testing has shown that the pool->sp_lock can be a bottleneck on a busy server. Every time data is received on a socket, the server must take that lock in order to dequeue a thread from the sp_threads list. Address this problem by eliminating the sp_threads list (which contains threads that are currently idle) and replacing it with a RQ_BUSY flag in svc_rqst. This allows us to walk the sp_all_threads list under the rcu_read_lock and find a suitable thread for the xprt by doing a test_and_set_bit. Note that we do still have a potential atomicity problem however with this approach. We don't want svc_xprt_do_enqueue to set the rqst->rq_xprt pointer unless a test_and_set_bit of RQ_BUSY returned zero (which indicates that the thread was idle). But, by the time we check that, the bit could be flipped by a waking thread. To address this, we acquire a new per-rqst spinlock (rq_lock) and take that before doing the test_and_set_bit. If that returns false, then we can set rq_xprt and drop the spinlock. Then, when the thread wakes up, it must set the bit under the same spinlock and can trust that if it was already set then the rq_xprt is also properly set. With this scheme, the case where we have an idle thread no longer needs to take the highly contended pool->sp_lock at all, and that removes the bottleneck. That still leaves one issue: What of the case where we walk the whole sp_all_threads list and don't find an idle thread? Because the search is lockess, it's possible for the queueing to race with a thread that is going to sleep. To address that, we queue the xprt and then search again. If we find an idle thread at that point, we can't attach the xprt to it directly since that might race with a different thread waking up and finding it. All we can do is wake the idle thread back up and let it attempt to find the now-queued xprt. Signed-off-by: Jeff Layton <jlayton@primarydata.com> Tested-by: Chris Worley <chris.worley@primarydata.com> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
2014-11-22 02:19:30 +07:00
{ (1UL << RQ_VICTIM), "RQ_VICTIM"}, \
{ (1UL << RQ_BUSY), "RQ_BUSY"})
TRACE_EVENT(svc_recv,
TP_PROTO(struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status),
TP_STRUCT__entry(
__field(struct sockaddr *, addr)
__field(__be32, xid)
__field(int, status)
__field(unsigned long, flags)
),
TP_fast_assign(
__entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = status > 0 ? rqst->rq_xid : 0;
__entry->status = status;
__entry->flags = rqst->rq_flags;
),
TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr,
be32_to_cpu(__entry->xid), __entry->status,
show_rqstp_flags(__entry->flags))
);
DECLARE_EVENT_CLASS(svc_rqst_status,
TP_PROTO(struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status),
TP_STRUCT__entry(
__field(struct sockaddr *, addr)
__field(__be32, xid)
__field(int, dropme)
__field(int, status)
__field(unsigned long, flags)
),
TP_fast_assign(
__entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = rqst->rq_xid;
__entry->status = status;
__entry->flags = rqst->rq_flags;
),
TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s",
__entry->addr, be32_to_cpu(__entry->xid),
__entry->status, show_rqstp_flags(__entry->flags))
);
DEFINE_EVENT(svc_rqst_status, svc_process,
TP_PROTO(struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status));
DEFINE_EVENT(svc_rqst_status, svc_send,
TP_PROTO(struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status));
#define show_svc_xprt_flags(flags) \
__print_flags(flags, "|", \
{ (1UL << XPT_BUSY), "XPT_BUSY"}, \
{ (1UL << XPT_CONN), "XPT_CONN"}, \
{ (1UL << XPT_CLOSE), "XPT_CLOSE"}, \
{ (1UL << XPT_DATA), "XPT_DATA"}, \
{ (1UL << XPT_TEMP), "XPT_TEMP"}, \
{ (1UL << XPT_DEAD), "XPT_DEAD"}, \
{ (1UL << XPT_CHNGBUF), "XPT_CHNGBUF"}, \
{ (1UL << XPT_DEFERRED), "XPT_DEFERRED"}, \
{ (1UL << XPT_OLD), "XPT_OLD"}, \
{ (1UL << XPT_LISTENER), "XPT_LISTENER"}, \
{ (1UL << XPT_CACHE_AUTH), "XPT_CACHE_AUTH"}, \
{ (1UL << XPT_LOCAL), "XPT_LOCAL"})
TRACE_EVENT(svc_xprt_do_enqueue,
TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
TP_ARGS(xprt, rqst),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(struct svc_rqst *, rqst)
),
TP_fast_assign(
__entry->xprt = xprt;
__entry->rqst = rqst;
),
TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->xprt->xpt_remote,
__entry->rqst ? __entry->rqst->rq_task->pid : 0,
show_svc_xprt_flags(__entry->xprt->xpt_flags))
);
TRACE_EVENT(svc_xprt_dequeue,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field_struct(struct sockaddr_storage, ss)
__field(unsigned long, flags)
),
TP_fast_assign(
__entry->xprt = xprt,
xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
__entry->flags = xprt ? xprt->xpt_flags : 0;
),
TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->ss,
show_svc_xprt_flags(__entry->flags))
);
TRACE_EVENT(svc_wake_up,
TP_PROTO(int pid),
TP_ARGS(pid),
TP_STRUCT__entry(
__field(int, pid)
),
TP_fast_assign(
__entry->pid = pid;
),
TP_printk("pid=%d", __entry->pid)
);
TRACE_EVENT(svc_handle_xprt,
TP_PROTO(struct svc_xprt *xprt, int len),
TP_ARGS(xprt, len),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(int, len)
),
TP_fast_assign(
__entry->xprt = xprt;
__entry->len = len;
),
TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
(struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len,
show_svc_xprt_flags(__entry->xprt->xpt_flags))
);
#endif /* _TRACE_SUNRPC_H */
#include <trace/define_trace.h>