diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index a25fd43650ad..441e67e3a9d7 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
+
+ccflags-y += -I$(src)
+
 obj-$(CONFIG_NVME_CORE) += nvme-core.o
 obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
 obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -6,6 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
 obj-$(CONFIG_NVME_FC) += nvme-fc.o
 
 nvme-core-y := core.o
+nvme-core-$(CONFIG_TRACING) += trace.o
 nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
 nvme-core-$(CONFIG_NVM) += lightnvm.o
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fde6fd2e7eef..b3af8e914570 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -29,6 +29,9 @@
 #include <linux/pm_qos.h>
 #include <asm/unaligned.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 #include "nvme.h"
 #include "fabrics.h"
 
@@ -217,6 +220,8 @@ void nvme_complete_rq(struct request *req)
 {
 	blk_status_t status = nvme_error_status(req);
 
+	trace_nvme_complete_rq(req);
+
 	if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
 		if (nvme_req_needs_failover(req, status)) {
 			nvme_failover_req(req);
@@ -260,7 +265,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	switch (new_state) {
 	case NVME_CTRL_ADMIN_ONLY:
 		switch (old_state) {
-		case NVME_CTRL_RESETTING:
+		case NVME_CTRL_RECONNECTING:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -628,6 +633,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 	}
 
 	cmd->common.command_id = req->tag;
+	if (ns)
+		trace_nvme_setup_nvm_cmd(req->q->id, cmd);
+	else
+		trace_nvme_setup_admin_cmd(cmd);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_setup_cmd);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index eb46967bb0d5..9cee72a80472 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -739,12 +739,13 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				goto out;
 			}
 			ret = uuid_parse(p, &hostid);
-			kfree(p);
 			if (ret) {
 				pr_err("Invalid hostid %s\n", p);
 				ret = -EINVAL;
+				kfree(p);
 				goto out;
 			}
+			kfree(p);
 			break;
 		case NVMF_OPT_DUP_CONNECT:
 			opts->duplicate_connect = true;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a2ffb557b616..0bc6a9e48c8e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1140,9 +1140,14 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	 */
 	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
 
-	/* If there is a reset ongoing, we shouldn't reset again. */
-	if (dev->ctrl.state == NVME_CTRL_RESETTING)
+	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
+	switch (dev->ctrl.state) {
+	case NVME_CTRL_RESETTING:
+	case NVME_CTRL_RECONNECTING:
 		return false;
+	default:
+		break;
+	}
 
 	/* We shouldn't reset unless the controller is on fatal error state
 	 * _or_ if we lost the communication with it.
@@ -1324,9 +1329,6 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
 	struct nvme_queue *nvmeq = &dev->queues[0];
 
-	if (nvme_suspend_queue(nvmeq))
-		return;
-
 	if (shutdown)
 		nvme_shutdown_ctrl(&dev->ctrl);
 	else
@@ -1384,6 +1386,9 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid,
 {
 	struct nvme_queue *nvmeq = &dev->queues[qid];
 
+	if (dev->ctrl.queue_count > qid)
+		return 0;
+
 	nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
 					  &nvmeq->cq_dma_addr, GFP_KERNEL);
 	if (!nvmeq->cqes)
@@ -2008,9 +2013,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 	return 0;
 }
 
-static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
+static void nvme_disable_io_queues(struct nvme_dev *dev)
 {
-	int pass;
+	int pass, queues = dev->online_queues - 1;
 	unsigned long timeout;
 	u8 opcode = nvme_admin_delete_sq;
 
@@ -2161,7 +2166,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
-	int i, queues;
+	int i;
 	bool dead = true;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -2196,21 +2201,13 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	}
 	nvme_stop_queues(&dev->ctrl);
 
-	queues = dev->online_queues - 1;
-	for (i = dev->ctrl.queue_count - 1; i > 0; i--)
-		nvme_suspend_queue(&dev->queues[i]);
-
-	if (dead) {
-		/* A device might become IO incapable very soon during
-		 * probe, before the admin queue is configured. Thus,
-		 * queue_count can be 0 here.
-		 */
-		if (dev->ctrl.queue_count)
-			nvme_suspend_queue(&dev->queues[0]);
-	} else {
-		nvme_disable_io_queues(dev, queues);
+	if (!dead) {
+		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
+	for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
+		nvme_suspend_queue(&dev->queues[i]);
+
 	nvme_pci_disable(dev);
 
 	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
@@ -2292,6 +2289,16 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
+	/*
+	 * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
+	 * initializing procedure here.
+	 */
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) {
+		dev_warn(dev->ctrl.device,
+			"failed to mark controller RECONNECTING\n");
+		goto out;
+	}
+
 	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 38e183461d9d..6c2fdfa4c86a 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -66,7 +66,6 @@ struct nvme_rdma_request {
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
 	u32			num_sge;
 	int			nents;
-	bool			inline_data;
 	struct ib_reg_wr	reg_wr;
 	struct ib_cqe		reg_cqe;
 	struct nvme_rdma_queue	*queue;
@@ -1086,7 +1085,6 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
 	sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl));
 	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
 
-	req->inline_data = true;
 	req->num_sge++;
 	return 0;
 }
@@ -1158,7 +1156,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	int count, ret;
 
 	req->num_sge = 1;
-	req->inline_data = false;
 	refcount_set(&req->ref, 2); /* send and recv completions */
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
new file mode 100644
index 000000000000..41944bbef835
--- /dev/null
+++ b/drivers/nvme/host/trace.c
@@ -0,0 +1,130 @@
+/*
+ * NVM Express device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <asm/unaligned.h>
+#include "trace.h"
+
+static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u16 sqid = get_unaligned_le16(cdw10);
+	u16 qsize = get_unaligned_le16(cdw10 + 2);
+	u16 sq_flags = get_unaligned_le16(cdw10 + 4);
+	u16 cqid = get_unaligned_le16(cdw10 + 6);
+
+
+	trace_seq_printf(p, "sqid=%u, qsize=%u, sq_flags=0x%x, cqid=%u",
+			 sqid, qsize, sq_flags, cqid);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u16 cqid = get_unaligned_le16(cdw10);
+	u16 qsize = get_unaligned_le16(cdw10 + 2);
+	u16 cq_flags = get_unaligned_le16(cdw10 + 4);
+	u16 irq_vector = get_unaligned_le16(cdw10 + 6);
+
+	trace_seq_printf(p, "cqid=%u, qsize=%u, cq_flags=0x%x, irq_vector=%u",
+			 cqid, qsize, cq_flags, irq_vector);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u8 cns = cdw10[0];
+	u16 ctrlid = get_unaligned_le16(cdw10 + 2);
+
+	trace_seq_printf(p, "cns=%u, ctrlid=%u", cns, ctrlid);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+
+
+static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u64 slba = get_unaligned_le64(cdw10);
+	u16 length = get_unaligned_le16(cdw10 + 8);
+	u16 control = get_unaligned_le16(cdw10 + 10);
+	u32 dsmgmt = get_unaligned_le32(cdw10 + 12);
+	u32 reftag = get_unaligned_le32(cdw10 + 16);
+
+	trace_seq_printf(p,
+			 "slba=%llu, len=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
+			 slba, length, control, dsmgmt, reftag);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+
+	trace_seq_printf(p, "nr=%u, attributes=%u",
+			 get_unaligned_le32(cdw10),
+			 get_unaligned_le32(cdw10 + 4));
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+static const char *nvme_trace_common(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+
+	trace_seq_printf(p, "cdw10=%*ph", 24, cdw10);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
+const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
+				       u8 opcode, u8 *cdw10)
+{
+	switch (opcode) {
+	case nvme_admin_create_sq:
+		return nvme_trace_create_sq(p, cdw10);
+	case nvme_admin_create_cq:
+		return nvme_trace_create_cq(p, cdw10);
+	case nvme_admin_identify:
+		return nvme_trace_admin_identify(p, cdw10);
+	default:
+		return nvme_trace_common(p, cdw10);
+	}
+}
+
+const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
+				     u8 opcode, u8 *cdw10)
+{
+	switch (opcode) {
+	case nvme_cmd_read:
+	case nvme_cmd_write:
+	case nvme_cmd_write_zeroes:
+		return nvme_trace_read_write(p, cdw10);
+	case nvme_cmd_dsm:
+		return nvme_trace_dsm(p, cdw10);
+	default:
+		return nvme_trace_common(p, cdw10);
+	}
+}
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
new file mode 100644
index 000000000000..ea91fccd1bc0
--- /dev/null
+++ b/drivers/nvme/host/trace.h
@@ -0,0 +1,165 @@
+/*
+ * NVM Express device driver tracepoints
+ * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nvme
+
+#if !defined(_TRACE_NVME_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NVME_H
+
+#include <linux/nvme.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "nvme.h"
+
+#define nvme_admin_opcode_name(opcode)	{ opcode, #opcode }
+#define show_admin_opcode_name(val)					\
+	__print_symbolic(val,						\
+		nvme_admin_opcode_name(nvme_admin_delete_sq),		\
+		nvme_admin_opcode_name(nvme_admin_create_sq),		\
+		nvme_admin_opcode_name(nvme_admin_get_log_page),	\
+		nvme_admin_opcode_name(nvme_admin_delete_cq),		\
+		nvme_admin_opcode_name(nvme_admin_create_cq),		\
+		nvme_admin_opcode_name(nvme_admin_identify),		\
+		nvme_admin_opcode_name(nvme_admin_abort_cmd),		\
+		nvme_admin_opcode_name(nvme_admin_set_features),	\
+		nvme_admin_opcode_name(nvme_admin_get_features),	\
+		nvme_admin_opcode_name(nvme_admin_async_event),		\
+		nvme_admin_opcode_name(nvme_admin_ns_mgmt),		\
+		nvme_admin_opcode_name(nvme_admin_activate_fw),		\
+		nvme_admin_opcode_name(nvme_admin_download_fw),		\
+		nvme_admin_opcode_name(nvme_admin_ns_attach),		\
+		nvme_admin_opcode_name(nvme_admin_keep_alive),		\
+		nvme_admin_opcode_name(nvme_admin_directive_send),	\
+		nvme_admin_opcode_name(nvme_admin_directive_recv),	\
+		nvme_admin_opcode_name(nvme_admin_dbbuf),		\
+		nvme_admin_opcode_name(nvme_admin_format_nvm),		\
+		nvme_admin_opcode_name(nvme_admin_security_send),	\
+		nvme_admin_opcode_name(nvme_admin_security_recv),	\
+		nvme_admin_opcode_name(nvme_admin_sanitize_nvm))
+
+const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode,
+				       u8 *cdw10);
+#define __parse_nvme_admin_cmd(opcode, cdw10) \
+	nvme_trace_parse_admin_cmd(p, opcode, cdw10)
+
+#define nvme_opcode_name(opcode)	{ opcode, #opcode }
+#define show_opcode_name(val)					\
+	__print_symbolic(val,					\
+		nvme_opcode_name(nvme_cmd_flush),		\
+		nvme_opcode_name(nvme_cmd_write),		\
+		nvme_opcode_name(nvme_cmd_read),		\
+		nvme_opcode_name(nvme_cmd_write_uncor),		\
+		nvme_opcode_name(nvme_cmd_compare),		\
+		nvme_opcode_name(nvme_cmd_write_zeroes),	\
+		nvme_opcode_name(nvme_cmd_dsm),			\
+		nvme_opcode_name(nvme_cmd_resv_register),	\
+		nvme_opcode_name(nvme_cmd_resv_report),		\
+		nvme_opcode_name(nvme_cmd_resv_acquire),	\
+		nvme_opcode_name(nvme_cmd_resv_release))
+
+const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode,
+				     u8 *cdw10);
+#define __parse_nvme_cmd(opcode, cdw10) \
+	nvme_trace_parse_nvm_cmd(p, opcode, cdw10)
+
+TRACE_EVENT(nvme_setup_admin_cmd,
+	TP_PROTO(struct nvme_command *cmd),
+	TP_ARGS(cmd),
+	TP_STRUCT__entry(
+		__field(u8, opcode)
+		__field(u8, flags)
+		__field(u16, cid)
+		__field(u64, metadata)
+		__array(u8, cdw10, 24)
+	),
+	TP_fast_assign(
+		__entry->opcode = cmd->common.opcode;
+		__entry->flags = cmd->common.flags;
+		__entry->cid = cmd->common.command_id;
+		__entry->metadata = le64_to_cpu(cmd->common.metadata);
+		memcpy(__entry->cdw10, cmd->common.cdw10,
+		       sizeof(__entry->cdw10));
+	),
+	TP_printk(" cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
+		  __entry->cid, __entry->flags, __entry->metadata,
+		  show_admin_opcode_name(__entry->opcode),
+		  __parse_nvme_admin_cmd(__entry->opcode, __entry->cdw10))
+);
+
+
+TRACE_EVENT(nvme_setup_nvm_cmd,
+	TP_PROTO(int qid, struct nvme_command *cmd),
+	TP_ARGS(qid, cmd),
+	TP_STRUCT__entry(
+		__field(int, qid)
+		__field(u8, opcode)
+		__field(u8, flags)
+		__field(u16, cid)
+		__field(u32, nsid)
+		__field(u64, metadata)
+		__array(u8, cdw10, 24)
+	),
+	TP_fast_assign(
+		__entry->qid = qid;
+		__entry->opcode = cmd->common.opcode;
+		__entry->flags = cmd->common.flags;
+		__entry->cid = cmd->common.command_id;
+		__entry->nsid = le32_to_cpu(cmd->common.nsid);
+		__entry->metadata = le64_to_cpu(cmd->common.metadata);
+		memcpy(__entry->cdw10, cmd->common.cdw10,
+		       sizeof(__entry->cdw10));
+	),
+	TP_printk("qid=%d, nsid=%u, cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
+		  __entry->qid, __entry->nsid, __entry->cid,
+		  __entry->flags, __entry->metadata,
+		  show_opcode_name(__entry->opcode),
+		  __parse_nvme_cmd(__entry->opcode, __entry->cdw10))
+);
+
+TRACE_EVENT(nvme_complete_rq,
+	TP_PROTO(struct request *req),
+	TP_ARGS(req),
+	TP_STRUCT__entry(
+		__field(int, qid)
+		__field(int, cid)
+		__field(u64, result)
+		__field(u8, retries)
+		__field(u8, flags)
+		__field(u16, status)
+	),
+	TP_fast_assign(
+		__entry->qid = req->q->id;
+		__entry->cid = req->tag;
+		__entry->result = le64_to_cpu(nvme_req(req)->result.u64);
+		__entry->retries = nvme_req(req)->retries;
+		__entry->flags = nvme_req(req)->flags;
+		__entry->status = nvme_req(req)->status;
+	),
+	TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u",
+		  __entry->cid, __entry->qid, __entry->result,
+		  __entry->retries, __entry->flags, __entry->status)
+
+);
+
+#endif /* _TRACE_NVME_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
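
A minimal usage sketch, not part of the patch: once a kernel with this series and CONFIG_TRACING is running, the new "nvme" trace events (nvme_setup_admin_cmd, nvme_setup_nvm_cmd, nvme_complete_rq) can be consumed from userspace through tracefs. The mount point /sys/kernel/tracing is an assumption; on older systems the same files typically appear under /sys/kernel/debug/tracing.

/* Hedged example: enable all events in the nvme trace system and stream
 * the formatted output. Assumes tracefs is mounted at /sys/kernel/tracing.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *enable, *pipe;
	int c;

	/* "events/nvme/enable" toggles every event under TRACE_SYSTEM nvme. */
	enable = fopen("/sys/kernel/tracing/events/nvme/enable", "w");
	if (!enable) {
		perror("enable nvme events");
		return EXIT_FAILURE;
	}
	fputs("1\n", enable);
	fclose(enable);

	/* trace_pipe blocks until events arrive and consumes them on read. */
	pipe = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!pipe) {
		perror("open trace_pipe");
		return EXIT_FAILURE;
	}
	while ((c = fgetc(pipe)) != EOF)
		putchar(c);
	fclose(pipe);
	return EXIT_SUCCESS;
}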