block-5.10-2020-10-24

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl+UQjkQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpvN9D/9iHU7Vgi8J3SiLrHYUiDtMSI5VnEmSBo6K
 Ej/wbbrk4tm2UYi550krOk0dMaHxWD3XWSTlpnrE0sUfjs69G676yzxrlnPt50f5
 XbcMc0YOHZfffeu9xXykUO8Q2918PTPC08eLaTK1I8lhKAuuTFCT/syGYu+prfd7
 AogyuczaDok8nqJEK9QNr0iaEUbe17GQwmvpWyjHl/qfKhWvV2r6jCZZf6pzQj2c
 zv3kbiT3u6xw9OEuhY0sgpTEfhAHEXbNIln6Ob4qVgxmOjwgiZdU/QXyw1i2s6pc
 ks7e28P43r3VfNYGBfr/hQCeAJT9gOeUG5yBiQr7ooX6uNPL6GOCG7DO/g5y2thQ
 NkV4hub/FjYWbSmRzDlJGj1fWn4L+3r/O8g5nMr+F1L3JYeaW0hOyStqBQ4O74Cj
 04tvWQ8ndXdPQrm/iDhM6KxfCvR5TC6k4fy9XPpRW8JOxauhIwTZQJyEQUnXTH3v
 pwv3IxRmuWGa3mrJZ5kGhsNAEGHdZCL5soLI+BXAD2MUW2IB5v2HpD/z1bvWL/51
 uYiVIt/2LxgLkF7BXP40PnY0qqTsOwGxdd6wQhi5Jn9Et+JkmAAR6cVwXx4AhuQg
 FT5mq7ZTQBZrErQu4Mr1k3UyqBFm4MB+mbJhWrVWnUnnyA6pcr1NUsUTz5JcyrWz
 jWI7T1Si7w==
 =dFJi
 -----END PGP SIGNATURE-----

Merge tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph
     - rdma error handling fixes (Chao Leng)
     - fc error handling and reconnect fixes (James Smart)
     - fix the qid displace when tracing ioctl command (Keith Busch)
     - don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni)
     - fix MTDT for passthru (Logan Gunthorpe)
     - blacklist Write Same on more devices (Kai-Heng Feng)
     - fix an uninitialized work struct (zhenwei pi)

 - lightnvm out-of-bounds fix (Colin)

 - SG allocation leak fix (Doug)

 - rnbd fixes (Gioh, Guoqing, Jack)

 - zone error translation fixes (Keith)

 - kerneldoc markup fix (Mauro)

 - zram lockdep fix (Peter)

 - Kill unused io_context members (Yufen)

 - NUMA memory allocation cleanup (Xianting)

 - NBD config wakeup fix (Xiubo)

* tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block: (27 commits)
  block: blk-mq: fix a kernel-doc markup
  nvme-fc: shorten reconnect delay if possible for FC
  nvme-fc: wait for queues to freeze before calling update_hr_hw_queues
  nvme-fc: fix error loop in create_hw_io_queues
  nvme-fc: fix io timeout to abort I/O
  null_blk: use zone status for max active/open
  nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru
  nvmet: cleanup nvmet_passthru_map_sg()
  nvmet: limit passthru MTDS by BIO_MAX_PAGES
  nvmet: fix uninitialized work for zero kato
  nvme-pci: disable Write Zeroes on Sandisk Skyhawk
  nvme: use queuedata for nvme_req_qid
  nvme-rdma: fix crash due to incorrect cqe
  nvme-rdma: fix crash when connect rejected
  block: remove unused members for io_context
  blk-mq: remove the calling of local_memory_node()
  zram: Fix __zram_bvec_{read,write}() locking order
  skd_main: remove unused including <linux/version.h>
  sgl_alloc_order: fix memory leak
  lightnvm: fix out-of-bounds write to array devices->info[]
  ...
commit d769139081 by Linus Torvalds, 2020-10-24 12:46:42 -07:00
21 changed files with 244 additions and 130 deletions


@@ -124,6 +124,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
 EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value.
 If this value is 0, there is no limit.
 
+If the host attempts to exceed this limit, the driver should report this error
+with BLK_STS_ZONE_ACTIVE_RESOURCE, which user space may see as the EOVERFLOW
+errno.
+
 max_open_zones (RO)
 -------------------
 For zoned block devices (zoned attribute indicating "host-managed" or

@@ -131,6 +135,10 @@ For zoned block devices (zoned attribute indicating "host-managed" or
 EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
 If this value is 0, there is no limit.
 
+If the host attempts to exceed this limit, the driver should report this error
+with BLK_STS_ZONE_OPEN_RESOURCE, which user space may see as the ETOOMANYREFS
+errno.
+
 max_sectors_kb (RW)
 -------------------
 This is the maximum number of kilobytes that the block layer will allow
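
For context, the sketch below is not part of this series: it shows how user space might read the two limits documented above from sysfs. The device name "nvme0n1" and the read_queue_limit() helper are illustrative assumptions. A zone operation that would exceed these limits can now complete with the ETOOMANYREFS or EOVERFLOW errno described in the text, and a value of 0 still means "no limit".

/* Illustration only: read the advertised zone resource limits from sysfs. */
#include <stdio.h>

static long read_queue_limit(const char *dev, const char *attr)
{
	char path[256];
	long val = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", dev, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;	/* 0 means the device reports no limit */
}

int main(void)
{
	/* "nvme0n1" is a placeholder device name */
	printf("max_open_zones:   %ld\n", read_queue_limit("nvme0n1", "max_open_zones"));
	printf("max_active_zones: %ld\n", read_queue_limit("nvme0n1", "max_active_zones"));
	return 0;
}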


@@ -186,6 +186,10 @@ static const struct {
 	/* device mapper special case, should not leak out: */
 	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },
 
+	/* zone device specific errors */
+	[BLK_STS_ZONE_OPEN_RESOURCE]	= { -ETOOMANYREFS, "open zones exceeded" },
+	[BLK_STS_ZONE_ACTIVE_RESOURCE]	= { -EOVERFLOW, "active zones exceeded" },
+
 	/* everything else not covered above: */
 	[BLK_STS_IOERR]		= { -EIO, "I/O" },
 };
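
For context: blk_errors[] is the table consulted by blk_status_to_errno(), so with these two entries a request completed with BLK_STS_ZONE_OPEN_RESOURCE resolves to -ETOOMANYREFS and one completed with BLK_STS_ZONE_ACTIVE_RESOURCE to -EOVERFLOW, matching the errnos documented in queue-sysfs.rst above.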


@@ -89,7 +89,7 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
 
 	for_each_possible_cpu(i) {
 		if (index == qmap->mq_map[i])
-			return local_memory_node(cpu_to_node(i));
+			return cpu_to_node(i);
 	}
 	return NUMA_NO_NODE;


@@ -1664,7 +1664,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 EXPORT_SYMBOL(blk_mq_run_hw_queue);
 
 /**
- * blk_mq_run_hw_queue - Run all hardware queues in a request queue.
+ * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
  * @q: Pointer to the request queue to run.
  * @async: If we want to run the queue asynchronously.
  */

@@ -2743,7 +2743,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		for (j = 0; j < set->nr_maps; j++) {
 			hctx = blk_mq_map_queue_type(q, j, i);
 			if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
-				hctx->numa_node = local_memory_node(cpu_to_node(i));
+				hctx->numa_node = cpu_to_node(i);
 		}
 	}
 }


@@ -802,9 +802,9 @@ static void recv_work(struct work_struct *work)
 		if (likely(!blk_should_fake_timeout(rq->q)))
 			blk_mq_complete_request(rq);
 	}
+	nbd_config_put(nbd);
 	atomic_dec(&config->recv_threads);
 	wake_up(&config->recv_wq);
-	nbd_config_put(nbd);
 	kfree(args);
 }


@@ -220,29 +220,34 @@ static void null_close_first_imp_zone(struct nullb_device *dev)
 	}
 }
 
-static bool null_can_set_active(struct nullb_device *dev)
+static blk_status_t null_check_active(struct nullb_device *dev)
 {
 	if (!dev->zone_max_active)
-		return true;
+		return BLK_STS_OK;
 
-	return dev->nr_zones_exp_open + dev->nr_zones_imp_open +
-	       dev->nr_zones_closed < dev->zone_max_active;
+	if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
+			dev->nr_zones_closed < dev->zone_max_active)
+		return BLK_STS_OK;
+
+	return BLK_STS_ZONE_ACTIVE_RESOURCE;
 }
 
-static bool null_can_open(struct nullb_device *dev)
+static blk_status_t null_check_open(struct nullb_device *dev)
 {
 	if (!dev->zone_max_open)
-		return true;
+		return BLK_STS_OK;
 
 	if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
-		return true;
+		return BLK_STS_OK;
 
-	if (dev->nr_zones_imp_open && null_can_set_active(dev)) {
-		null_close_first_imp_zone(dev);
-		return true;
+	if (dev->nr_zones_imp_open) {
+		if (null_check_active(dev) == BLK_STS_OK) {
+			null_close_first_imp_zone(dev);
+			return BLK_STS_OK;
+		}
 	}
 
-	return false;
+	return BLK_STS_ZONE_OPEN_RESOURCE;
 }
 
 /*

@@ -258,19 +263,22 @@ static bool null_can_open(struct nullb_device *dev)
  * it is not certain that closing an implicit open zone will allow a new zone
  * to be opened, since we might already be at the active limit capacity.
  */
-static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
+static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone)
 {
+	blk_status_t ret;
+
 	switch (zone->cond) {
 	case BLK_ZONE_COND_EMPTY:
-		if (!null_can_set_active(dev))
-			return false;
+		ret = null_check_active(dev);
+		if (ret != BLK_STS_OK)
+			return ret;
 		fallthrough;
 	case BLK_ZONE_COND_CLOSED:
-		return null_can_open(dev);
+		return null_check_open(dev);
 	default:
 		/* Should never be called for other states */
 		WARN_ON(1);
-		return false;
+		return BLK_STS_IOERR;
 	}
 }

@@ -293,8 +301,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 		return BLK_STS_IOERR;
 	case BLK_ZONE_COND_EMPTY:
 	case BLK_ZONE_COND_CLOSED:
-		if (!null_has_zone_resources(dev, zone))
-			return BLK_STS_IOERR;
+		ret = null_check_zone_resources(dev, zone);
+		if (ret != BLK_STS_OK)
+			return ret;
 		break;
 	case BLK_ZONE_COND_IMP_OPEN:
 	case BLK_ZONE_COND_EXP_OPEN:

@@ -349,6 +358,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 
 static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
 {
+	blk_status_t ret;
+
 	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
 		return BLK_STS_IOERR;

@@ -357,15 +368,17 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
 		/* open operation on exp open is not an error */
 		return BLK_STS_OK;
 	case BLK_ZONE_COND_EMPTY:
-		if (!null_has_zone_resources(dev, zone))
-			return BLK_STS_IOERR;
+		ret = null_check_zone_resources(dev, zone);
+		if (ret != BLK_STS_OK)
+			return ret;
 		break;
 	case BLK_ZONE_COND_IMP_OPEN:
 		dev->nr_zones_imp_open--;
 		break;
 	case BLK_ZONE_COND_CLOSED:
-		if (!null_has_zone_resources(dev, zone))
-			return BLK_STS_IOERR;
+		ret = null_check_zone_resources(dev, zone);
+		if (ret != BLK_STS_OK)
+			return ret;
 		dev->nr_zones_closed--;
 		break;
 	case BLK_ZONE_COND_FULL:

@@ -381,6 +394,8 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo
 
 static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone)
 {
+	blk_status_t ret;
+
 	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
 		return BLK_STS_IOERR;

@@ -389,8 +404,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
 		/* finish operation on full is not an error */
 		return BLK_STS_OK;
 	case BLK_ZONE_COND_EMPTY:
-		if (!null_has_zone_resources(dev, zone))
-			return BLK_STS_IOERR;
+		ret = null_check_zone_resources(dev, zone);
+		if (ret != BLK_STS_OK)
+			return ret;
 		break;
 	case BLK_ZONE_COND_IMP_OPEN:
 		dev->nr_zones_imp_open--;

@@ -399,8 +415,9 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *
 		dev->nr_zones_exp_open--;
 		break;
 	case BLK_ZONE_COND_CLOSED:
-		if (!null_has_zone_resources(dev, zone))
-			return BLK_STS_IOERR;
+		ret = null_check_zone_resources(dev, zone);
+		if (ret != BLK_STS_OK)
+			return ret;
 		dev->nr_zones_closed--;
 		break;
 	default:


@@ -91,11 +91,6 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
 	dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
 	dev->max_segments = BMAX_SEGMENTS;
 
-	dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors,
-				    le32_to_cpu(rsp->max_hw_sectors));
-	dev->max_segments = min_t(u16, dev->max_segments,
-				   le16_to_cpu(rsp->max_segments));
-
 	return 0;
 }

@@ -427,7 +422,7 @@ enum wait_type {
 };
 
 static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
-			struct rnbd_iu *iu, struct kvec *vec, size_t nr,
+			struct rnbd_iu *iu, struct kvec *vec,
 			size_t len, struct scatterlist *sg, unsigned int sg_len,
 			void (*conf)(struct work_struct *work),
 			int *errno, enum wait_type wait)

@@ -441,7 +436,7 @@ static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
 		.conf_fn = msg_conf,
 	};
 	err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit,
-			       vec, nr, len, sg, sg_len);
+			       vec, 1, len, sg, sg_len);
 	if (!err && wait) {
 		wait_event(iu->comp.wait, iu->comp.errno != INT_MAX);
 		*errno = iu->comp.errno;

@@ -486,7 +481,7 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
 	msg.device_id = cpu_to_le32(device_id);
 
 	WARN_ON(!rnbd_clt_get_dev(dev));
-	err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 1, 0, NULL, 0,
+	err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 0, NULL, 0,
 			   msg_close_conf, &errno, wait);
 	if (err) {
 		rnbd_clt_put_dev(dev);

@@ -575,7 +570,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
 
 	WARN_ON(!rnbd_clt_get_dev(dev));
 	err = send_usr_msg(sess->rtrs, READ, iu,
-			   &vec, 1, sizeof(*rsp), iu->sglist, 1,
+			   &vec, sizeof(*rsp), iu->sglist, 1,
 			   msg_open_conf, &errno, wait);
 	if (err) {
 		rnbd_clt_put_dev(dev);

@@ -629,7 +624,7 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait)
 		goto put_iu;
 	}
 	err = send_usr_msg(sess->rtrs, READ, iu,
-			   &vec, 1, sizeof(*rsp), iu->sglist, 1,
+			   &vec, sizeof(*rsp), iu->sglist, 1,
 			   msg_sess_info_conf, &errno, wait);
 	if (err) {
 		rnbd_clt_put_sess(sess);

@@ -1514,7 +1509,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 			      "map_device: Failed to configure device, err: %d\n",
 			      ret);
 		mutex_unlock(&dev->lock);
-		goto del_dev;
+		goto send_close;
 	}
 
 	rnbd_clt_info(dev,

@@ -1533,6 +1528,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
 
 	return dev;
 
+send_close:
+	send_msg_close(dev, dev->device_id, WAIT);
 del_dev:
 	delete_dev(dev);
 put_dev:


@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
 #include <linux/scatterlist.h>
-#include <linux/version.h>
 #include <linux/err.h>
 #include <linux/aer.h>
 #include <linux/wait.h>


@@ -1218,10 +1218,11 @@ static void zram_free_page(struct zram *zram, size_t index)
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 				struct bio *bio, bool partial_io)
 {
-	int ret;
+	struct zcomp_strm *zstrm;
 	unsigned long handle;
 	unsigned int size;
 	void *src, *dst;
+	int ret;
 
 	zram_slot_lock(zram, index);
 	if (zram_test_flag(zram, index, ZRAM_WB)) {

@@ -1252,6 +1253,9 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 
 	size = zram_get_obj_size(zram, index);
 
+	if (size != PAGE_SIZE)
+		zstrm = zcomp_stream_get(zram->comp);
+
 	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
 		dst = kmap_atomic(page);

@@ -1259,8 +1263,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		kunmap_atomic(dst);
 		ret = 0;
 	} else {
-		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
-
 		dst = kmap_atomic(page);
 		ret = zcomp_decompress(zstrm, src, size, dst);
 		kunmap_atomic(dst);


@@ -1311,8 +1311,9 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
 		strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
 		i++;
 
-		if (i > 31) {
-			pr_err("max 31 devices can be reported.\n");
+		if (i >= ARRAY_SIZE(devices->info)) {
+			pr_err("max %zd devices can be reported.\n",
+			       ARRAY_SIZE(devices->info));
 			break;
 		}
 	}


@@ -248,6 +248,10 @@ static blk_status_t nvme_error_status(u16 status)
 		return BLK_STS_NEXUS;
 	case NVME_SC_HOST_PATH_ERROR:
 		return BLK_STS_TRANSPORT;
+	case NVME_SC_ZONE_TOO_MANY_ACTIVE:
+		return BLK_STS_ZONE_ACTIVE_RESOURCE;
+	case NVME_SC_ZONE_TOO_MANY_OPEN:
+		return BLK_STS_ZONE_OPEN_RESOURCE;
 	default:
 		return BLK_STS_IOERR;
 	}


@@ -26,6 +26,10 @@ enum nvme_fc_queue_flags {
 };
 
 #define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */
+#define NVME_FC_DEFAULT_RECONNECT_TMO	2	/* delay between reconnects
+						 * when connected and a
+						 * connection failure.
+						 */
 
 struct nvme_fc_queue {
 	struct nvme_fc_ctrl	*ctrl;

@@ -1837,8 +1841,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
 	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
 	if (opstate != FCPOP_STATE_ACTIVE)
 		atomic_set(&op->state, opstate);
-	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
+	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+		op->flags |= FCOP_FLAGS_TERMIO;
 		ctrl->iocnt++;
+	}
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
 	if (opstate != FCPOP_STATE_ACTIVE)

@@ -1874,7 +1880,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 	if (opstate == FCPOP_STATE_ABORTED) {
 		spin_lock_irqsave(&ctrl->lock, flags);
-		if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+		if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
+		    op->flags & FCOP_FLAGS_TERMIO) {
 			if (!--ctrl->iocnt)
 				wake_up(&ctrl->ioabort_wait);
 		}

@@ -2314,7 +2321,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 	return 0;
 
 delete_queues:
-	for (; i >= 0; i--)
+	for (; i > 0; i--)
 		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
 	return ret;
 }

@@ -2433,7 +2440,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 		return;
 
 	dev_warn(ctrl->ctrl.device,
-		"NVME-FC{%d}: transport association error detected: %s\n",
+		"NVME-FC{%d}: transport association event: %s\n",
 		ctrl->cnum, errmsg);
 	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);

@@ -2446,15 +2453,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
+	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
+	struct nvme_command *sqe = &cmdiu->sqe;
 
 	/*
-	 * we can't individually ABTS an io without affecting the queue,
-	 * thus killing the queue, and thus the association.
-	 * So resolve by performing a controller reset, which will stop
-	 * the host/io stack, terminate the association on the link,
-	 * and recreate an association on the link.
+	 * Attempt to abort the offending command. Command completion
+	 * will detect the aborted io and will fail the connection.
 	 */
-	nvme_fc_error_recovery(ctrl, "io timeout error");
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
+		"x%08x/x%08x\n",
+		ctrl->cnum, op->queue->qnum, sqe->common.opcode,
+		sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
+	if (__nvme_fc_abort_op(ctrl, op))
+		nvme_fc_error_recovery(ctrl, "io timeout abort failed");
 
 	/*
 	 * the io abort has been initiated. Have the reset timer

@@ -2726,7 +2738,7 @@ nvme_fc_complete_rq(struct request *rq)
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
 
 	atomic_set(&op->state, FCPOP_STATE_IDLE);
+	op->flags &= ~FCOP_FLAGS_TERMIO;
 
 	nvme_fc_unmap_data(ctrl, rq, op);
 	nvme_complete_rq(rq);

@@ -2876,11 +2889,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_delete_hw_queues;
 
-	if (prior_ioq_cnt != nr_io_queues)
+	if (prior_ioq_cnt != nr_io_queues) {
 		dev_info(ctrl->ctrl.device,
 			"reconnect: revising io queue count from %d to %d\n",
 			prior_ioq_cnt, nr_io_queues);
-	blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+		nvme_wait_freeze(&ctrl->ctrl);
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+		nvme_unfreeze(&ctrl->ctrl);
+	}
 
 	return 0;

@@ -3090,6 +3106,61 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	return ret;
 }
 
+/*
+ * This routine runs through all outstanding commands on the association
+ * and aborts them.  This routine is typically be called by the
+ * delete_association routine. It is also called due to an error during
+ * reconnect. In that scenario, it is most likely a command that initializes
+ * the controller, including fabric Connect commands on io queues, that
+ * may have timed out or failed thus the io must be killed for the connect
+ * thread to see the error.
+ */
+static void
+__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
+{
+	/*
+	 * If io queues are present, stop them and terminate all outstanding
+	 * ios on them. As FC allocates FC exchange for each io, the
+	 * transport must contact the LLDD to terminate the exchange,
+	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
+	 * to tell us what io's are busy and invoke a transport routine
+	 * to kill them with the LLDD.  After terminating the exchange
+	 * the LLDD will call the transport's normal io done path, but it
+	 * will have an aborted status. The done path will return the
+	 * io requests back to the block layer as part of normal completions
+	 * (but with error status).
+	 */
+	if (ctrl->ctrl.queue_count > 1) {
+		nvme_stop_queues(&ctrl->ctrl);
+		blk_mq_tagset_busy_iter(&ctrl->tag_set,
+				nvme_fc_terminate_exchange, &ctrl->ctrl);
+		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
+		if (start_queues)
+			nvme_start_queues(&ctrl->ctrl);
+	}
+
+	/*
+	 * Other transports, which don't have link-level contexts bound
+	 * to sqe's, would try to gracefully shutdown the controller by
+	 * writing the registers for shutdown and polling (call
+	 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
+	 * just aborted and we will wait on those contexts, and given
+	 * there was no indication of how live the controlelr is on the
+	 * link, don't send more io to create more contexts for the
+	 * shutdown. Let the controller fail via keepalive failure if
+	 * its still present.
+	 */
+
+	/*
+	 * clean up the admin queue. Same thing as above.
+	 */
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
+				nvme_fc_terminate_exchange, &ctrl->ctrl);
+	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+}
+
 /*
  * This routine stops operation of the controller on the host side.
  * On the host os stack side: Admin and IO queues are stopped,

@@ -3110,46 +3181,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	ctrl->iocnt = 0;
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-	/*
-	 * If io queues are present, stop them and terminate all outstanding
-	 * ios on them. As FC allocates FC exchange for each io, the
-	 * transport must contact the LLDD to terminate the exchange,
-	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
-	 * to tell us what io's are busy and invoke a transport routine
-	 * to kill them with the LLDD.  After terminating the exchange
-	 * the LLDD will call the transport's normal io done path, but it
-	 * will have an aborted status. The done path will return the
-	 * io requests back to the block layer as part of normal completions
-	 * (but with error status).
-	 */
-	if (ctrl->ctrl.queue_count > 1) {
-		nvme_stop_queues(&ctrl->ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set,
-				nvme_fc_terminate_exchange, &ctrl->ctrl);
-		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
-	}
-
-	/*
-	 * Other transports, which don't have link-level contexts bound
-	 * to sqe's, would try to gracefully shutdown the controller by
-	 * writing the registers for shutdown and polling (call
-	 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
-	 * just aborted and we will wait on those contexts, and given
-	 * there was no indication of how live the controlelr is on the
-	 * link, don't send more io to create more contexts for the
-	 * shutdown. Let the controller fail via keepalive failure if
-	 * its still present.
-	 */
-
-	/*
-	 * clean up the admin queue. Same thing as above.
-	 * use blk_mq_tagset_busy_itr() and the transport routine to
-	 * terminate the exchanges.
-	 */
-	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-				nvme_fc_terminate_exchange, &ctrl->ctrl);
-	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+	__nvme_fc_abort_outstanding_ios(ctrl, false);
 
 	/* kill the aens as they are a separate path */
 	nvme_fc_abort_aen_ops(ctrl);

@@ -3263,22 +3295,27 @@ static void
 __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
 {
 	/*
-	 * if state is connecting - the error occurred as part of a
-	 * reconnect attempt. The create_association error paths will
-	 * clean up any outstanding io.
-	 *
-	 * if it's a different state - ensure all pending io is
-	 * terminated. Given this can delay while waiting for the
-	 * aborted io to return, we recheck adapter state below
-	 * before changing state.
+	 * if state is CONNECTING - the error occurred as part of a
+	 * reconnect attempt. Abort any ios on the association and
+	 * let the create_association error paths resolve things.
 	 */
-	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
-		nvme_stop_keep_alive(&ctrl->ctrl);
-
-		/* will block will waiting for io to terminate */
-		nvme_fc_delete_association(ctrl);
+	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
+		__nvme_fc_abort_outstanding_ios(ctrl, true);
+		return;
 	}
 
+	/*
+	 * For any other state, kill the association. As this routine
+	 * is a common io abort routine for resetting and such, after
+	 * the association is terminated, ensure that the state is set
+	 * to CONNECTING.
+	 */
+
+	nvme_stop_keep_alive(&ctrl->ctrl);
+
+	/* will block will waiting for io to terminate */
+	nvme_fc_delete_association(ctrl);
+
 	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
 	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		dev_err(ctrl->ctrl.device,

@@ -3403,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 {
 	struct nvme_fc_ctrl *ctrl;
 	unsigned long flags;
-	int ret, idx;
+	int ret, idx, ctrl_loss_tmo;
 
 	if (!(rport->remoteport.port_role &
 	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {

@@ -3429,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		goto out_free_ctrl;
 	}
 
+	/*
+	 * if ctrl_loss_tmo is being enforced and the default reconnect delay
+	 * is being used, change to a shorter reconnect delay for FC.
+	 */
+	if (opts->max_reconnects != -1 &&
+	    opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
+	    opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
+		ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
+		opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
+		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+						opts->reconnect_delay);
+	}
+
 	ctrl->ctrl.opts = opts;
 	ctrl->ctrl.nr_reconnects = 0;
 	if (lport->dev)
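
As a worked example of the reconnect-delay rework above (assuming the fabrics defaults of a 10-second reconnect_delay and a 600-second ctrl_loss_tmo, i.e. max_reconnects = 60, which this diff does not show): the new code computes ctrl_loss_tmo = 60 * 10 = 600, drops reconnect_delay to NVME_FC_DEFAULT_RECONNECT_TMO (2 seconds), and sets max_reconnects = DIV_ROUND_UP(600, 2) = 300, so FC retries every 2 seconds while the overall connection-loss window stays at roughly 600 seconds.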

View File

@@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
 static inline u16 nvme_req_qid(struct request *req)
 {
-	if (!req->rq_disk)
+	if (!req->q->queuedata)
 		return 0;
 
 	return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
 }


@@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = {
 				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_DEVICE(0x1c5c, 0x1504),	/* SK Hynix PC400 */
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+	{ PCI_DEVICE(0x15b7, 0x2001),	/* Sandisk Skyhawk */
+		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
 		.driver_data = NVME_QUIRK_SINGLE_VECTOR },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },


@@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	req->result = cqe->result;
 
 	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
-		if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+		if (unlikely(!req->mr ||
+			     wc->ex.invalidate_rkey != req->mr->rkey)) {
 			dev_err(queue->ctrl->ctrl.device,
 				"Bogus remote invalidation for rkey %#x\n",
-				req->mr->rkey);
+				req->mr ? req->mr->rkey : 0);
 			nvme_rdma_error_recovery(queue->ctrl);
 		}
 	} else if (req->mr) {

@@ -1926,7 +1927,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		complete(&queue->cm_done);
 		return 0;
 	case RDMA_CM_EVENT_REJECTED:
-		nvme_rdma_destroy_queue_ib(queue);
 		cm_error = nvme_rdma_conn_rejected(queue, ev);
 		break;
 	case RDMA_CM_EVENT_ROUTE_ERROR:


@@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 	 * in case a host died before it enabled the controller. Hence, simply
 	 * reset the keep alive timer when the controller is enabled.
 	 */
-	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
+	if (ctrl->kato)
+		mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)


@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
 	struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
 	u16 status = NVME_SC_SUCCESS;
 	struct nvme_id_ctrl *id;
-	u32 max_hw_sectors;
+	int max_hw_sectors;
 	int page_shift;
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);

@@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
 	max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
 				      pctrl->max_hw_sectors);
 
+	/*
+	 * nvmet_passthru_map_sg is limitted to using a single bio so limit
+	 * the mdts based on BIO_MAX_PAGES as well
+	 */
+	max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
+				      max_hw_sectors);
+
 	page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
 
 	id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;

@@ -180,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq,
 
 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 {
-	int sg_cnt = req->sg_cnt;
 	struct scatterlist *sg;
 	int op_flags = 0;
 	struct bio *bio;
 	int i, ret;
 
+	if (req->sg_cnt > BIO_MAX_PAGES)
+		return -EINVAL;
+
 	if (req->cmd->common.opcode == nvme_cmd_flush)
 		op_flags = REQ_FUA;
 	else if (nvme_is_write(req->cmd))
 		op_flags = REQ_SYNC | REQ_IDLE;
 
-	bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+	bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
 	bio->bi_end_io = bio_put;
 	bio->bi_opf = req_op(rq) | op_flags;

@@ -201,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 			bio_put(bio);
 			return -EINVAL;
 		}
-		sg_cnt--;
 	}
 
 	ret = blk_rq_append_bio(rq, &bio);

@@ -236,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 		q = ns->queue;
 	}
 
-	rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+	rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
 	if (IS_ERR(rq)) {
 		status = NVME_SC_INTERNAL;
 		goto out_put_ns;
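
To make the new MDTS clamp concrete (assuming BIO_MAX_PAGES = 256 and 4 KiB pages, which this diff does not spell out): the added min_not_zero() caps max_hw_sectors at 256 << (PAGE_SHIFT - 9) = 256 << 3 = 2048 sectors (1 MiB), and with page_shift = NVME_CAP_MPSMIN + 12 = 12 the exported id->mdts becomes ilog2(2048) + 9 - 12 = 8, i.e. a maximum transfer of 2^8 * 4 KiB = 1 MiB per passthru command, small enough for nvmet_passthru_map_sg() to map with a single bio.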


@@ -777,6 +777,15 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 			/* See SSC3rXX or current. */
 			action = ACTION_FAIL;
 			break;
+		case DATA_PROTECT:
+			action = ACTION_FAIL;
+			if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) ||
+			    (sshdr.asc == 0x55 &&
+			     (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) {
+				/* Insufficient zone resources */
+				blk_stat = BLK_STS_ZONE_OPEN_RESOURCE;
+			}
+			break;
 		default:
 			action = ACTION_FAIL;
 			break;


@@ -104,6 +104,24 @@ typedef u8 __bitwise blk_status_t;
  */
 #define BLK_STS_ZONE_RESOURCE	((__force blk_status_t)14)
 
+/*
+ * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
+ * path if the device returns a status indicating that too many zone resources
+ * are currently open. The same command should be successful if resubmitted
+ * after the number of open zones decreases below the device's limits, which is
+ * reported in the request_queue's max_open_zones.
+ */
+#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)15)
+
+/*
+ * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
+ * path if the device returns a status indicating that too many zone resources
+ * are currently active. The same command should be successful if resubmitted
+ * after the number of active zones decreases below the device's limits, which
+ * is reported in the request_queue's max_active_zones.
+ */
+#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)
+
 /**
  * blk_path_error - returns true if error may be path related
  * @error: status the request was completed with


@@ -106,12 +106,6 @@ struct io_context {
 
 	unsigned short ioprio;
 
-	/*
-	 * For request batching
-	 */
-	int nr_batch_requests;     /* Number of requests left in the batch */
-	unsigned long last_waited; /* Time last woken after wait for request */
-
 	struct radix_tree_root	icq_tree;
 	struct io_cq __rcu	*icq_hint;
 	struct hlist_head	icq_list;


@@ -595,7 +595,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
 		elem_len = min_t(u64, length, PAGE_SIZE << order);
 		page = alloc_pages(gfp, order);
 		if (!page) {
-			sgl_free(sgl);
+			sgl_free_order(sgl, order);
 			return NULL;
 		}