Mirror of https://github.com/AuxXxilium/linux_dsm_epyc7002.git (synced 2024-11-27 07:20:52 +07:00)
IB/srp: Use SRP transport layer error recovery
Enable fast_io_fail_tmo and dev_loss_tmo functionality for the IB SRP initiator. Add kernel module parameters that allow specifying default values for these parameters.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: David Dillow <dillowda@ornl.gov>
Signed-off-by: Roland Dreier <roland@purestorage.com>
commit ed9b2264fb
parent 29c1732480
@@ -86,6 +86,27 @@ module_param(topspin_workarounds, int, 0444);
 MODULE_PARM_DESC(topspin_workarounds,
                 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
 
+static struct kernel_param_ops srp_tmo_ops;
+
+static int srp_fast_io_fail_tmo = 15;
+module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
+                S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(fast_io_fail_tmo,
+                "Number of seconds between the observation of a transport"
+                " layer error and failing all I/O. \"off\" means that this"
+                " functionality is disabled.");
+
+static int srp_dev_loss_tmo = 60;
+module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
+                S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dev_loss_tmo,
+                "Maximum number of seconds that the SRP transport should"
+                " insulate transport layer errors. After this time has been"
+                " exceeded the SCSI host is removed. Should be"
+                " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
+                " if fast_io_fail_tmo has not been set. \"off\" means that"
+                " this functionality is disabled.");
+
 static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device);
 static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -102,6 +123,44 @@ static struct ib_client srp_client = {
 
 static struct ib_sa_client srp_sa_client;
 
+static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
+{
+        int tmo = *(int *)kp->arg;
+
+        if (tmo >= 0)
+                return sprintf(buffer, "%d", tmo);
+        else
+                return sprintf(buffer, "off");
+}
+
+static int srp_tmo_set(const char *val, const struct kernel_param *kp)
+{
+        int tmo, res;
+
+        if (strncmp(val, "off", 3) != 0) {
+                res = kstrtoint(val, 0, &tmo);
+                if (res)
+                        goto out;
+        } else {
+                tmo = -1;
+        }
+        if (kp->arg == &srp_fast_io_fail_tmo)
+                res = srp_tmo_valid(tmo, srp_dev_loss_tmo);
+        else
+                res = srp_tmo_valid(srp_fast_io_fail_tmo, tmo);
+        if (res)
+                goto out;
+        *(int *)kp->arg = tmo;
+
+out:
+        return res;
+}
+
+static struct kernel_param_ops srp_tmo_ops = {
+        .get = srp_tmo_get,
+        .set = srp_tmo_set,
+};
+
 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
 {
         return (struct srp_target_port *) host->hostdata;
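Note that srp_tmo_valid(), which the new srp_tmo_set() callback relies on, is not part of this diff; it is provided by the SRP transport class (scsi_transport_srp). Based solely on the parameter descriptions above, a minimal sketch of the kind of check it performs could look like the following (illustrative only, not the upstream implementation):

#include <linux/errno.h>
#include <scsi/scsi_device.h>   /* SCSI_DEVICE_BLOCK_MAX_TIMEOUT */

/*
 * Illustrative sketch only. srp_tmo_set() encodes "off" as a negative value,
 * so the checks below mirror the documented constraints: dev_loss_tmo must
 * stay within 1..SCSI_DEVICE_BLOCK_MAX_TIMEOUT when fast_io_fail_tmo is off,
 * and a finite fast_io_fail_tmo has to expire before dev_loss_tmo.
 */
static int srp_tmo_valid_sketch(int fast_io_fail_tmo, int dev_loss_tmo)
{
        if (fast_io_fail_tmo < 0 && dev_loss_tmo >= 0 &&
            (dev_loss_tmo < 1 || dev_loss_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT))
                return -EINVAL;
        if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
            fast_io_fail_tmo >= dev_loss_tmo)
                return -EINVAL;
        return 0;
}

Because the parameters are declared with S_IRUGO | S_IWUSR above, they should also be adjustable at runtime through /sys/module/ib_srp/parameters/fast_io_fail_tmo and /sys/module/ib_srp/parameters/dev_loss_tmo.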
@@ -688,23 +747,42 @@ static void srp_free_req(struct srp_target_port *target,
         spin_unlock_irqrestore(&target->lock, flags);
 }
 
-static void srp_reset_req(struct srp_target_port *target, struct srp_request *req)
+static void srp_finish_req(struct srp_target_port *target,
+                           struct srp_request *req, int result)
 {
         struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
 
         if (scmnd) {
                 srp_free_req(target, req, scmnd, 0);
-                scmnd->result = DID_RESET << 16;
+                scmnd->result = result;
                 scmnd->scsi_done(scmnd);
         }
 }
 
-static int srp_reconnect_target(struct srp_target_port *target)
+static void srp_terminate_io(struct srp_rport *rport)
 {
-        struct Scsi_Host *shost = target->scsi_host;
-        int i, ret;
+        struct srp_target_port *target = rport->lld_data;
+        int i;
 
-        scsi_target_block(&shost->shost_gendev);
+        for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+                struct srp_request *req = &target->req_ring[i];
+                srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
+        }
+}
+
+/*
+ * It is up to the caller to ensure that srp_rport_reconnect() calls are
+ * serialized and that no concurrent srp_queuecommand(), srp_abort(),
+ * srp_reset_device() or srp_reset_host() calls will occur while this function
+ * is in progress. One way to realize that is not to call this function
+ * directly but to call srp_reconnect_rport() instead since that last function
+ * serializes calls of this function via rport->mutex and also blocks
+ * srp_queuecommand() calls before invoking this function.
+ */
+static int srp_rport_reconnect(struct srp_rport *rport)
+{
+        struct srp_target_port *target = rport->lld_data;
+        int i, ret;
 
         srp_disconnect_target(target);
         /*
@@ -725,8 +803,7 @@ static int srp_reconnect_target(struct srp_target_port *target)
 
         for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
                 struct srp_request *req = &target->req_ring[i];
-                if (req->scmnd)
-                        srp_reset_req(target, req);
+                srp_finish_req(target, req, DID_RESET << 16);
         }
 
         INIT_LIST_HEAD(&target->free_tx);
@@ -736,28 +813,9 @@ static int srp_reconnect_target(struct srp_target_port *target)
         if (ret == 0)
                 ret = srp_connect_target(target);
 
-        scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
-                            SDEV_TRANSPORT_OFFLINE);
-        target->transport_offline = !!ret;
-
-        if (ret)
-                goto err;
-
-        shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
-
-        return ret;
-
-err:
-        shost_printk(KERN_ERR, target->scsi_host,
-                     PFX "reconnect failed (%d), removing target port.\n", ret);
-
-        /*
-         * We couldn't reconnect, so kill our target port off.
-         * However, we have to defer the real removal because we
-         * are in the context of the SCSI error handler now, which
-         * will deadlock if we call scsi_remove_host().
-         */
-        srp_queue_remove_work(target);
+        if (ret == 0)
+                shost_printk(KERN_INFO, target->scsi_host,
+                             PFX "reconnect succeeded\n");
 
         return ret;
 }
@@ -1356,10 +1414,11 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
         struct srp_cmd *cmd;
         struct ib_device *dev;
         unsigned long flags;
-        int len;
+        int len, result;
 
-        if (unlikely(target->transport_offline)) {
-                scmnd->result = DID_NO_CONNECT << 16;
+        result = srp_chkready(target->rport);
+        if (unlikely(result)) {
+                scmnd->result = result;
                 scmnd->scsi_done(scmnd);
                 return 0;
         }
@@ -1757,7 +1816,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
         if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
                               SRP_TSK_ABORT_TASK) == 0)
                 ret = SUCCESS;
-        else if (target->transport_offline)
+        else if (target->rport->state == SRP_RPORT_LOST)
                 ret = FAST_IO_FAIL;
         else
                 ret = FAILED;
@@ -1784,7 +1843,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
         for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
                 struct srp_request *req = &target->req_ring[i];
                 if (req->scmnd && req->scmnd->device == scmnd->device)
-                        srp_reset_req(target, req);
+                        srp_finish_req(target, req, DID_RESET << 16);
         }
 
         return SUCCESS;
@@ -1793,14 +1852,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
 static int srp_reset_host(struct scsi_cmnd *scmnd)
 {
         struct srp_target_port *target = host_to_target(scmnd->device->host);
-        int ret = FAILED;
 
         shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
 
-        if (!srp_reconnect_target(target))
-                ret = SUCCESS;
-
-        return ret;
+        return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
 }
 
 static int srp_slave_configure(struct scsi_device *sdev)
@@ -2637,7 +2692,13 @@ static void srp_remove_one(struct ib_device *device)
 }
 
 static struct srp_function_template ib_srp_transport_functions = {
+        .has_rport_state = true,
+        .reset_timer_if_blocked = true,
+        .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
+        .dev_loss_tmo = &srp_dev_loss_tmo,
+        .reconnect = srp_rport_reconnect,
         .rport_delete = srp_rport_delete,
+        .terminate_rport_io = srp_terminate_io,
 };
 
 static int __init srp_init_module(void)
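For context (not shown in this hunk): ib_srp registers this template with the SRP transport class during module initialization, which is how the new fast_io_fail_tmo/dev_loss_tmo defaults and the reconnect/terminate callbacks reach the rport layer. A rough sketch of that registration, assuming the existing srp_attach_transport()/srp_release_transport() API, might look like this (simplified, not the full srp_init_module()):

#include <scsi/scsi_transport_srp.h>

static struct scsi_transport_template *ib_srp_transport_template;

/* Sketch of the transport registration performed at module init. */
static int __init srp_transport_setup_sketch(void)
{
        ib_srp_transport_template =
                srp_attach_transport(&ib_srp_transport_functions);
        if (!ib_srp_transport_template)
                return -ENOMEM;
        /* ... remaining init: ib_sa_register_client(), ib_register_client() ... */
        return 0;
}

static void __exit srp_transport_cleanup_sketch(void)
{
        srp_release_transport(ib_srp_transport_template);
}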
@@ -140,7 +140,6 @@ struct srp_target_port {
         unsigned int cmd_sg_cnt;
         unsigned int indirect_size;
         bool allow_ext_sg;
-        bool transport_offline;
 
         /* Everything above this point is used in the hot path of
          * command processing. Try to keep them packed into cachelines.