dasd: fix error recovery for alias devices during format

A kernel panic or a hanging device can occur during format if an alias
device is set offline or I/O errors occur.

Omit the error recovery procedure (ERP) for alias devices and retry the
request on the base device with full ERP.

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Stefan Haberland 2014-07-18 14:26:01 +02:00 committed by Martin Schwidefsky
parent 8fa56aed12
commit 29b8dd9d42
4 changed files with 61 additions and 21 deletions

View File

@ -2307,17 +2307,27 @@ static int _dasd_sleep_on_queue(struct list_head *ccw_queue, int interruptible)
rc = 0;
list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) {
if (__dasd_sleep_on_erp(cqr)) {
if (!cqr->status == DASD_CQR_TERMINATED &&
!cqr->status == DASD_CQR_NEED_ERP)
break;
rc = 1;
/*
* for alias devices simplify error recovery and
* return to upper layer
*/
if (cqr->startdev != cqr->basedev &&
(cqr->status == DASD_CQR_TERMINATED ||
cqr->status == DASD_CQR_NEED_ERP))
return -EAGAIN;
else {
/* normal recovery for basedev IO */
if (__dasd_sleep_on_erp(cqr)) {
if (!cqr->status == DASD_CQR_TERMINATED &&
!cqr->status == DASD_CQR_NEED_ERP)
break;
rc = 1;
}
}
}
if (rc)
goto retry;
return 0;
}

View File

@ -2061,11 +2061,12 @@ dasd_eckd_fill_geometry(struct dasd_block *block, struct hd_geometry *geo)
static struct dasd_ccw_req *
dasd_eckd_build_format(struct dasd_device *base,
struct format_data_t *fdata)
struct format_data_t *fdata,
int enable_PAV)
{
struct dasd_eckd_private *base_priv;
struct dasd_eckd_private *start_priv;
struct dasd_device *startdev;
struct dasd_device *startdev = NULL;
struct dasd_ccw_req *fcp;
struct eckd_count *ect;
struct ch_t address;
@ -2079,7 +2080,9 @@ dasd_eckd_build_format(struct dasd_device *base,
int nr_tracks;
int use_prefix;
startdev = dasd_alias_get_start_dev(base);
if (enable_PAV)
startdev = dasd_alias_get_start_dev(base);
if (!startdev)
startdev = base;
@ -2309,6 +2312,7 @@ dasd_eckd_build_format(struct dasd_device *base,
fcp->startdev = startdev;
fcp->memdev = startdev;
fcp->basedev = base;
fcp->retries = 256;
fcp->expires = startdev->default_expires * HZ;
fcp->buildclk = get_tod_clock();
@ -2319,7 +2323,8 @@ dasd_eckd_build_format(struct dasd_device *base,
static int
dasd_eckd_format_device(struct dasd_device *base,
struct format_data_t *fdata)
struct format_data_t *fdata,
int enable_PAV)
{
struct dasd_ccw_req *cqr, *n;
struct dasd_block *block;
@ -2327,7 +2332,7 @@ dasd_eckd_format_device(struct dasd_device *base,
struct list_head format_queue;
struct dasd_device *device;
int old_stop, format_step;
int step, rc = 0;
int step, rc = 0, sleep_rc;
block = base->block;
private = (struct dasd_eckd_private *) base->private;
@ -2361,11 +2366,11 @@ dasd_eckd_format_device(struct dasd_device *base,
}
INIT_LIST_HEAD(&format_queue);
old_stop = fdata->stop_unit;
old_stop = fdata->stop_unit;
while (fdata->start_unit <= 1) {
fdata->stop_unit = fdata->start_unit;
cqr = dasd_eckd_build_format(base, fdata);
cqr = dasd_eckd_build_format(base, fdata, enable_PAV);
list_add(&cqr->blocklist, &format_queue);
fdata->stop_unit = old_stop;
@ -2383,7 +2388,7 @@ dasd_eckd_format_device(struct dasd_device *base,
if (step > format_step)
fdata->stop_unit = fdata->start_unit + format_step - 1;
cqr = dasd_eckd_build_format(base, fdata);
cqr = dasd_eckd_build_format(base, fdata, enable_PAV);
if (IS_ERR(cqr)) {
if (PTR_ERR(cqr) == -ENOMEM) {
/*
@ -2403,7 +2408,7 @@ dasd_eckd_format_device(struct dasd_device *base,
}
sleep:
dasd_sleep_on_queue(&format_queue);
sleep_rc = dasd_sleep_on_queue(&format_queue);
list_for_each_entry_safe(cqr, n, &format_queue, blocklist) {
device = cqr->startdev;
@ -2415,6 +2420,9 @@ dasd_eckd_format_device(struct dasd_device *base,
private->count--;
}
if (sleep_rc)
return sleep_rc;
/*
* in case of ENOMEM we need to retry after
* first requests are finished

View File

@ -175,6 +175,7 @@ struct dasd_ccw_req {
struct dasd_block *block; /* the originating block device */
struct dasd_device *memdev; /* the device used to allocate this */
struct dasd_device *startdev; /* device the request is started on */
struct dasd_device *basedev; /* base device if no block->base */
void *cpaddr; /* address of ccw or tcw */
unsigned char cpmode; /* 0 = cmd mode, 1 = itcw */
char status; /* status of this request */
@ -321,7 +322,7 @@ struct dasd_discipline {
int (*term_IO) (struct dasd_ccw_req *);
void (*handle_terminated_request) (struct dasd_ccw_req *);
int (*format_device) (struct dasd_device *,
struct format_data_t *);
struct format_data_t *, int enable_PAV);
int (*free_cp) (struct dasd_ccw_req *, struct request *);
/*

View File

@ -203,7 +203,9 @@ static int
dasd_format(struct dasd_block *block, struct format_data_t *fdata)
{
struct dasd_device *base;
int rc;
int enable_PAV = 1;
int rc, retries;
int start, stop;
base = block->base;
if (base->discipline->format_device == NULL)
@ -231,11 +233,30 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata)
bdput(bdev);
}
rc = base->discipline->format_device(base, fdata);
if (rc)
return rc;
retries = 255;
/* backup start- and endtrack for retries */
start = fdata->start_unit;
stop = fdata->stop_unit;
do {
rc = base->discipline->format_device(base, fdata, enable_PAV);
if (rc) {
if (rc == -EAGAIN) {
retries--;
/* disable PAV in case of errors */
enable_PAV = 0;
fdata->start_unit = start;
fdata->stop_unit = stop;
} else
return rc;
} else
/* success */
break;
} while (retries);
return 0;
if (!retries)
return -EIO;
else
return 0;
}
/*