linux_dsm_epyc7002/drivers/s390/block/dasd_eer.c
Stefan Weinhuber 5a27e60dec [S390] dasd: Improve handling of stolen DASD reservation
If a DASD device has been reserved by a Linux system, and later
this reservation is ‘stolen’ by a second system by means of an
unconditional reserve, then the first system receives a
notification about this fact. With this patch such an event can
be either ignored, as before, or it can be used to let the device
fail all I/O request, so that the device will not block anymore.

Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2011-01-05 12:47:30 +01:00

710 lines
20 KiB
C

/*
* Character device driver for extended error reporting.
*
* Copyright (C) 2005 IBM Corporation
* extended error reporting for DASD ECKD devices
* Author(s): Stefan Weinhuber <wein@de.ibm.com>
*/
#define KMSG_COMPONENT "dasd-eckd"
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/device.h>
#include <linux/poll.h>
#include <linux/mutex.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <asm/ebcdic.h>
#include "dasd_int.h"
#include "dasd_eckd.h"
#ifdef PRINTK_HEADER
#undef PRINTK_HEADER
#endif /* PRINTK_HEADER */
#define PRINTK_HEADER "dasd(eer):"
/*
* SECTION: the internal buffer
*/
/*
* The internal buffer is meant to store obaque blobs of data, so it does
* not know of higher level concepts like triggers.
* It consists of a number of pages that are used as a ringbuffer. Each data
* blob is stored in a simple record that consists of an integer, which
* contains the size of the following data, and the data bytes themselfes.
*
* To allow for multiple independent readers we create one internal buffer
* each time the device is opened and destroy the buffer when the file is
* closed again. The number of pages used for this buffer is determined by
* the module parmeter eer_pages.
*
* One record can be written to a buffer by using the functions
* - dasd_eer_start_record (one time per record to write the size to the
* buffer and reserve the space for the data)
* - dasd_eer_write_buffer (one or more times per record to write the data)
* The data can be written in several steps but you will have to compute
* the total size up front for the invocation of dasd_eer_start_record.
* If the ringbuffer is full, dasd_eer_start_record will remove the required
* number of old records.
*
* A record is typically read in two steps, first read the integer that
* specifies the size of the following data, then read the data.
* Both can be done by
* - dasd_eer_read_buffer
*
* For all mentioned functions you need to get the bufferlock first and keep
* it until a complete record is written or read.
*
* All information necessary to keep track of an internal buffer is kept in
* a struct eerbuffer. The buffer specific to a file pointer is strored in
* the private_data field of that file. To be able to write data to all
* existing buffers, each buffer is also added to the bufferlist.
* If the user does not want to read a complete record in one go, we have to
* keep track of the rest of the record. residual stores the number of bytes
* that are still to deliver. If the rest of the record is invalidated between
* two reads then residual will be set to -1 so that the next read will fail.
* All entries in the eerbuffer structure are protected with the bufferlock.
* To avoid races between writing to a buffer on the one side and creating
* and destroying buffers on the other side, the bufferlock must also be used
* to protect the bufferlist.
*/
static int eer_pages = 5;
module_param(eer_pages, int, S_IRUGO|S_IWUSR);
struct eerbuffer {
struct list_head list;
char **buffer;
int buffersize;
int buffer_page_count;
int head;
int tail;
int residual;
};
static LIST_HEAD(bufferlist);
static DEFINE_SPINLOCK(bufferlock);
static DECLARE_WAIT_QUEUE_HEAD(dasd_eer_read_wait_queue);
/*
* How many free bytes are available on the buffer.
* Needs to be called with bufferlock held.
*/
static int dasd_eer_get_free_bytes(struct eerbuffer *eerb)
{
if (eerb->head < eerb->tail)
return eerb->tail - eerb->head - 1;
return eerb->buffersize - eerb->head + eerb->tail -1;
}
/*
* How many bytes of buffer space are used.
* Needs to be called with bufferlock held.
*/
static int dasd_eer_get_filled_bytes(struct eerbuffer *eerb)
{
if (eerb->head >= eerb->tail)
return eerb->head - eerb->tail;
return eerb->buffersize - eerb->tail + eerb->head;
}
/*
* The dasd_eer_write_buffer function just copies count bytes of data
* to the buffer. Make sure to call dasd_eer_start_record first, to
* make sure that enough free space is available.
* Needs to be called with bufferlock held.
*/
static void dasd_eer_write_buffer(struct eerbuffer *eerb,
char *data, int count)
{
unsigned long headindex,localhead;
unsigned long rest, len;
char *nextdata;
nextdata = data;
rest = count;
while (rest > 0) {
headindex = eerb->head / PAGE_SIZE;
localhead = eerb->head % PAGE_SIZE;
len = min(rest, PAGE_SIZE - localhead);
memcpy(eerb->buffer[headindex]+localhead, nextdata, len);
nextdata += len;
rest -= len;
eerb->head += len;
if (eerb->head == eerb->buffersize)
eerb->head = 0; /* wrap around */
BUG_ON(eerb->head > eerb->buffersize);
}
}
/*
* Needs to be called with bufferlock held.
*/
static int dasd_eer_read_buffer(struct eerbuffer *eerb, char *data, int count)
{
unsigned long tailindex,localtail;
unsigned long rest, len, finalcount;
char *nextdata;
finalcount = min(count, dasd_eer_get_filled_bytes(eerb));
nextdata = data;
rest = finalcount;
while (rest > 0) {
tailindex = eerb->tail / PAGE_SIZE;
localtail = eerb->tail % PAGE_SIZE;
len = min(rest, PAGE_SIZE - localtail);
memcpy(nextdata, eerb->buffer[tailindex] + localtail, len);
nextdata += len;
rest -= len;
eerb->tail += len;
if (eerb->tail == eerb->buffersize)
eerb->tail = 0; /* wrap around */
BUG_ON(eerb->tail > eerb->buffersize);
}
return finalcount;
}
/*
* Whenever you want to write a blob of data to the internal buffer you
* have to start by using this function first. It will write the number
* of bytes that will be written to the buffer. If necessary it will remove
* old records to make room for the new one.
* Needs to be called with bufferlock held.
*/
static int dasd_eer_start_record(struct eerbuffer *eerb, int count)
{
int tailcount;
if (count + sizeof(count) > eerb->buffersize)
return -ENOMEM;
while (dasd_eer_get_free_bytes(eerb) < count + sizeof(count)) {
if (eerb->residual > 0) {
eerb->tail += eerb->residual;
if (eerb->tail >= eerb->buffersize)
eerb->tail -= eerb->buffersize;
eerb->residual = -1;
}
dasd_eer_read_buffer(eerb, (char *) &tailcount,
sizeof(tailcount));
eerb->tail += tailcount;
if (eerb->tail >= eerb->buffersize)
eerb->tail -= eerb->buffersize;
}
dasd_eer_write_buffer(eerb, (char*) &count, sizeof(count));
return 0;
};
/*
* Release pages that are not used anymore.
*/
static void dasd_eer_free_buffer_pages(char **buf, int no_pages)
{
int i;
for (i = 0; i < no_pages; i++)
free_page((unsigned long) buf[i]);
}
/*
* Allocate a new set of memory pages.
*/
static int dasd_eer_allocate_buffer_pages(char **buf, int no_pages)
{
int i;
for (i = 0; i < no_pages; i++) {
buf[i] = (char *) get_zeroed_page(GFP_KERNEL);
if (!buf[i]) {
dasd_eer_free_buffer_pages(buf, i);
return -ENOMEM;
}
}
return 0;
}
/*
* SECTION: The extended error reporting functionality
*/
/*
* When a DASD device driver wants to report an error, it calls the
* function dasd_eer_write and gives the respective trigger ID as
* parameter. Currently there are four kinds of triggers:
*
* DASD_EER_FATALERROR: all kinds of unrecoverable I/O problems
* DASD_EER_PPRCSUSPEND: PPRC was suspended
* DASD_EER_NOPATH: There is no path to the device left.
* DASD_EER_STATECHANGE: The state of the device has changed.
*
* For the first three triggers all required information can be supplied by
* the caller. For these triggers a record is written by the function
* dasd_eer_write_standard_trigger.
*
* The DASD_EER_STATECHANGE trigger is special since a sense subsystem
* status ccw need to be executed to gather the necessary sense data first.
* The dasd_eer_snss function will queue the SNSS request and the request
* callback will then call dasd_eer_write with the DASD_EER_STATCHANGE
* trigger.
*
* To avoid memory allocations at runtime, the necessary memory is allocated
* when the extended error reporting is enabled for a device (by
* dasd_eer_probe). There is one sense subsystem status request for each
* eer enabled DASD device. The presence of the cqr in device->eer_cqr
* indicates that eer is enable for the device. The use of the snss request
* is protected by the DASD_FLAG_EER_IN_USE bit. When this flag indicates
* that the cqr is currently in use, dasd_eer_snss cannot start a second
* request but sets the DASD_FLAG_EER_SNSS flag instead. The callback of
* the SNSS request will check the bit and call dasd_eer_snss again.
*/
#define SNSS_DATA_SIZE 44
#define DASD_EER_BUSID_SIZE 10
struct dasd_eer_header {
__u32 total_size;
__u32 trigger;
__u64 tv_sec;
__u64 tv_usec;
char busid[DASD_EER_BUSID_SIZE];
} __attribute__ ((packed));
/*
* The following function can be used for those triggers that have
* all necessary data available when the function is called.
* If the parameter cqr is not NULL, the chain of requests will be searched
* for valid sense data, and all valid sense data sets will be added to
* the triggers data.
*/
static void dasd_eer_write_standard_trigger(struct dasd_device *device,
struct dasd_ccw_req *cqr,
int trigger)
{
struct dasd_ccw_req *temp_cqr;
int data_size;
struct timeval tv;
struct dasd_eer_header header;
unsigned long flags;
struct eerbuffer *eerb;
char *sense;
/* go through cqr chain and count the valid sense data sets */
data_size = 0;
for (temp_cqr = cqr; temp_cqr; temp_cqr = temp_cqr->refers)
if (dasd_get_sense(&temp_cqr->irb))
data_size += 32;
header.total_size = sizeof(header) + data_size + 4; /* "EOR" */
header.trigger = trigger;
do_gettimeofday(&tv);
header.tv_sec = tv.tv_sec;
header.tv_usec = tv.tv_usec;
strncpy(header.busid, dev_name(&device->cdev->dev),
DASD_EER_BUSID_SIZE);
spin_lock_irqsave(&bufferlock, flags);
list_for_each_entry(eerb, &bufferlist, list) {
dasd_eer_start_record(eerb, header.total_size);
dasd_eer_write_buffer(eerb, (char *) &header, sizeof(header));
for (temp_cqr = cqr; temp_cqr; temp_cqr = temp_cqr->refers) {
sense = dasd_get_sense(&temp_cqr->irb);
if (sense)
dasd_eer_write_buffer(eerb, sense, 32);
}
dasd_eer_write_buffer(eerb, "EOR", 4);
}
spin_unlock_irqrestore(&bufferlock, flags);
wake_up_interruptible(&dasd_eer_read_wait_queue);
}
/*
* This function writes a DASD_EER_STATECHANGE trigger.
*/
static void dasd_eer_write_snss_trigger(struct dasd_device *device,
struct dasd_ccw_req *cqr,
int trigger)
{
int data_size;
int snss_rc;
struct timeval tv;
struct dasd_eer_header header;
unsigned long flags;
struct eerbuffer *eerb;
snss_rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
if (snss_rc)
data_size = 0;
else
data_size = SNSS_DATA_SIZE;
header.total_size = sizeof(header) + data_size + 4; /* "EOR" */
header.trigger = DASD_EER_STATECHANGE;
do_gettimeofday(&tv);
header.tv_sec = tv.tv_sec;
header.tv_usec = tv.tv_usec;
strncpy(header.busid, dev_name(&device->cdev->dev),
DASD_EER_BUSID_SIZE);
spin_lock_irqsave(&bufferlock, flags);
list_for_each_entry(eerb, &bufferlist, list) {
dasd_eer_start_record(eerb, header.total_size);
dasd_eer_write_buffer(eerb, (char *) &header , sizeof(header));
if (!snss_rc)
dasd_eer_write_buffer(eerb, cqr->data, SNSS_DATA_SIZE);
dasd_eer_write_buffer(eerb, "EOR", 4);
}
spin_unlock_irqrestore(&bufferlock, flags);
wake_up_interruptible(&dasd_eer_read_wait_queue);
}
/*
* This function is called for all triggers. It calls the appropriate
* function that writes the actual trigger records.
*/
void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr,
unsigned int id)
{
if (!device->eer_cqr)
return;
switch (id) {
case DASD_EER_FATALERROR:
case DASD_EER_PPRCSUSPEND:
dasd_eer_write_standard_trigger(device, cqr, id);
break;
case DASD_EER_NOPATH:
dasd_eer_write_standard_trigger(device, NULL, id);
break;
case DASD_EER_STATECHANGE:
dasd_eer_write_snss_trigger(device, cqr, id);
break;
default: /* unknown trigger, so we write it without any sense data */
dasd_eer_write_standard_trigger(device, NULL, id);
break;
}
}
EXPORT_SYMBOL(dasd_eer_write);
/*
* Start a sense subsystem status request.
* Needs to be called with the device held.
*/
void dasd_eer_snss(struct dasd_device *device)
{
struct dasd_ccw_req *cqr;
cqr = device->eer_cqr;
if (!cqr) /* Device not eer enabled. */
return;
if (test_and_set_bit(DASD_FLAG_EER_IN_USE, &device->flags)) {
/* Sense subsystem status request in use. */
set_bit(DASD_FLAG_EER_SNSS, &device->flags);
return;
}
/* cdev is already locked, can't use dasd_add_request_head */
clear_bit(DASD_FLAG_EER_SNSS, &device->flags);
cqr->status = DASD_CQR_QUEUED;
list_add(&cqr->devlist, &device->ccw_queue);
dasd_schedule_device_bh(device);
}
/*
* Callback function for use with sense subsystem status request.
*/
static void dasd_eer_snss_cb(struct dasd_ccw_req *cqr, void *data)
{
struct dasd_device *device = cqr->startdev;
unsigned long flags;
dasd_eer_write(device, cqr, DASD_EER_STATECHANGE);
spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
if (device->eer_cqr == cqr) {
clear_bit(DASD_FLAG_EER_IN_USE, &device->flags);
if (test_bit(DASD_FLAG_EER_SNSS, &device->flags))
/* Another SNSS has been requested in the meantime. */
dasd_eer_snss(device);
cqr = NULL;
}
spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
if (cqr)
/*
* Extended error recovery has been switched off while
* the SNSS request was running. It could even have
* been switched off and on again in which case there
* is a new ccw in device->eer_cqr. Free the "old"
* snss request now.
*/
dasd_kfree_request(cqr, device);
}
/*
* Enable error reporting on a given device.
*/
int dasd_eer_enable(struct dasd_device *device)
{
struct dasd_ccw_req *cqr;
unsigned long flags;
struct ccw1 *ccw;
if (device->eer_cqr)
return 0;
if (!device->discipline || strcmp(device->discipline->name, "ECKD"))
return -EPERM; /* FIXME: -EMEDIUMTYPE ? */
cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */,
SNSS_DATA_SIZE, device);
if (IS_ERR(cqr))
return -ENOMEM;
cqr->startdev = device;
cqr->retries = 255;
cqr->expires = 10 * HZ;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
set_bit(DASD_CQR_ALLOW_SLOCK, &cqr->flags);
ccw = cqr->cpaddr;
ccw->cmd_code = DASD_ECKD_CCW_SNSS;
ccw->count = SNSS_DATA_SIZE;
ccw->flags = 0;
ccw->cda = (__u32)(addr_t) cqr->data;
cqr->buildclk = get_clock();
cqr->status = DASD_CQR_FILLED;
cqr->callback = dasd_eer_snss_cb;
spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
if (!device->eer_cqr) {
device->eer_cqr = cqr;
cqr = NULL;
}
spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
if (cqr)
dasd_kfree_request(cqr, device);
return 0;
}
/*
* Disable error reporting on a given device.
*/
void dasd_eer_disable(struct dasd_device *device)
{
struct dasd_ccw_req *cqr;
unsigned long flags;
int in_use;
if (!device->eer_cqr)
return;
spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
cqr = device->eer_cqr;
device->eer_cqr = NULL;
clear_bit(DASD_FLAG_EER_SNSS, &device->flags);
in_use = test_and_clear_bit(DASD_FLAG_EER_IN_USE, &device->flags);
spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
if (cqr && !in_use)
dasd_kfree_request(cqr, device);
}
/*
* SECTION: the device operations
*/
/*
* On the one side we need a lock to access our internal buffer, on the
* other side a copy_to_user can sleep. So we need to copy the data we have
* to transfer in a readbuffer, which is protected by the readbuffer_mutex.
*/
static char readbuffer[PAGE_SIZE];
static DEFINE_MUTEX(readbuffer_mutex);
static int dasd_eer_open(struct inode *inp, struct file *filp)
{
struct eerbuffer *eerb;
unsigned long flags;
eerb = kzalloc(sizeof(struct eerbuffer), GFP_KERNEL);
if (!eerb)
return -ENOMEM;
eerb->buffer_page_count = eer_pages;
if (eerb->buffer_page_count < 1 ||
eerb->buffer_page_count > INT_MAX / PAGE_SIZE) {
kfree(eerb);
DBF_EVENT(DBF_WARNING, "can't open device since module "
"parameter eer_pages is smaller than 1 or"
" bigger than %d", (int)(INT_MAX / PAGE_SIZE));
return -EINVAL;
}
eerb->buffersize = eerb->buffer_page_count * PAGE_SIZE;
eerb->buffer = kmalloc(eerb->buffer_page_count * sizeof(char *),
GFP_KERNEL);
if (!eerb->buffer) {
kfree(eerb);
return -ENOMEM;
}
if (dasd_eer_allocate_buffer_pages(eerb->buffer,
eerb->buffer_page_count)) {
kfree(eerb->buffer);
kfree(eerb);
return -ENOMEM;
}
filp->private_data = eerb;
spin_lock_irqsave(&bufferlock, flags);
list_add(&eerb->list, &bufferlist);
spin_unlock_irqrestore(&bufferlock, flags);
return nonseekable_open(inp,filp);
}
static int dasd_eer_close(struct inode *inp, struct file *filp)
{
struct eerbuffer *eerb;
unsigned long flags;
eerb = (struct eerbuffer *) filp->private_data;
spin_lock_irqsave(&bufferlock, flags);
list_del(&eerb->list);
spin_unlock_irqrestore(&bufferlock, flags);
dasd_eer_free_buffer_pages(eerb->buffer, eerb->buffer_page_count);
kfree(eerb->buffer);
kfree(eerb);
return 0;
}
static ssize_t dasd_eer_read(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
int tc,rc;
int tailcount,effective_count;
unsigned long flags;
struct eerbuffer *eerb;
eerb = (struct eerbuffer *) filp->private_data;
if (mutex_lock_interruptible(&readbuffer_mutex))
return -ERESTARTSYS;
spin_lock_irqsave(&bufferlock, flags);
if (eerb->residual < 0) { /* the remainder of this record */
/* has been deleted */
eerb->residual = 0;
spin_unlock_irqrestore(&bufferlock, flags);
mutex_unlock(&readbuffer_mutex);
return -EIO;
} else if (eerb->residual > 0) {
/* OK we still have a second half of a record to deliver */
effective_count = min(eerb->residual, (int) count);
eerb->residual -= effective_count;
} else {
tc = 0;
while (!tc) {
tc = dasd_eer_read_buffer(eerb, (char *) &tailcount,
sizeof(tailcount));
if (!tc) {
/* no data available */
spin_unlock_irqrestore(&bufferlock, flags);
mutex_unlock(&readbuffer_mutex);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
rc = wait_event_interruptible(
dasd_eer_read_wait_queue,
eerb->head != eerb->tail);
if (rc)
return rc;
if (mutex_lock_interruptible(&readbuffer_mutex))
return -ERESTARTSYS;
spin_lock_irqsave(&bufferlock, flags);
}
}
WARN_ON(tc != sizeof(tailcount));
effective_count = min(tailcount,(int)count);
eerb->residual = tailcount - effective_count;
}
tc = dasd_eer_read_buffer(eerb, readbuffer, effective_count);
WARN_ON(tc != effective_count);
spin_unlock_irqrestore(&bufferlock, flags);
if (copy_to_user(buf, readbuffer, effective_count)) {
mutex_unlock(&readbuffer_mutex);
return -EFAULT;
}
mutex_unlock(&readbuffer_mutex);
return effective_count;
}
static unsigned int dasd_eer_poll(struct file *filp, poll_table *ptable)
{
unsigned int mask;
unsigned long flags;
struct eerbuffer *eerb;
eerb = (struct eerbuffer *) filp->private_data;
poll_wait(filp, &dasd_eer_read_wait_queue, ptable);
spin_lock_irqsave(&bufferlock, flags);
if (eerb->head != eerb->tail)
mask = POLLIN | POLLRDNORM ;
else
mask = 0;
spin_unlock_irqrestore(&bufferlock, flags);
return mask;
}
static const struct file_operations dasd_eer_fops = {
.open = &dasd_eer_open,
.release = &dasd_eer_close,
.read = &dasd_eer_read,
.poll = &dasd_eer_poll,
.owner = THIS_MODULE,
.llseek = noop_llseek,
};
static struct miscdevice *dasd_eer_dev = NULL;
int __init dasd_eer_init(void)
{
int rc;
dasd_eer_dev = kzalloc(sizeof(*dasd_eer_dev), GFP_KERNEL);
if (!dasd_eer_dev)
return -ENOMEM;
dasd_eer_dev->minor = MISC_DYNAMIC_MINOR;
dasd_eer_dev->name = "dasd_eer";
dasd_eer_dev->fops = &dasd_eer_fops;
rc = misc_register(dasd_eer_dev);
if (rc) {
kfree(dasd_eer_dev);
dasd_eer_dev = NULL;
DBF_EVENT(DBF_ERR, "%s", "dasd_eer_init could not "
"register misc device");
return rc;
}
return 0;
}
void dasd_eer_exit(void)
{
if (dasd_eer_dev) {
misc_deregister(dasd_eer_dev);
kfree(dasd_eer_dev);
dasd_eer_dev = NULL;
}
}