linux_dsm_epyc7002/drivers/block/umem.c

1137 lines
30 KiB
C
Raw Normal View History

/*
* mm.c - Micro Memory(tm) PCI memory board block device driver - v2.3
*
* (C) 2001 San Mehat <nettwerk@valinux.com>
* (C) 2001 Johannes Erdfelt <jerdfelt@valinux.com>
* (C) 2001 NeilBrown <neilb@cse.unsw.edu.au>
*
* This driver for the Micro Memory PCI Memory Module with Battery Backup
* is Copyright Micro Memory Inc 2001-2002. All rights reserved.
*
* This driver is released to the public under the terms of the
* GNU GENERAL PUBLIC LICENSE version 2
* See the file COPYING for details.
*
* This driver provides a standard block device interface for Micro Memory(tm)
* PCI based RAM boards.
* 10/05/01: Phap Nguyen - Rebuilt the driver
* 10/22/01: Phap Nguyen - v2.1 Added disk partitioning
* 29oct2001:NeilBrown - Use make_request_fn instead of request_fn
* - use standard disk partitioning (so fdisk works).
* 08nov2001:NeilBrown - change driver name from "mm" to "umem"
* - incorporate into main kernel
* 08apr2002:NeilBrown - Move some of interrupt handle to tasklet
* - use spin_lock_bh instead of _irq
* - Never block on make_request. queue
* bh's instead.
* - unregister umem from devfs at mod unload
* - Change version to 2.3
* 07Nov2001:Phap Nguyen - Select pci read command: 06, 12, 15 (Decimal)
* 07Jan2002: P. Nguyen - Used PCI Memory Write & Invalidate for DMA
* 15May2002:NeilBrown - convert to bio for 2.5
* 17May2002:NeilBrown - remove init_mem initialisation. Instead detect
* - a sequence of writes that cover the card, and
* - set initialised bit then.
*/
#undef DEBUG /* #define DEBUG if you want debugging info (pr_debug) */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mman.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. 
This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/gfp.h>
#include <linux/ioctl.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/fcntl.h> /* O_ACCMODE */
#include <linux/hdreg.h> /* HDIO_GETGEO */
#include "umem.h"
#include <asm/uaccess.h>
#include <asm/io.h>
/* Upper bound on the number of cards; sizes the cards[] and mm_gendisk[] arrays. */
#define MM_MAXCARDS 4
#define MM_RAHEAD 2 /* two sectors */
#define MM_BLKSIZE 1024 /* 1k blocks */
#define MM_HARDSECT 512 /* 512-byte hardware sectors */
#define MM_SHIFT 6 /* max 64 partitions on 4 cards */
/*
* Version Information
*/
#define DRIVER_NAME "umem"
#define DRIVER_VERSION "v2.3"
#define DRIVER_AUTHOR "San Mehat, Johannes Erdfelt, NeilBrown"
#define DRIVER_DESC "Micro Memory(tm) PCI memory board block driver"
/* Debug bitmask (module parameter); tested against the DEBUG_* bits below. */
static int debug;
/* #define HW_TRACE(x) writeb(x,cards[0].csr_remap + MEMCTRLSTATUS_MAGIC) */
/* HW_TRACE() is compiled out: the active definition expands to nothing. */
#define HW_TRACE(x)
/* debug bit: drive the LED_REMOVE LED on while a DMA chain is running */
#define DEBUG_LED_ON_TRANSFER 0x01
/* debug bit: log the raw battery status on every battery check */
#define DEBUG_BATTERY_POLLING 0x02
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug bitmask");
static int pci_read_cmd = 0x0C; /* Read Multiple */
module_param(pci_read_cmd, int, 0);
MODULE_PARM_DESC(pci_read_cmd, "PCI read command");
static int pci_write_cmd = 0x0F; /* Write and Invalidate */
module_param(pci_write_cmd, int, 0);
MODULE_PARM_DESC(pci_write_cmd, "PCI write command");
/*
* Bits OR-ed into every DMA control word built by mm_start_io() and add_bio().
* NOTE(review): presumably derived from pci_read_cmd/pci_write_cmd at init
* time; the assignment is not in this part of the file - confirm.
*/
static int pci_cmds;
/* NOTE(review): presumably the block major number obtained at module init - confirm. */
static int major_nr;
#include <linux/blkdev.h>
#include <linux/blkpg.h>
/*
* Per-card driver state: register mapping, the queue of bios waiting for
* DMA descriptors, the two descriptor pages, and battery bookkeeping.
*/
struct cardinfo {
/* PCI device; used for DMA mapping, config-space access and dev_printk() */
struct pci_dev *dev;
/* base of the memory-mapped card registers (accessed with readb/writel etc.) */
unsigned char __iomem *csr_remap;
unsigned int mm_size; /* size in kbytes */
unsigned int init_size; /* initial segment, in sectors,
* that we know to
* have been written
*/
/*
* bio/biotail: bi_next-linked queue of bios not yet turned into
* descriptors (mm_make_request() appends at *biotail).
* currentbio: the bio add_bio() is currently converting.
*/
struct bio *bio, *currentbio, **biotail;
/* position inside currentbio of the next segment to map */
struct bvec_iter current_iter;
struct request_queue *queue;
struct mm_page {
/* bus address of the desc[] array as given to the card */
dma_addr_t page_dma;
/* CPU pointer to the descriptor array */
struct mm_dma_desc *desc;
/* cnt: descriptors filled in; headcnt: index of the next one to run/complete */
int cnt, headcnt;
/* bios that own descriptors on this page, linked through bi_next */
struct bio *bio, **biotail;
/* completion position inside the first not-yet-finished bio of this page */
struct bvec_iter iter;
} mm_pages[2];
/* number of mm_dma_desc entries that fit in two PAGE_SIZE pages */
#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
/* indexes into mm_pages[]; Active is -1 while no page is being DMA'd */
int Active, Ready;
/* scheduled by mm_interrupt(); NOTE(review): presumably runs process_page() - confirm at init */
struct tasklet_struct tasklet;
/* DMA status word saved by mm_interrupt() for process_page() */
unsigned int dma_status;
struct {
/* non-zero while the battery reports OK */
int good;
/* set once the "still FAILED after 5 hours" message has been logged */
int warned;
/* jiffies value at the last good<->failed transition */
unsigned long last_change;
} battery[2];
/* taken around descriptor-page, bio-queue and battery-state updates */
spinlock_t lock;
/* set when a battery check had to be deferred because DMA was active */
int check_batteries;
/* UM_FLAG_* bits (UM_FLAG_NO_BYTE_STATUS and UM_FLAG_NO_BATT are tested in this file) */
int flags;
};
/* State for each card; check_all_batteries() walks the first num_cards entries. */
static struct cardinfo cards[MM_MAXCARDS];
/* Timer re-armed by init_battery_timer() to run check_all_batteries() once a minute. */
static struct timer_list battery_timer;
/* Number of valid entries in cards[]. */
static int num_cards;
/* NOTE(review): presumably one gendisk per card, indexed like cards[] - confirm at probe. */
static struct gendisk *mm_gendisk[MM_MAXCARDS];
/* Forward declaration: process_page() calls this before its definition. */
static void check_batteries(struct cardinfo *card);
/*
 * Return the LED control register masked with @bit: non-zero when any of
 * the requested bits is currently set, zero otherwise.
 */
static int get_userbit(struct cardinfo *card, int bit)
{
	return (unsigned char)readb(card->csr_remap + MEMCTRLCMD_LEDCTRL) & bit;
}
static int set_userbit(struct cardinfo *card, int bit, unsigned char state)
{
unsigned char led;
led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
if (state)
led |= bit;
else
led &= ~bit;
writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
return 0;
}
/*
* NOTE: For the power LED, use the LED_POWER_* macros since they differ
*/
static void set_led(struct cardinfo *card, int shift, unsigned char state)
{
unsigned char led;
led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
if (state == LED_FLIP)
led ^= (1<<shift);
else {
led &= ~(0x03 << shift);
led |= (state << shift);
}
writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
}
#ifdef MM_DIAG
static void dump_regs(struct cardinfo *card)
{
unsigned char *p;
int i, i1;
p = card->csr_remap;
for (i = 0; i < 8; i++) {
printk(KERN_DEBUG "%p ", p);
for (i1 = 0; i1 < 16; i1++)
printk("%02x ", *p++);
printk("\n");
}
}
#endif
/*
 * Log, on a single KERN_DEBUG line, the symbolic names of the status bits
 * that are set in @dmastat.
 */
static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
{
	/* status bits in the order they are reported */
	static const struct {
		unsigned int mask;
		const char *name;
	} bits[] = {
		{ DMASCR_ANY_ERR,		"ANY_ERR " },
		{ DMASCR_MBE_ERR,		"MBE_ERR " },
		{ DMASCR_PARITY_ERR_REP,	"PARITY_ERR_REP " },
		{ DMASCR_PARITY_ERR_DET,	"PARITY_ERR_DET " },
		{ DMASCR_SYSTEM_ERR_SIG,	"SYSTEM_ERR_SIG " },
		{ DMASCR_TARGET_ABT,		"TARGET_ABT " },
		{ DMASCR_MASTER_ABT,		"MASTER_ABT " },
		{ DMASCR_CHAIN_COMPLETE,	"CHAIN_COMPLETE " },
		{ DMASCR_DMA_COMPLETE,		"DMA_COMPLETE " },
	};
	int i;

	dev_printk(KERN_DEBUG, &card->dev->dev, "DMAstat - ");
	for (i = 0; i < ARRAY_SIZE(bits); i++)
		if (dmastat & bits[i].mask)
			printk(KERN_CONT "%s", bits[i].name);
	/* terminate the continued line; without KERN_CONT this would start a new record */
	printk(KERN_CONT "\n");
}
/*
* Theory of request handling
*
* Each bio is assigned to one mm_dma_desc - which may not be enough FIXME
* We have two pages of mm_dma_desc, holding about 64 descriptors
* each. These are allocated at init time.
* One page is "Ready" and is either full, or can have requests added.
* The other page might be "Active", in which case DMA is happening on it.
*
* Whenever IO on the active page completes, the Ready page is activated
* and the ex-Active page is cleaned out and made Ready.
* Otherwise the Ready page is only activated when it becomes full.
*
* If a request arrives while both pages are full, it is queued, and b_rdev is
* overloaded to record whether it was a read or a write.
*
* The interrupt handler only polls the device to clear the interrupt.
* The processing of the result is done in a tasklet.
*/
static void mm_start_io(struct cardinfo *card)
{
/* we have the lock, we know there is
* no IO active, and we know that card->Active
* is set
*/
struct mm_dma_desc *desc;
struct mm_page *page;
int offset;
/* make the last descriptor end the chain */
page = &card->mm_pages[card->Active];
pr_debug("start_io: %d %d->%d\n",
card->Active, page->headcnt, page->cnt - 1);
desc = &page->desc[page->cnt-1];
desc->control_bits |= cpu_to_le32(DMASCR_CHAIN_COMP_EN);
desc->control_bits &= ~cpu_to_le32(DMASCR_CHAIN_EN);
desc->sem_control_bits = desc->control_bits;
if (debug & DEBUG_LED_ON_TRANSFER)
set_led(card, LED_REMOVE, LED_ON);
desc = &page->desc[page->headcnt];
writel(0, card->csr_remap + DMA_PCI_ADDR);
writel(0, card->csr_remap + DMA_PCI_ADDR + 4);
writel(0, card->csr_remap + DMA_LOCAL_ADDR);
writel(0, card->csr_remap + DMA_LOCAL_ADDR + 4);
writel(0, card->csr_remap + DMA_TRANSFER_SIZE);
writel(0, card->csr_remap + DMA_TRANSFER_SIZE + 4);
writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR);
writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR + 4);
offset = ((char *)desc) - ((char *)page->desc);
writel(cpu_to_le32((page->page_dma+offset) & 0xffffffff),
card->csr_remap + DMA_DESCRIPTOR_ADDR);
/* Force the value to u64 before shifting otherwise >> 32 is undefined C
* and on some ports will do nothing ! */
writel(cpu_to_le32(((u64)page->page_dma)>>32),
card->csr_remap + DMA_DESCRIPTOR_ADDR + 4);
/* Go, go, go */
writel(cpu_to_le32(DMASCR_GO | DMASCR_CHAIN_EN | pci_cmds),
card->csr_remap + DMA_STATUS_CTRL);
}
static int add_bio(struct cardinfo *card);
/*
 * Fill the Ready page with as many queued bio segments as fit.  If no page
 * is Active and the Ready page holds at least one descriptor, promote it
 * to Active, start the IO, and go round again to fill the other page.
 */
static void activate(struct cardinfo *card)
{
	for (;;) {
		/* move everything that fits onto the Ready page */
		while (add_bio(card))
			;

		if (card->Active == -1 &&
		    card->mm_pages[card->Ready].cnt > 0) {
			card->Active = card->Ready;
			card->Ready = 1 - card->Ready;
			mm_start_io(card);
		}

		/* repeat only while nothing is Active and more work was added */
		if (card->Active != -1 || !add_bio(card))
			break;
	}
}
/* Return a descriptor page to the empty state: no descriptors, no bios. */
static inline void reset_page(struct mm_page *page)
{
	/* empty bio list: the tail pointer refers back to the head */
	page->bio = NULL;
	page->biotail = &page->bio;

	page->headcnt = 0;
	page->cnt = 0;
}
/*
* If there is room on Ready page, take
* one bh off list and add it.
* return 1 if there was room, else 0.
*/
static int add_bio(struct cardinfo *card)
{
struct mm_page *p;
struct mm_dma_desc *desc;
dma_addr_t dma_handle;
int offset;
struct bio *bio;
struct bio_vec vec;
bio = card->currentbio;
if (!bio && card->bio) {
card->currentbio = card->bio;
card->current_iter = card->bio->bi_iter;
card->bio = card->bio->bi_next;
if (card->bio == NULL)
card->biotail = &card->bio;
card->currentbio->bi_next = NULL;
return 1;
}
if (!bio)
return 0;
if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
return 0;
vec = bio_iter_iovec(bio, card->current_iter);
dma_handle = pci_map_page(card->dev,
vec.bv_page,
vec.bv_offset,
vec.bv_len,
bio_op(bio) == REQ_OP_READ ?
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
p = &card->mm_pages[card->Ready];
desc = &p->desc[p->cnt];
p->cnt++;
if (p->bio == NULL)
p->iter = card->current_iter;
if ((p->biotail) != &bio->bi_next) {
*(p->biotail) = bio;
p->biotail = &(bio->bi_next);
bio->bi_next = NULL;
}
desc->data_dma_handle = dma_handle;
desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9);
desc->transfer_size = cpu_to_le32(vec.bv_len);
offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc));
desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
desc->zero1 = desc->zero2 = 0;
offset = (((char *)(desc+1)) - ((char *)p->desc));
desc->next_desc_addr = cpu_to_le64(p->page_dma+offset);
desc->control_bits = cpu_to_le32(DMASCR_GO|DMASCR_ERR_INT_EN|
DMASCR_PARITY_INT_EN|
DMASCR_CHAIN_EN |
DMASCR_SEM_EN |
pci_cmds);
if (bio_op(bio) == REQ_OP_WRITE)
desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
desc->sem_control_bits = desc->control_bits;
bio_advance_iter(bio, &card->current_iter, vec.bv_len);
if (!card->current_iter.bi_size)
card->currentbio = NULL;
return 1;
}
static void process_page(unsigned long data)
{
/* check if any of the requests in the page are DMA_COMPLETE,
* and deal with them appropriately.
* If we find a descriptor without DMA_COMPLETE in the semaphore, then
* dma must have hit an error on that descriptor, so use dma_status
* instead and assume that all following descriptors must be re-tried.
*/
struct mm_page *page;
struct bio *return_bio = NULL;
struct cardinfo *card = (struct cardinfo *)data;
unsigned int dma_status = card->dma_status;
spin_lock_bh(&card->lock);
if (card->Active < 0)
goto out_unlock;
page = &card->mm_pages[card->Active];
while (page->headcnt < page->cnt) {
struct bio *bio = page->bio;
struct mm_dma_desc *desc = &page->desc[page->headcnt];
int control = le32_to_cpu(desc->sem_control_bits);
int last = 0;
struct bio_vec vec;
if (!(control & DMASCR_DMA_COMPLETE)) {
control = dma_status;
last = 1;
}
page->headcnt++;
vec = bio_iter_iovec(bio, page->iter);
bio_advance_iter(bio, &page->iter, vec.bv_len);
if (!page->iter.bi_size) {
page->bio = bio->bi_next;
if (page->bio)
page->iter = page->bio->bi_iter;
}
pci_unmap_page(card->dev, desc->data_dma_handle,
vec.bv_len,
(control & DMASCR_TRANSFER_READ) ?
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
if (control & DMASCR_HARD_ERROR) {
/* error */
bio->bi_error = -EIO;
dev_printk(KERN_WARNING, &card->dev->dev,
"I/O error on sector %d/%d\n",
le32_to_cpu(desc->local_addr)>>9,
le32_to_cpu(desc->transfer_size));
dump_dmastat(card, control);
} else if (op_is_write(bio_op(bio)) &&
le32_to_cpu(desc->local_addr) >> 9 ==
card->init_size) {
card->init_size += le32_to_cpu(desc->transfer_size) >> 9;
if (card->init_size >> 1 >= card->mm_size) {
dev_printk(KERN_INFO, &card->dev->dev,
"memory now initialised\n");
set_userbit(card, MEMORY_INITIALIZED, 1);
}
}
if (bio != page->bio) {
bio->bi_next = return_bio;
return_bio = bio;
}
if (last)
break;
}
if (debug & DEBUG_LED_ON_TRANSFER)
set_led(card, LED_REMOVE, LED_OFF);
if (card->check_batteries) {
card->check_batteries = 0;
check_batteries(card);
}
if (page->headcnt >= page->cnt) {
reset_page(page);
card->Active = -1;
activate(card);
} else {
/* haven't finished with this one yet */
pr_debug("do some more\n");
mm_start_io(card);
}
out_unlock:
spin_unlock_bh(&card->lock);
while (return_bio) {
struct bio *bio = return_bio;
return_bio = bio->bi_next;
bio->bi_next = NULL;
bio_endio(bio);
}
}
/*
 * Plug callback registered by mm_check_plugged(): start the IO that was
 * held back while the submitter was plugged, then free the callback.
 * @from_schedule is not used.
 */
static void mm_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct cardinfo *card = cb->data;

	spin_lock_irq(&card->lock);
	activate(card);
	spin_unlock_irq(&card->lock);

	/* release the callback structure handed to us */
	kfree(cb);
}
static int mm_check_plugged(struct cardinfo *card)
{
return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb));
}
static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
{
struct cardinfo *card = q->queuedata;
pr_debug("mm_make_request %llu %u\n",
block: Abstract out bvec iterator Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Alasdair Kergon <agk@redhat.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: dm-devel@redhat.com Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Boaz Harrosh <bharrosh@panasas.com> Cc: Benny Halevy <bhalevy@tonian.com> Cc: "James E.J. Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Nicholas A. 
Bellinger" <nab@linux-iscsi.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Chris Mason <chris.mason@fusionio.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: Jaegeuk Kim <jaegeuk.kim@samsung.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Dave Kleikamp <shaggy@kernel.org> Cc: Joern Engel <joern@logfs.org> Cc: Prasad Joshi <prasadjoshi.linux@gmail.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Ben Myers <bpm@sgi.com> Cc: xfs@oss.sgi.com Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Ben Hutchings <ben@decadent.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Tejun Heo <tj@kernel.org> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Wei Yongjun <yongjun_wei@trendmicro.com.cn> Cc: "Roger Pau Monné" <roger.pau@citrix.com> Cc: Jan Beulich <jbeulich@suse.com> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Cc: Ian Campbell <Ian.Campbell@citrix.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Jiang Liu <jiang.liu@huawei.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Jerome Marchand <jmarchand@redhat.com> Cc: Joe Perches <joe@perches.com> Cc: Peng Tao <tao.peng@emc.com> Cc: Andy Adamson <andros@netapp.com> Cc: fanchaoting <fanchaoting@cn.fujitsu.com> Cc: Jie Liu <jeff.liu@oracle.com> Cc: Sunil Mushran <sunil.mushran@gmail.com> Cc: "Martin K. 
Petersen" <martin.petersen@oracle.com> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Pankaj Kumar <pankaj.km@samsung.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Mel Gorman <mgorman@suse.de>6
2013-10-12 05:44:27 +07:00
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size);
block: make generic_make_request handle arbitrarily sized bios The way the block layer is currently written, it goes to great lengths to avoid having to split bios; upper layer code (such as bio_add_page()) checks what the underlying device can handle and tries to always create bios that don't need to be split. But this approach becomes unwieldy and eventually breaks down with stacked devices and devices with dynamic limits, and it adds a lot of complexity. If the block layer could split bios as needed, we could eliminate a lot of complexity elsewhere - particularly in stacked drivers. Code that creates bios can then create whatever size bios are convenient, and more importantly stacked drivers don't have to deal with both their own bio size limitations and the limitations of the (potentially multiple) devices underneath them. In the future this will let us delete merge_bvec_fn and a bunch of other code. We do this by adding calls to blk_queue_split() to the various make_request functions that need it - a few can already handle arbitrary size bios. Note that we add the call _after_ any call to blk_queue_bounce(); this means that blk_queue_split() and blk_recalc_rq_segments() don't need to be concerned with bouncing affecting segment merging. Some make_request_fn() callbacks were simple enough to audit and verify they don't need blk_queue_split() calls. The skipped ones are: * nfhd_make_request (arch/m68k/emu/nfblock.c) * axon_ram_make_request (arch/powerpc/sysdev/axonram.c) * simdisk_make_request (arch/xtensa/platforms/iss/simdisk.c) * brd_make_request (ramdisk - drivers/block/brd.c) * mtip_submit_request (drivers/block/mtip32xx/mtip32xx.c) * loop_make_request * null_queue_bio * bcache's make_request fns Some others are almost certainly safe to remove now, but will be left for future patches. 
Cc: Jens Axboe <axboe@kernel.dk> Cc: Christoph Hellwig <hch@infradead.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Ming Lei <ming.lei@canonical.com> Cc: Neil Brown <neilb@suse.de> Cc: Alasdair Kergon <agk@redhat.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: dm-devel@redhat.com Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: drbd-user@lists.linbit.com Cc: Jiri Kosina <jkosina@suse.cz> Cc: Geoff Levand <geoff@infradead.org> Cc: Jim Paris <jim@jtan.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Oleg Drokin <oleg.drokin@intel.com> Cc: Andreas Dilger <andreas.dilger@intel.com> Acked-by: NeilBrown <neilb@suse.de> (for the 'md/md.c' bits) Acked-by: Mike Snitzer <snitzer@redhat.com> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> [dpark: skip more mq-based drivers, resolve merge conflicts, etc.] Signed-off-by: Dongsu Park <dpark@posteo.net> Signed-off-by: Ming Lin <ming.l@ssi.samsung.com> Signed-off-by: Jens Axboe <axboe@fb.com>
2015-04-24 12:37:18 +07:00
blk_queue_split(q, &bio, q->bio_split);
spin_lock_irq(&card->lock);
*card->biotail = bio;
bio->bi_next = NULL;
card->biotail = &bio->bi_next;
if (bio->bi_opf & REQ_SYNC || !mm_check_plugged(card))
activate(card);
spin_unlock_irq(&card->lock);
return BLK_QC_T_NONE;
}
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers Maintain a per-CPU global "struct pt_regs *" variable which can be used instead of passing regs around manually through all ~1800 interrupt handlers in the Linux kernel. The regs pointer is used in few places, but it potentially costs both stack space and code to pass it around. On the FRV arch, removing the regs parameter from all the genirq function results in a 20% speed up of the IRQ exit path (ie: from leaving timer_interrupt() to leaving do_IRQ()). Where appropriate, an arch may override the generic storage facility and do something different with the variable. On FRV, for instance, the address is maintained in GR28 at all times inside the kernel as part of general exception handling. Having looked over the code, it appears that the parameter may be handed down through up to twenty or so layers of functions. Consider a USB character device attached to a USB hub, attached to a USB controller that posts its interrupts through a cascaded auxiliary interrupt controller. A character device driver may want to pass regs to the sysrq handler through the input layer which adds another few layers of parameter passing. I've build this code with allyesconfig for x86_64 and i386. I've runtested the main part of the code on FRV and i386, though I can't test most of the drivers. I've also done partial conversion for powerpc and MIPS - these at least compile with minimal configurations. This will affect all archs. Mostly the changes should be relatively easy. Take do_IRQ(), store the regs pointer at the beginning, saving the old one: struct pt_regs *old_regs = set_irq_regs(regs); And put the old one back at the end: set_irq_regs(old_regs); Don't pass regs through to generic_handle_irq() or __do_IRQ(). 
In timer_interrupt(), this sort of change will be necessary: - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(get_irq_regs())); + profile_tick(CPU_PROFILING); I'd like to move update_process_times()'s use of get_irq_regs() into itself, except that i386, alone of the archs, uses something other than user_mode(). Some notes on the interrupt handling in the drivers: (*) input_dev() is now gone entirely. The regs pointer is no longer stored in the input_dev struct. (*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does something different depending on whether it's been supplied with a regs pointer or not. (*) Various IRQ handler function pointers have been moved to type irq_handler_t. Signed-Off-By: David Howells <dhowells@redhat.com> (cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 20:55:46 +07:00
static irqreturn_t mm_interrupt(int irq, void *__card)
{
struct cardinfo *card = (struct cardinfo *) __card;
unsigned int dma_status;
unsigned short cfg_status;
HW_TRACE(0x30);
dma_status = le32_to_cpu(readl(card->csr_remap + DMA_STATUS_CTRL));
if (!(dma_status & (DMASCR_ERROR_MASK | DMASCR_CHAIN_COMPLETE))) {
/* interrupt wasn't for me ... */
return IRQ_NONE;
}
/* clear COMPLETION interrupts */
if (card->flags & UM_FLAG_NO_BYTE_STATUS)
writel(cpu_to_le32(DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE),
card->csr_remap + DMA_STATUS_CTRL);
else
writeb((DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE) >> 16,
card->csr_remap + DMA_STATUS_CTRL + 2);
/* log errors and clear interrupt status */
if (dma_status & DMASCR_ANY_ERR) {
unsigned int data_log1, data_log2;
unsigned int addr_log1, addr_log2;
unsigned char stat, count, syndrome, check;
stat = readb(card->csr_remap + MEMCTRLCMD_ERRSTATUS);
data_log1 = le32_to_cpu(readl(card->csr_remap +
ERROR_DATA_LOG));
data_log2 = le32_to_cpu(readl(card->csr_remap +
ERROR_DATA_LOG + 4));
addr_log1 = le32_to_cpu(readl(card->csr_remap +
ERROR_ADDR_LOG));
addr_log2 = readb(card->csr_remap + ERROR_ADDR_LOG + 4);
count = readb(card->csr_remap + ERROR_COUNT);
syndrome = readb(card->csr_remap + ERROR_SYNDROME);
check = readb(card->csr_remap + ERROR_CHECK);
dump_dmastat(card, dma_status);
if (stat & 0x01)
dev_printk(KERN_ERR, &card->dev->dev,
"Memory access error detected (err count %d)\n",
count);
if (stat & 0x02)
dev_printk(KERN_ERR, &card->dev->dev,
"Multi-bit EDC error\n");
dev_printk(KERN_ERR, &card->dev->dev,
"Fault Address 0x%02x%08x, Fault Data 0x%08x%08x\n",
addr_log2, addr_log1, data_log2, data_log1);
dev_printk(KERN_ERR, &card->dev->dev,
"Fault Check 0x%02x, Fault Syndrome 0x%02x\n",
check, syndrome);
writeb(0, card->csr_remap + ERROR_COUNT);
}
if (dma_status & DMASCR_PARITY_ERR_REP) {
dev_printk(KERN_ERR, &card->dev->dev,
"PARITY ERROR REPORTED\n");
pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
}
if (dma_status & DMASCR_PARITY_ERR_DET) {
dev_printk(KERN_ERR, &card->dev->dev,
"PARITY ERROR DETECTED\n");
pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
}
if (dma_status & DMASCR_SYSTEM_ERR_SIG) {
dev_printk(KERN_ERR, &card->dev->dev, "SYSTEM ERROR\n");
pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
}
if (dma_status & DMASCR_TARGET_ABT) {
dev_printk(KERN_ERR, &card->dev->dev, "TARGET ABORT\n");
pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
}
if (dma_status & DMASCR_MASTER_ABT) {
dev_printk(KERN_ERR, &card->dev->dev, "MASTER ABORT\n");
pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
}
/* and process the DMA descriptors */
card->dma_status = dma_status;
tasklet_schedule(&card->tasklet);
HW_TRACE(0x36);
return IRQ_HANDLED;
}
/*
* If both batteries are good, no LED
* If either battery has been warned, solid LED
* If both batteries are bad, flash the LED quickly
* If either battery is bad, flash the LED semi quickly
*/
/*
 * Drive the fault LED from the recorded battery state:
 *   both good        -> off
 *   either warned    -> solid on
 *   both bad         -> LED_FLASH_7_0
 *   exactly one bad  -> LED_FLASH_3_5
 */
static void set_fault_to_battery_status(struct cardinfo *card)
{
	int good0 = card->battery[0].good;
	int good1 = card->battery[1].good;
	unsigned char state;

	if (good0 && good1)
		state = LED_OFF;
	else if (card->battery[0].warned || card->battery[1].warned)
		state = LED_ON;
	else if (!good0 && !good1)
		state = LED_FLASH_7_0;
	else
		state = LED_FLASH_3_5;

	set_led(card, LED_FAULT, state);
}
static void init_battery_timer(void);
/*
 * Compare the freshly read @status (non-zero = OK) of battery number
 * @battery (0 or 1) with the recorded state and log any transition.  A
 * battery that has stayed failed for five hours gets one extra warning.
 *
 * Returns 1 when the recorded state changed (so the fault LED needs
 * updating), 0 otherwise.
 */
static int check_battery(struct cardinfo *card, int battery, int status)
{
	if (status != card->battery[battery].good) {
		/* state flipped: record it and when it happened */
		card->battery[battery].good = !card->battery[battery].good;
		card->battery[battery].last_change = jiffies;

		if (!card->battery[battery].good) {
			dev_printk(KERN_ERR, &card->dev->dev,
				"Battery %d now FAILED\n", battery + 1);
		} else {
			dev_printk(KERN_ERR, &card->dev->dev,
				"Battery %d now good\n", battery + 1);
			card->battery[battery].warned = 0;
		}
		return 1;
	}

	/* unchanged: only a failed, not-yet-warned battery needs attention */
	if (card->battery[battery].good || card->battery[battery].warned)
		return 0;

	if (!time_after_eq(jiffies, card->battery[battery].last_change +
			   (HZ * 60 * 60 * 5)))
		return 0;

	dev_printk(KERN_ERR, &card->dev->dev,
		"Battery %d still FAILED after 5 hours\n", battery + 1);
	card->battery[battery].warned = 1;
	return 1;
}
/*
 * Read the battery status register, update the recorded state of both
 * batteries and refresh the fault LED if either changed.
 *
 * NOTE: this must *never* be called while the card is doing
 * (bus-to-card) DMA, or you will need the reset switch.
 */
static void check_batteries(struct cardinfo *card)
{
	unsigned char raw = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY);
	int changed;

	if (debug & DEBUG_BATTERY_POLLING)
		dev_printk(KERN_DEBUG, &card->dev->dev,
			"checking battery status, 1 = %s, 2 = %s\n",
			(raw & BATTERY_1_FAILURE) ? "FAILURE" : "OK",
			(raw & BATTERY_2_FAILURE) ? "FAILURE" : "OK");

	/* both batteries are always evaluated; each call returns 0 or 1 */
	changed = check_battery(card, 0, !(raw & BATTERY_1_FAILURE));
	changed |= check_battery(card, 1, !(raw & BATTERY_2_FAILURE));

	if (changed)
		set_fault_to_battery_status(card);
}
static void check_all_batteries(unsigned long ptr)
{
int i;
for (i = 0; i < num_cards; i++)
if (!(cards[i].flags & UM_FLAG_NO_BATT)) {
struct cardinfo *card = &cards[i];
spin_lock_bh(&card->lock);
if (card->Active >= 0)
card->check_batteries = 1;
else
check_batteries(card);
spin_unlock_bh(&card->lock);
}
init_battery_timer();
}
/*
 * (Re)initialise battery_timer and queue it to call check_all_batteries()
 * sixty seconds from now.  Used both for the initial start and for
 * re-arming from the handler itself.
 */
static void init_battery_timer(void)
{
	init_timer(&battery_timer);

	battery_timer.expires = jiffies + (HZ * 60);
	battery_timer.function = check_all_batteries;

	add_timer(&battery_timer);
}
/*
 * Stop the periodic battery poll.
 *
 * check_all_batteries() re-arms battery_timer from inside the handler, so a
 * plain del_timer() could return while the handler is still running on
 * another CPU and about to queue the timer again.  del_timer_sync() waits
 * for a running handler to finish and then removes the timer, so nothing
 * is left pending once this returns.
 *
 * Must be called from process context, without holding a lock that the
 * handler takes (it takes each card's lock).
 */
static void del_battery_timer(void)
{
	del_timer_sync(&battery_timer);
}
/*
 * Re-derive the disk capacity from the card's recorded memory size.
 *
 * Note that no locks are taken here.  The original reasoning was that, in a
 * worst case scenario, a chunk of system memory could be dropped, but that
 * this should never happen because validation happens at open or mount
 * time, when locks are held.
 *
 * A later reviewer disagreed: doing this while some partitions are opened
 * or mounted will give really nasty results.
 */
static int mm_revalidate(struct gendisk *disk)
{
	struct cardinfo *info = disk->private_data;

	/* mm_size is doubled to obtain the capacity passed to the block layer */
	set_capacity(disk, info->mm_size << 1);

	return 0;
}
/*
 * Report a made-up CHS geometry for the card.
 *
 * There is no real geometry, so one is faked: 64 heads and 32 sectors per
 * track (2048 sectors, i.e. 1M, per cylinder) and however many whole
 * cylinders fit; any remainder is trimmed off.
 *
 * Always returns 0.
 */
static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	enum { FAKE_HEADS = 64, FAKE_SECTORS = 32 };
	struct cardinfo *info = bdev->bd_disk->private_data;
	int nr_sectors = info->mm_size * (1024 / MM_HARDSECT);

	geo->cylinders = nr_sectors / (FAKE_HEADS * FAKE_SECTORS);
	geo->sectors = FAKE_SECTORS;
	geo->heads = FAKE_HEADS;

	return 0;
}
/*
 * Block device operations installed as disk->fops for every umem disk in
 * mm_init().  Only geometry reporting and capacity revalidation are
 * provided.
 */
static const struct block_device_operations mm_fops = {
.owner = THIS_MODULE,
.getgeo = mm_getgeo, /* reports the faked CHS geometry */
.revalidate_disk = mm_revalidate, /* resets capacity from card->mm_size */
};
/*
 * mm_pci_probe() - set up one Micro Memory card handed over by the PCI core.
 * @dev: the PCI device to initialise
 * @id:  the mm_pci_ids entry that matched (unused)
 *
 * Fills in cards[num_cards]: enables the device, maps the CSR window in
 * BAR 0, checks the per-model magic byte, allocates the two DMA descriptor
 * pages and the request queue, reads the memory size and battery state,
 * hooks the (shared) interrupt and finally increments num_cards.
 *
 * Return: 0 on success, a negative errno otherwise.  On failure every
 * resource acquired so far is released and the device is disabled again.
 *
 * NOTE(review): num_cards is not checked against the capacity of cards[]
 * in this function; confirm that the bound is enforced elsewhere.
 */
static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	int ret;
	struct cardinfo *card = &cards[num_cards];
	unsigned char mem_present;
	unsigned char batt_status;
	unsigned int saved_bar, data;
	unsigned long csr_base;
	unsigned long csr_len;
	int magic_number;
	static int printed_version;

	if (!printed_version++)
		printk(KERN_INFO DRIVER_VERSION " : " DRIVER_DESC "\n");

	ret = pci_enable_device(dev);
	if (ret)
		return ret;

	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF8);
	pci_set_master(dev);

	card->dev = dev;

	csr_base = pci_resource_start(dev, 0);
	csr_len  = pci_resource_len(dev, 0);
	if (!csr_base || !csr_len) {
		ret = -ENODEV;
		goto failed_disable;
	}

	dev_printk(KERN_INFO, &dev->dev,
		   "Micro Memory(tm) controller found (PCI Mem Module (Battery Backup))\n");

	/* Prefer 64-bit DMA, fall back to 32-bit. */
	if (pci_set_dma_mask(dev, DMA_BIT_MASK(64)) &&
	    pci_set_dma_mask(dev, DMA_BIT_MASK(32))) {
		dev_printk(KERN_WARNING, &dev->dev, "NO suitable DMA found\n");
		ret = -ENOMEM;
		goto failed_disable;
	}

	ret = pci_request_regions(dev, DRIVER_NAME);
	if (ret) {
		dev_printk(KERN_ERR, &card->dev->dev,
			   "Unable to request memory region\n");
		goto failed_req_csr;
	}

	card->csr_remap = ioremap_nocache(csr_base, csr_len);
	if (!card->csr_remap) {
		dev_printk(KERN_ERR, &card->dev->dev,
			   "Unable to remap memory region\n");
		ret = -ENOMEM;
		goto failed_remap_csr;
	}

	dev_printk(KERN_INFO, &card->dev->dev,
		   "CSR 0x%08lx -> 0x%p (0x%lx)\n",
		   csr_base, card->csr_remap, csr_len);

	/* Per-model quirks and the magic byte each model must report. */
	switch (card->dev->device) {
	case 0x5415:
		card->flags |= UM_FLAG_NO_BYTE_STATUS | UM_FLAG_NO_BATTREG;
		magic_number = 0x59;
		break;

	case 0x5425:
		card->flags |= UM_FLAG_NO_BYTE_STATUS;
		magic_number = 0x5C;
		break;

	case 0x6155:
		card->flags |= UM_FLAG_NO_BYTE_STATUS |
				UM_FLAG_NO_BATTREG | UM_FLAG_NO_BATT;
		magic_number = 0x99;
		break;

	default:
		/* Cannot equal a byte read, so unknown models are rejected. */
		magic_number = 0x100;
		break;
	}

	if (readb(card->csr_remap + MEMCTRLSTATUS_MAGIC) != magic_number) {
		dev_printk(KERN_ERR, &card->dev->dev, "Magic number invalid\n");
		ret = -ENOMEM;
		goto failed_magic;
	}

	card->mm_pages[0].desc = pci_alloc_consistent(card->dev,
						PAGE_SIZE * 2,
						&card->mm_pages[0].page_dma);
	card->mm_pages[1].desc = pci_alloc_consistent(card->dev,
						PAGE_SIZE * 2,
						&card->mm_pages[1].page_dma);
	if (card->mm_pages[0].desc == NULL ||
	    card->mm_pages[1].desc == NULL) {
		dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n");
		/* ret still holds 0 from pci_request_regions(); report failure */
		ret = -ENOMEM;
		goto failed_alloc;
	}
	reset_page(&card->mm_pages[0]);
	reset_page(&card->mm_pages[1]);
	card->Ready = 0;	/* page 0 is ready */
	card->Active = -1;	/* no page is active */
	card->bio = NULL;
	card->biotail = &card->bio;

	/*
	 * The lock must be usable before it is handed to the queue and
	 * before the interrupt handler can run.
	 */
	spin_lock_init(&card->lock);

	card->queue = blk_alloc_queue(GFP_KERNEL);
	if (!card->queue) {
		ret = -ENOMEM;
		goto failed_alloc;
	}

	blk_queue_make_request(card->queue, mm_make_request);
	/*
	 * The block layer's queue_flag_* helpers warn unless q->queue_lock
	 * is held.  This driver serialises everything on card->lock, so hand
	 * that lock to the queue.
	 */
	card->queue->queue_lock = &card->lock;
	card->queue->queuedata = card;

	tasklet_init(&card->tasklet, process_page, (unsigned long)card);

	card->check_batteries = 0;

	/* mm_size is kept in KB; an unrecognised code yields size 0. */
	mem_present = readb(card->csr_remap + MEMCTRLSTATUS_MEMORY);
	switch (mem_present) {
	case MEM_128_MB:
		card->mm_size = 1024 * 128;
		break;
	case MEM_256_MB:
		card->mm_size = 1024 * 256;
		break;
	case MEM_512_MB:
		card->mm_size = 1024 * 512;
		break;
	case MEM_1_GB:
		card->mm_size = 1024 * 1024;
		break;
	case MEM_2_GB:
		card->mm_size = 1024 * 2048;
		break;
	default:
		card->mm_size = 0;
		break;
	}

	/* Clear the LED's we control */
	set_led(card, LED_REMOVE, LED_OFF);
	set_led(card, LED_FAULT, LED_OFF);

	batt_status = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY);

	card->battery[0].good = !(batt_status & BATTERY_1_FAILURE);
	card->battery[1].good = !(batt_status & BATTERY_2_FAILURE);
	card->battery[0].last_change = card->battery[1].last_change = jiffies;

	if (card->flags & UM_FLAG_NO_BATT)
		dev_printk(KERN_INFO, &card->dev->dev,
			   "Size %d KB\n", card->mm_size);
	else {
		dev_printk(KERN_INFO, &card->dev->dev,
			   "Size %d KB, Battery 1 %s (%s), Battery 2 %s (%s)\n",
			   card->mm_size,
			   batt_status & BATTERY_1_DISABLED ? "Disabled" : "Enabled",
			   card->battery[0].good ? "OK" : "FAILURE",
			   batt_status & BATTERY_2_DISABLED ? "Disabled" : "Enabled",
			   card->battery[1].good ? "OK" : "FAILURE");

		set_fault_to_battery_status(card);
	}

	/*
	 * Size BAR 1 the classic way: write all ones, read back the mask,
	 * restore the saved value, then turn the mask into a byte count.
	 */
	pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &saved_bar);
	data = 0xffffffff;
	pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, data);
	pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &data);
	pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, saved_bar);
	data &= 0xfffffff0;
	data = ~data;
	data += 1;

	if (request_irq(dev->irq, mm_interrupt, IRQF_SHARED, DRIVER_NAME,
			card)) {
		dev_printk(KERN_ERR, &card->dev->dev,
			   "Unable to allocate IRQ\n");
		ret = -ENODEV;
		goto failed_req_irq;
	}

	dev_printk(KERN_INFO, &card->dev->dev,
		   "Window size %d bytes, IRQ %d\n", data, dev->irq);

	pci_set_drvdata(dev, card);

	if (pci_write_cmd != 0x0F)	/* If not Memory Write & Invalidate */
		pci_write_cmd = 0x07;	/* then Memory Write command */

	if (pci_write_cmd & 0x08) {	/* use Memory Write and Invalidate */
		unsigned short cfg_command;

		pci_read_config_word(dev, PCI_COMMAND, &cfg_command);
		cfg_command |= 0x10;	/* Memory Write & Invalidate Enable */
		pci_write_config_word(dev, PCI_COMMAND, cfg_command);
	}
	pci_cmds = (pci_read_cmd << 28) | (pci_write_cmd << 24);

	/* The slot is now live; the next probe uses the following one. */
	num_cards++;

	if (!get_userbit(card, MEMORY_INITIALIZED)) {
		dev_printk(KERN_INFO, &card->dev->dev,
			   "memory NOT initialized. Consider over-writing whole device.\n");
		card->init_size = 0;
	} else {
		dev_printk(KERN_INFO, &card->dev->dev,
			   "memory already initialized\n");
		card->init_size = card->mm_size;
	}

	/* Enable ECC */
	writeb(EDC_STORE_CORRECT, card->csr_remap + MEMCTRLCMD_ERRCTRL);

	return 0;

 failed_req_irq:
	/* The queue exists on this path only; release it. */
	blk_cleanup_queue(card->queue);
 failed_alloc:
	if (card->mm_pages[0].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
				    card->mm_pages[0].desc,
				    card->mm_pages[0].page_dma);
	if (card->mm_pages[1].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
				    card->mm_pages[1].desc,
				    card->mm_pages[1].page_dma);
 failed_magic:
	iounmap(card->csr_remap);
 failed_remap_csr:
	pci_release_regions(dev);
 failed_req_csr:
 failed_disable:
	/* Undo pci_enable_device() so the device is not left enabled. */
	pci_disable_device(dev);

	return ret;
}
/*
 * mm_pci_remove() - tear down one card when the PCI core unbinds it.
 * @dev: the PCI device being removed
 *
 * The interrupt is released before the tasklet is killed: the interrupt
 * handler schedules the tasklet, so killing the tasklet while the handler
 * is still registered would allow it to be scheduled again and run after
 * the resources below have been freed.
 */
static void mm_pci_remove(struct pci_dev *dev)
{
	struct cardinfo *card = pci_get_drvdata(dev);

	/* Stop the source of tasklet_schedule() first, then the tasklet. */
	free_irq(dev->irq, card);
	tasklet_kill(&card->tasklet);

	iounmap(card->csr_remap);

	if (card->mm_pages[0].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
				    card->mm_pages[0].desc,
				    card->mm_pages[0].page_dma);
	if (card->mm_pages[1].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
				    card->mm_pages[1].desc,
				    card->mm_pages[1].page_dma);
	blk_cleanup_queue(card->queue);

	pci_release_regions(dev);
	pci_disable_device(dev);
}
/*
 * PCI IDs this driver binds to: three Micro Memory device IDs, plus one
 * entry matched by an explicit vendor/device/subsystem combination.  The
 * table is terminated by an all-zero entry and exported with
 * MODULE_DEVICE_TABLE().
 */
static const struct pci_device_id mm_pci_ids[] = {
{PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5415CN)},
{PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5425CN)},
{PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_6155)},
{
/*
 * Vendor 0x8086 / device 0xB555 with subsystem 0x1332:0x5460.
 * NOTE(review): class_mask is 0, so the .class value presumably plays no
 * part in matching; confirm against the PCI core's matching rules.
 */
.vendor = 0x8086,
.device = 0xB555,
.subvendor = 0x1332,
.subdevice = 0x5460,
.class = 0x050000,
.class_mask = 0,
}, { /* end: all zeroes */ }
};
MODULE_DEVICE_TABLE(pci, mm_pci_ids);
/*
 * PCI driver description registered in mm_init() and unregistered in
 * mm_cleanup() (and on the mm_init() error paths).
 */
static struct pci_driver mm_pci_driver = {
.name = DRIVER_NAME,
.id_table = mm_pci_ids, /* devices to bind to */
.probe = mm_pci_probe, /* per-card setup */
.remove = mm_pci_remove, /* per-card teardown */
};
/*
 * mm_init() - module entry point.
 *
 * Registers the PCI driver (which runs mm_pci_probe() for every card
 * present and so sets num_cards), obtains a dynamic block major, creates
 * one gendisk per probed card named "umema", "umemb", ... and starts the
 * battery polling timer.
 *
 * Return: 0 on success; the error from pci_register_driver() or
 * register_blkdev() if either fails; -ENOMEM if a gendisk cannot be
 * allocated.  On failure everything registered so far is undone.
 */
static int __init mm_init(void)
{
	int err, i;

	err = pci_register_driver(&mm_pci_driver);
	if (err)
		return err;	/* pass the real reason on to the caller */

	err = major_nr = register_blkdev(0, DRIVER_NAME);
	if (err < 0) {
		pci_unregister_driver(&mm_pci_driver);
		return err;
	}

	/* Allocate every disk first so a failure needs no del_gendisk(). */
	for (i = 0; i < num_cards; i++) {
		mm_gendisk[i] = alloc_disk(1 << MM_SHIFT);
		if (!mm_gendisk[i])
			goto out;
	}

	for (i = 0; i < num_cards; i++) {
		struct gendisk *disk = mm_gendisk[i];

		snprintf(disk->disk_name, sizeof(disk->disk_name),
			 "umem%c", 'a' + i);
		/*
		 * cards[i].lock was already initialised by mm_pci_probe()
		 * and the card's interrupt handler is registered by now, so
		 * the lock must not be re-initialised here.
		 */
		disk->major = major_nr;
		disk->first_minor  = i << MM_SHIFT;
		disk->fops = &mm_fops;
		disk->private_data = &cards[i];
		disk->queue = cards[i].queue;
		set_capacity(disk, cards[i].mm_size << 1);
		add_disk(disk);
	}

	init_battery_timer();
	printk(KERN_INFO "MM: desc_per_page = %ld\n", DESC_PER_PAGE);
	return 0;

out:
	pci_unregister_driver(&mm_pci_driver);
	unregister_blkdev(major_nr, DRIVER_NAME);
	/* i indexes the allocation that failed; release the earlier ones. */
	while (i--)
		put_disk(mm_gendisk[i]);
	return -ENOMEM;
}
/*
 * mm_cleanup() - module exit point.
 *
 * Stops the battery timer, removes and releases every gendisk created by
 * mm_init(), then unregisters the PCI driver and gives back the block
 * major number.
 */
static void __exit mm_cleanup(void)
{
	int idx = 0;

	del_battery_timer();

	while (idx < num_cards) {
		struct gendisk *disk = mm_gendisk[idx++];

		del_gendisk(disk);
		put_disk(disk);
	}

	pci_unregister_driver(&mm_pci_driver);
	unregister_blkdev(major_nr, DRIVER_NAME);
}
/* Module entry and exit points. */
module_init(mm_init);
module_exit(mm_cleanup);
/* Module metadata; the DRIVER_* strings are defined outside this section. */
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");