mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-26 01:29:32 +07:00
b3e19d924b
The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin <npiggin@kernel.dk>
1218 lines
26 KiB
C
1218 lines
26 KiB
C
/*
|
|
* Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*
|
|
*/
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/err.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/blkpg.h>
|
|
#include <linux/mtd/mtd.h>
|
|
#include <linux/mtd/partitions.h>
|
|
#include <linux/mtd/map.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
|
|
#define MTD_INODE_FS_MAGIC 0x11307854
|
|
static DEFINE_MUTEX(mtd_mutex);
|
|
static struct vfsmount *mtd_inode_mnt __read_mostly;
|
|
|
|
/*
|
|
* Data structure to hold the pointer to the mtd device as well
|
|
* as mode information ofr various use cases.
|
|
*/
|
|
struct mtd_file_info {
|
|
struct mtd_info *mtd;
|
|
struct inode *ino;
|
|
enum mtd_file_modes mode;
|
|
};
|
|
|
|
static loff_t mtd_lseek (struct file *file, loff_t offset, int orig)
|
|
{
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
|
|
switch (orig) {
|
|
case SEEK_SET:
|
|
break;
|
|
case SEEK_CUR:
|
|
offset += file->f_pos;
|
|
break;
|
|
case SEEK_END:
|
|
offset += mtd->size;
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (offset >= 0 && offset <= mtd->size)
|
|
return file->f_pos = offset;
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
|
|
|
|
static int mtd_open(struct inode *inode, struct file *file)
|
|
{
|
|
int minor = iminor(inode);
|
|
int devnum = minor >> 1;
|
|
int ret = 0;
|
|
struct mtd_info *mtd;
|
|
struct mtd_file_info *mfi;
|
|
struct inode *mtd_ino;
|
|
|
|
DEBUG(MTD_DEBUG_LEVEL0, "MTD_open\n");
|
|
|
|
/* You can't open the RO devices RW */
|
|
if ((file->f_mode & FMODE_WRITE) && (minor & 1))
|
|
return -EACCES;
|
|
|
|
mutex_lock(&mtd_mutex);
|
|
mtd = get_mtd_device(NULL, devnum);
|
|
|
|
if (IS_ERR(mtd)) {
|
|
ret = PTR_ERR(mtd);
|
|
goto out;
|
|
}
|
|
|
|
if (mtd->type == MTD_ABSENT) {
|
|
put_mtd_device(mtd);
|
|
ret = -ENODEV;
|
|
goto out;
|
|
}
|
|
|
|
mtd_ino = iget_locked(mtd_inode_mnt->mnt_sb, devnum);
|
|
if (!mtd_ino) {
|
|
put_mtd_device(mtd);
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
if (mtd_ino->i_state & I_NEW) {
|
|
mtd_ino->i_private = mtd;
|
|
mtd_ino->i_mode = S_IFCHR;
|
|
mtd_ino->i_data.backing_dev_info = mtd->backing_dev_info;
|
|
unlock_new_inode(mtd_ino);
|
|
}
|
|
file->f_mapping = mtd_ino->i_mapping;
|
|
|
|
/* You can't open it RW if it's not a writeable device */
|
|
if ((file->f_mode & FMODE_WRITE) && !(mtd->flags & MTD_WRITEABLE)) {
|
|
iput(mtd_ino);
|
|
put_mtd_device(mtd);
|
|
ret = -EACCES;
|
|
goto out;
|
|
}
|
|
|
|
mfi = kzalloc(sizeof(*mfi), GFP_KERNEL);
|
|
if (!mfi) {
|
|
iput(mtd_ino);
|
|
put_mtd_device(mtd);
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
mfi->ino = mtd_ino;
|
|
mfi->mtd = mtd;
|
|
file->private_data = mfi;
|
|
|
|
out:
|
|
mutex_unlock(&mtd_mutex);
|
|
return ret;
|
|
} /* mtd_open */
|
|
|
|
/*====================================================================*/
|
|
|
|
static int mtd_close(struct inode *inode, struct file *file)
|
|
{
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
|
|
DEBUG(MTD_DEBUG_LEVEL0, "MTD_close\n");
|
|
|
|
/* Only sync if opened RW */
|
|
if ((file->f_mode & FMODE_WRITE) && mtd->sync)
|
|
mtd->sync(mtd);
|
|
|
|
iput(mfi->ino);
|
|
|
|
put_mtd_device(mtd);
|
|
file->private_data = NULL;
|
|
kfree(mfi);
|
|
|
|
return 0;
|
|
} /* mtd_close */
|
|
|
|
/* FIXME: This _really_ needs to die. In 2.5, we should lock the
|
|
userspace buffer down and use it directly with readv/writev.
|
|
*/
|
|
#define MAX_KMALLOC_SIZE 0x20000
|
|
|
|
static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t *ppos)
|
|
{
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
size_t retlen=0;
|
|
size_t total_retlen=0;
|
|
int ret=0;
|
|
int len;
|
|
char *kbuf;
|
|
|
|
DEBUG(MTD_DEBUG_LEVEL0,"MTD_read\n");
|
|
|
|
if (*ppos + count > mtd->size)
|
|
count = mtd->size - *ppos;
|
|
|
|
if (!count)
|
|
return 0;
|
|
|
|
/* FIXME: Use kiovec in 2.5 to lock down the user's buffers
|
|
and pass them directly to the MTD functions */
|
|
|
|
if (count > MAX_KMALLOC_SIZE)
|
|
kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
|
|
else
|
|
kbuf=kmalloc(count, GFP_KERNEL);
|
|
|
|
if (!kbuf)
|
|
return -ENOMEM;
|
|
|
|
while (count) {
|
|
|
|
if (count > MAX_KMALLOC_SIZE)
|
|
len = MAX_KMALLOC_SIZE;
|
|
else
|
|
len = count;
|
|
|
|
switch (mfi->mode) {
|
|
case MTD_MODE_OTP_FACTORY:
|
|
ret = mtd->read_fact_prot_reg(mtd, *ppos, len, &retlen, kbuf);
|
|
break;
|
|
case MTD_MODE_OTP_USER:
|
|
ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
|
|
break;
|
|
case MTD_MODE_RAW:
|
|
{
|
|
struct mtd_oob_ops ops;
|
|
|
|
ops.mode = MTD_OOB_RAW;
|
|
ops.datbuf = kbuf;
|
|
ops.oobbuf = NULL;
|
|
ops.len = len;
|
|
|
|
ret = mtd->read_oob(mtd, *ppos, &ops);
|
|
retlen = ops.retlen;
|
|
break;
|
|
}
|
|
default:
|
|
ret = mtd->read(mtd, *ppos, len, &retlen, kbuf);
|
|
}
|
|
/* Nand returns -EBADMSG on ecc errors, but it returns
|
|
* the data. For our userspace tools it is important
|
|
* to dump areas with ecc errors !
|
|
* For kernel internal usage it also might return -EUCLEAN
|
|
* to signal the caller that a bitflip has occured and has
|
|
* been corrected by the ECC algorithm.
|
|
* Userspace software which accesses NAND this way
|
|
* must be aware of the fact that it deals with NAND
|
|
*/
|
|
if (!ret || (ret == -EUCLEAN) || (ret == -EBADMSG)) {
|
|
*ppos += retlen;
|
|
if (copy_to_user(buf, kbuf, retlen)) {
|
|
kfree(kbuf);
|
|
return -EFAULT;
|
|
}
|
|
else
|
|
total_retlen += retlen;
|
|
|
|
count -= retlen;
|
|
buf += retlen;
|
|
if (retlen == 0)
|
|
count = 0;
|
|
}
|
|
else {
|
|
kfree(kbuf);
|
|
return ret;
|
|
}
|
|
|
|
}
|
|
|
|
kfree(kbuf);
|
|
return total_retlen;
|
|
} /* mtd_read */
|
|
|
|
static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count,loff_t *ppos)
|
|
{
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
char *kbuf;
|
|
size_t retlen;
|
|
size_t total_retlen=0;
|
|
int ret=0;
|
|
int len;
|
|
|
|
DEBUG(MTD_DEBUG_LEVEL0,"MTD_write\n");
|
|
|
|
if (*ppos == mtd->size)
|
|
return -ENOSPC;
|
|
|
|
if (*ppos + count > mtd->size)
|
|
count = mtd->size - *ppos;
|
|
|
|
if (!count)
|
|
return 0;
|
|
|
|
if (count > MAX_KMALLOC_SIZE)
|
|
kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
|
|
else
|
|
kbuf=kmalloc(count, GFP_KERNEL);
|
|
|
|
if (!kbuf)
|
|
return -ENOMEM;
|
|
|
|
while (count) {
|
|
|
|
if (count > MAX_KMALLOC_SIZE)
|
|
len = MAX_KMALLOC_SIZE;
|
|
else
|
|
len = count;
|
|
|
|
if (copy_from_user(kbuf, buf, len)) {
|
|
kfree(kbuf);
|
|
return -EFAULT;
|
|
}
|
|
|
|
switch (mfi->mode) {
|
|
case MTD_MODE_OTP_FACTORY:
|
|
ret = -EROFS;
|
|
break;
|
|
case MTD_MODE_OTP_USER:
|
|
if (!mtd->write_user_prot_reg) {
|
|
ret = -EOPNOTSUPP;
|
|
break;
|
|
}
|
|
ret = mtd->write_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
|
|
break;
|
|
|
|
case MTD_MODE_RAW:
|
|
{
|
|
struct mtd_oob_ops ops;
|
|
|
|
ops.mode = MTD_OOB_RAW;
|
|
ops.datbuf = kbuf;
|
|
ops.oobbuf = NULL;
|
|
ops.len = len;
|
|
|
|
ret = mtd->write_oob(mtd, *ppos, &ops);
|
|
retlen = ops.retlen;
|
|
break;
|
|
}
|
|
|
|
default:
|
|
ret = (*(mtd->write))(mtd, *ppos, len, &retlen, kbuf);
|
|
}
|
|
if (!ret) {
|
|
*ppos += retlen;
|
|
total_retlen += retlen;
|
|
count -= retlen;
|
|
buf += retlen;
|
|
}
|
|
else {
|
|
kfree(kbuf);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
kfree(kbuf);
|
|
return total_retlen;
|
|
} /* mtd_write */
|
|
|
|
/*======================================================================
|
|
|
|
IOCTL calls for getting device parameters.
|
|
|
|
======================================================================*/
|
|
static void mtdchar_erase_callback (struct erase_info *instr)
|
|
{
|
|
wake_up((wait_queue_head_t *)instr->priv);
|
|
}
|
|
|
|
#ifdef CONFIG_HAVE_MTD_OTP
|
|
static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
|
|
{
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
int ret = 0;
|
|
|
|
switch (mode) {
|
|
case MTD_OTP_FACTORY:
|
|
if (!mtd->read_fact_prot_reg)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
mfi->mode = MTD_MODE_OTP_FACTORY;
|
|
break;
|
|
case MTD_OTP_USER:
|
|
if (!mtd->read_fact_prot_reg)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
mfi->mode = MTD_MODE_OTP_USER;
|
|
break;
|
|
default:
|
|
ret = -EINVAL;
|
|
case MTD_OTP_OFF:
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
#else
|
|
# define otp_select_filemode(f,m) -EOPNOTSUPP
|
|
#endif
|
|
|
|
static int mtd_do_writeoob(struct file *file, struct mtd_info *mtd,
|
|
uint64_t start, uint32_t length, void __user *ptr,
|
|
uint32_t __user *retp)
|
|
{
|
|
struct mtd_oob_ops ops;
|
|
uint32_t retlen;
|
|
int ret = 0;
|
|
|
|
if (!(file->f_mode & FMODE_WRITE))
|
|
return -EPERM;
|
|
|
|
if (length > 4096)
|
|
return -EINVAL;
|
|
|
|
if (!mtd->write_oob)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
ret = access_ok(VERIFY_READ, ptr, length) ? 0 : -EFAULT;
|
|
|
|
if (ret)
|
|
return ret;
|
|
|
|
ops.ooblen = length;
|
|
ops.ooboffs = start & (mtd->oobsize - 1);
|
|
ops.datbuf = NULL;
|
|
ops.mode = MTD_OOB_PLACE;
|
|
|
|
if (ops.ooboffs && ops.ooblen > (mtd->oobsize - ops.ooboffs))
|
|
return -EINVAL;
|
|
|
|
ops.oobbuf = memdup_user(ptr, length);
|
|
if (IS_ERR(ops.oobbuf))
|
|
return PTR_ERR(ops.oobbuf);
|
|
|
|
start &= ~((uint64_t)mtd->oobsize - 1);
|
|
ret = mtd->write_oob(mtd, start, &ops);
|
|
|
|
if (ops.oobretlen > 0xFFFFFFFFU)
|
|
ret = -EOVERFLOW;
|
|
retlen = ops.oobretlen;
|
|
if (copy_to_user(retp, &retlen, sizeof(length)))
|
|
ret = -EFAULT;
|
|
|
|
kfree(ops.oobbuf);
|
|
return ret;
|
|
}
|
|
|
|
static int mtd_do_readoob(struct mtd_info *mtd, uint64_t start,
|
|
uint32_t length, void __user *ptr, uint32_t __user *retp)
|
|
{
|
|
struct mtd_oob_ops ops;
|
|
int ret = 0;
|
|
|
|
if (length > 4096)
|
|
return -EINVAL;
|
|
|
|
if (!mtd->read_oob)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
ret = access_ok(VERIFY_WRITE, ptr,
|
|
length) ? 0 : -EFAULT;
|
|
if (ret)
|
|
return ret;
|
|
|
|
ops.ooblen = length;
|
|
ops.ooboffs = start & (mtd->oobsize - 1);
|
|
ops.datbuf = NULL;
|
|
ops.mode = MTD_OOB_PLACE;
|
|
|
|
if (ops.ooboffs && ops.ooblen > (mtd->oobsize - ops.ooboffs))
|
|
return -EINVAL;
|
|
|
|
ops.oobbuf = kmalloc(length, GFP_KERNEL);
|
|
if (!ops.oobbuf)
|
|
return -ENOMEM;
|
|
|
|
start &= ~((uint64_t)mtd->oobsize - 1);
|
|
ret = mtd->read_oob(mtd, start, &ops);
|
|
|
|
if (put_user(ops.oobretlen, retp))
|
|
ret = -EFAULT;
|
|
else if (ops.oobretlen && copy_to_user(ptr, ops.oobbuf,
|
|
ops.oobretlen))
|
|
ret = -EFAULT;
|
|
|
|
kfree(ops.oobbuf);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Copies (and truncates, if necessary) data from the larger struct,
|
|
* nand_ecclayout, to the smaller, deprecated layout struct,
|
|
* nand_ecclayout_user. This is necessary only to suppport the deprecated
|
|
* API ioctl ECCGETLAYOUT while allowing all new functionality to use
|
|
* nand_ecclayout flexibly (i.e. the struct may change size in new
|
|
* releases without requiring major rewrites).
|
|
*/
|
|
static int shrink_ecclayout(const struct nand_ecclayout *from,
|
|
struct nand_ecclayout_user *to)
|
|
{
|
|
int i;
|
|
|
|
if (!from || !to)
|
|
return -EINVAL;
|
|
|
|
memset(to, 0, sizeof(*to));
|
|
|
|
to->eccbytes = min((int)from->eccbytes, MTD_MAX_ECCPOS_ENTRIES);
|
|
for (i = 0; i < to->eccbytes; i++)
|
|
to->eccpos[i] = from->eccpos[i];
|
|
|
|
for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES; i++) {
|
|
if (from->oobfree[i].length == 0 &&
|
|
from->oobfree[i].offset == 0)
|
|
break;
|
|
to->oobavail += from->oobfree[i].length;
|
|
to->oobfree[i] = from->oobfree[i];
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_MTD_PARTITIONS
|
|
static int mtd_blkpg_ioctl(struct mtd_info *mtd,
|
|
struct blkpg_ioctl_arg __user *arg)
|
|
{
|
|
struct blkpg_ioctl_arg a;
|
|
struct blkpg_partition p;
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
/* Only master mtd device must be used to control partitions */
|
|
if (!mtd_is_master(mtd))
|
|
return -EINVAL;
|
|
|
|
if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
|
|
return -EFAULT;
|
|
|
|
if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
|
|
return -EFAULT;
|
|
|
|
switch (a.op) {
|
|
case BLKPG_ADD_PARTITION:
|
|
|
|
return mtd_add_partition(mtd, p.devname, p.start, p.length);
|
|
|
|
case BLKPG_DEL_PARTITION:
|
|
|
|
if (p.pno < 0)
|
|
return -EINVAL;
|
|
|
|
return mtd_del_partition(mtd, p.pno);
|
|
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
static int mtd_ioctl(struct file *file, u_int cmd, u_long arg)
|
|
{
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
void __user *argp = (void __user *)arg;
|
|
int ret = 0;
|
|
u_long size;
|
|
struct mtd_info_user info;
|
|
|
|
DEBUG(MTD_DEBUG_LEVEL0, "MTD_ioctl\n");
|
|
|
|
size = (cmd & IOCSIZE_MASK) >> IOCSIZE_SHIFT;
|
|
if (cmd & IOC_IN) {
|
|
if (!access_ok(VERIFY_READ, argp, size))
|
|
return -EFAULT;
|
|
}
|
|
if (cmd & IOC_OUT) {
|
|
if (!access_ok(VERIFY_WRITE, argp, size))
|
|
return -EFAULT;
|
|
}
|
|
|
|
switch (cmd) {
|
|
case MEMGETREGIONCOUNT:
|
|
if (copy_to_user(argp, &(mtd->numeraseregions), sizeof(int)))
|
|
return -EFAULT;
|
|
break;
|
|
|
|
case MEMGETREGIONINFO:
|
|
{
|
|
uint32_t ur_idx;
|
|
struct mtd_erase_region_info *kr;
|
|
struct region_info_user __user *ur = argp;
|
|
|
|
if (get_user(ur_idx, &(ur->regionindex)))
|
|
return -EFAULT;
|
|
|
|
if (ur_idx >= mtd->numeraseregions)
|
|
return -EINVAL;
|
|
|
|
kr = &(mtd->eraseregions[ur_idx]);
|
|
|
|
if (put_user(kr->offset, &(ur->offset))
|
|
|| put_user(kr->erasesize, &(ur->erasesize))
|
|
|| put_user(kr->numblocks, &(ur->numblocks)))
|
|
return -EFAULT;
|
|
|
|
break;
|
|
}
|
|
|
|
case MEMGETINFO:
|
|
info.type = mtd->type;
|
|
info.flags = mtd->flags;
|
|
info.size = mtd->size;
|
|
info.erasesize = mtd->erasesize;
|
|
info.writesize = mtd->writesize;
|
|
info.oobsize = mtd->oobsize;
|
|
/* The below fields are obsolete */
|
|
info.ecctype = -1;
|
|
info.eccsize = 0;
|
|
if (copy_to_user(argp, &info, sizeof(struct mtd_info_user)))
|
|
return -EFAULT;
|
|
break;
|
|
|
|
case MEMERASE:
|
|
case MEMERASE64:
|
|
{
|
|
struct erase_info *erase;
|
|
|
|
if(!(file->f_mode & FMODE_WRITE))
|
|
return -EPERM;
|
|
|
|
erase=kzalloc(sizeof(struct erase_info),GFP_KERNEL);
|
|
if (!erase)
|
|
ret = -ENOMEM;
|
|
else {
|
|
wait_queue_head_t waitq;
|
|
DECLARE_WAITQUEUE(wait, current);
|
|
|
|
init_waitqueue_head(&waitq);
|
|
|
|
if (cmd == MEMERASE64) {
|
|
struct erase_info_user64 einfo64;
|
|
|
|
if (copy_from_user(&einfo64, argp,
|
|
sizeof(struct erase_info_user64))) {
|
|
kfree(erase);
|
|
return -EFAULT;
|
|
}
|
|
erase->addr = einfo64.start;
|
|
erase->len = einfo64.length;
|
|
} else {
|
|
struct erase_info_user einfo32;
|
|
|
|
if (copy_from_user(&einfo32, argp,
|
|
sizeof(struct erase_info_user))) {
|
|
kfree(erase);
|
|
return -EFAULT;
|
|
}
|
|
erase->addr = einfo32.start;
|
|
erase->len = einfo32.length;
|
|
}
|
|
erase->mtd = mtd;
|
|
erase->callback = mtdchar_erase_callback;
|
|
erase->priv = (unsigned long)&waitq;
|
|
|
|
/*
|
|
FIXME: Allow INTERRUPTIBLE. Which means
|
|
not having the wait_queue head on the stack.
|
|
|
|
If the wq_head is on the stack, and we
|
|
leave because we got interrupted, then the
|
|
wq_head is no longer there when the
|
|
callback routine tries to wake us up.
|
|
*/
|
|
ret = mtd->erase(mtd, erase);
|
|
if (!ret) {
|
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
|
add_wait_queue(&waitq, &wait);
|
|
if (erase->state != MTD_ERASE_DONE &&
|
|
erase->state != MTD_ERASE_FAILED)
|
|
schedule();
|
|
remove_wait_queue(&waitq, &wait);
|
|
set_current_state(TASK_RUNNING);
|
|
|
|
ret = (erase->state == MTD_ERASE_FAILED)?-EIO:0;
|
|
}
|
|
kfree(erase);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case MEMWRITEOOB:
|
|
{
|
|
struct mtd_oob_buf buf;
|
|
struct mtd_oob_buf __user *buf_user = argp;
|
|
|
|
/* NOTE: writes return length to buf_user->length */
|
|
if (copy_from_user(&buf, argp, sizeof(buf)))
|
|
ret = -EFAULT;
|
|
else
|
|
ret = mtd_do_writeoob(file, mtd, buf.start, buf.length,
|
|
buf.ptr, &buf_user->length);
|
|
break;
|
|
}
|
|
|
|
case MEMREADOOB:
|
|
{
|
|
struct mtd_oob_buf buf;
|
|
struct mtd_oob_buf __user *buf_user = argp;
|
|
|
|
/* NOTE: writes return length to buf_user->start */
|
|
if (copy_from_user(&buf, argp, sizeof(buf)))
|
|
ret = -EFAULT;
|
|
else
|
|
ret = mtd_do_readoob(mtd, buf.start, buf.length,
|
|
buf.ptr, &buf_user->start);
|
|
break;
|
|
}
|
|
|
|
case MEMWRITEOOB64:
|
|
{
|
|
struct mtd_oob_buf64 buf;
|
|
struct mtd_oob_buf64 __user *buf_user = argp;
|
|
|
|
if (copy_from_user(&buf, argp, sizeof(buf)))
|
|
ret = -EFAULT;
|
|
else
|
|
ret = mtd_do_writeoob(file, mtd, buf.start, buf.length,
|
|
(void __user *)(uintptr_t)buf.usr_ptr,
|
|
&buf_user->length);
|
|
break;
|
|
}
|
|
|
|
case MEMREADOOB64:
|
|
{
|
|
struct mtd_oob_buf64 buf;
|
|
struct mtd_oob_buf64 __user *buf_user = argp;
|
|
|
|
if (copy_from_user(&buf, argp, sizeof(buf)))
|
|
ret = -EFAULT;
|
|
else
|
|
ret = mtd_do_readoob(mtd, buf.start, buf.length,
|
|
(void __user *)(uintptr_t)buf.usr_ptr,
|
|
&buf_user->length);
|
|
break;
|
|
}
|
|
|
|
case MEMLOCK:
|
|
{
|
|
struct erase_info_user einfo;
|
|
|
|
if (copy_from_user(&einfo, argp, sizeof(einfo)))
|
|
return -EFAULT;
|
|
|
|
if (!mtd->lock)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
ret = mtd->lock(mtd, einfo.start, einfo.length);
|
|
break;
|
|
}
|
|
|
|
case MEMUNLOCK:
|
|
{
|
|
struct erase_info_user einfo;
|
|
|
|
if (copy_from_user(&einfo, argp, sizeof(einfo)))
|
|
return -EFAULT;
|
|
|
|
if (!mtd->unlock)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
ret = mtd->unlock(mtd, einfo.start, einfo.length);
|
|
break;
|
|
}
|
|
|
|
case MEMISLOCKED:
|
|
{
|
|
struct erase_info_user einfo;
|
|
|
|
if (copy_from_user(&einfo, argp, sizeof(einfo)))
|
|
return -EFAULT;
|
|
|
|
if (!mtd->is_locked)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
ret = mtd->is_locked(mtd, einfo.start, einfo.length);
|
|
break;
|
|
}
|
|
|
|
/* Legacy interface */
|
|
case MEMGETOOBSEL:
|
|
{
|
|
struct nand_oobinfo oi;
|
|
|
|
if (!mtd->ecclayout)
|
|
return -EOPNOTSUPP;
|
|
if (mtd->ecclayout->eccbytes > ARRAY_SIZE(oi.eccpos))
|
|
return -EINVAL;
|
|
|
|
oi.useecc = MTD_NANDECC_AUTOPLACE;
|
|
memcpy(&oi.eccpos, mtd->ecclayout->eccpos, sizeof(oi.eccpos));
|
|
memcpy(&oi.oobfree, mtd->ecclayout->oobfree,
|
|
sizeof(oi.oobfree));
|
|
oi.eccbytes = mtd->ecclayout->eccbytes;
|
|
|
|
if (copy_to_user(argp, &oi, sizeof(struct nand_oobinfo)))
|
|
return -EFAULT;
|
|
break;
|
|
}
|
|
|
|
case MEMGETBADBLOCK:
|
|
{
|
|
loff_t offs;
|
|
|
|
if (copy_from_user(&offs, argp, sizeof(loff_t)))
|
|
return -EFAULT;
|
|
if (!mtd->block_isbad)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
return mtd->block_isbad(mtd, offs);
|
|
break;
|
|
}
|
|
|
|
case MEMSETBADBLOCK:
|
|
{
|
|
loff_t offs;
|
|
|
|
if (copy_from_user(&offs, argp, sizeof(loff_t)))
|
|
return -EFAULT;
|
|
if (!mtd->block_markbad)
|
|
ret = -EOPNOTSUPP;
|
|
else
|
|
return mtd->block_markbad(mtd, offs);
|
|
break;
|
|
}
|
|
|
|
#ifdef CONFIG_HAVE_MTD_OTP
|
|
case OTPSELECT:
|
|
{
|
|
int mode;
|
|
if (copy_from_user(&mode, argp, sizeof(int)))
|
|
return -EFAULT;
|
|
|
|
mfi->mode = MTD_MODE_NORMAL;
|
|
|
|
ret = otp_select_filemode(mfi, mode);
|
|
|
|
file->f_pos = 0;
|
|
break;
|
|
}
|
|
|
|
case OTPGETREGIONCOUNT:
|
|
case OTPGETREGIONINFO:
|
|
{
|
|
struct otp_info *buf = kmalloc(4096, GFP_KERNEL);
|
|
if (!buf)
|
|
return -ENOMEM;
|
|
ret = -EOPNOTSUPP;
|
|
switch (mfi->mode) {
|
|
case MTD_MODE_OTP_FACTORY:
|
|
if (mtd->get_fact_prot_info)
|
|
ret = mtd->get_fact_prot_info(mtd, buf, 4096);
|
|
break;
|
|
case MTD_MODE_OTP_USER:
|
|
if (mtd->get_user_prot_info)
|
|
ret = mtd->get_user_prot_info(mtd, buf, 4096);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if (ret >= 0) {
|
|
if (cmd == OTPGETREGIONCOUNT) {
|
|
int nbr = ret / sizeof(struct otp_info);
|
|
ret = copy_to_user(argp, &nbr, sizeof(int));
|
|
} else
|
|
ret = copy_to_user(argp, buf, ret);
|
|
if (ret)
|
|
ret = -EFAULT;
|
|
}
|
|
kfree(buf);
|
|
break;
|
|
}
|
|
|
|
case OTPLOCK:
|
|
{
|
|
struct otp_info oinfo;
|
|
|
|
if (mfi->mode != MTD_MODE_OTP_USER)
|
|
return -EINVAL;
|
|
if (copy_from_user(&oinfo, argp, sizeof(oinfo)))
|
|
return -EFAULT;
|
|
if (!mtd->lock_user_prot_reg)
|
|
return -EOPNOTSUPP;
|
|
ret = mtd->lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
/* This ioctl is being deprecated - it truncates the ecc layout */
|
|
case ECCGETLAYOUT:
|
|
{
|
|
struct nand_ecclayout_user *usrlay;
|
|
|
|
if (!mtd->ecclayout)
|
|
return -EOPNOTSUPP;
|
|
|
|
usrlay = kmalloc(sizeof(*usrlay), GFP_KERNEL);
|
|
if (!usrlay)
|
|
return -ENOMEM;
|
|
|
|
shrink_ecclayout(mtd->ecclayout, usrlay);
|
|
|
|
if (copy_to_user(argp, usrlay, sizeof(*usrlay)))
|
|
ret = -EFAULT;
|
|
kfree(usrlay);
|
|
break;
|
|
}
|
|
|
|
case ECCGETSTATS:
|
|
{
|
|
if (copy_to_user(argp, &mtd->ecc_stats,
|
|
sizeof(struct mtd_ecc_stats)))
|
|
return -EFAULT;
|
|
break;
|
|
}
|
|
|
|
case MTDFILEMODE:
|
|
{
|
|
mfi->mode = 0;
|
|
|
|
switch(arg) {
|
|
case MTD_MODE_OTP_FACTORY:
|
|
case MTD_MODE_OTP_USER:
|
|
ret = otp_select_filemode(mfi, arg);
|
|
break;
|
|
|
|
case MTD_MODE_RAW:
|
|
if (!mtd->read_oob || !mtd->write_oob)
|
|
return -EOPNOTSUPP;
|
|
mfi->mode = arg;
|
|
|
|
case MTD_MODE_NORMAL:
|
|
break;
|
|
default:
|
|
ret = -EINVAL;
|
|
}
|
|
file->f_pos = 0;
|
|
break;
|
|
}
|
|
|
|
#ifdef CONFIG_MTD_PARTITIONS
|
|
case BLKPG:
|
|
{
|
|
ret = mtd_blkpg_ioctl(mtd,
|
|
(struct blkpg_ioctl_arg __user *)arg);
|
|
break;
|
|
}
|
|
|
|
case BLKRRPART:
|
|
{
|
|
/* No reread partition feature. Just return ok */
|
|
ret = 0;
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
default:
|
|
ret = -ENOTTY;
|
|
}
|
|
|
|
return ret;
|
|
} /* memory_ioctl */
|
|
|
|
static long mtd_unlocked_ioctl(struct file *file, u_int cmd, u_long arg)
|
|
{
|
|
int ret;
|
|
|
|
mutex_lock(&mtd_mutex);
|
|
ret = mtd_ioctl(file, cmd, arg);
|
|
mutex_unlock(&mtd_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
struct mtd_oob_buf32 {
|
|
u_int32_t start;
|
|
u_int32_t length;
|
|
compat_caddr_t ptr; /* unsigned char* */
|
|
};
|
|
|
|
#define MEMWRITEOOB32 _IOWR('M', 3, struct mtd_oob_buf32)
|
|
#define MEMREADOOB32 _IOWR('M', 4, struct mtd_oob_buf32)
|
|
|
|
static long mtd_compat_ioctl(struct file *file, unsigned int cmd,
|
|
unsigned long arg)
|
|
{
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
void __user *argp = compat_ptr(arg);
|
|
int ret = 0;
|
|
|
|
mutex_lock(&mtd_mutex);
|
|
|
|
switch (cmd) {
|
|
case MEMWRITEOOB32:
|
|
{
|
|
struct mtd_oob_buf32 buf;
|
|
struct mtd_oob_buf32 __user *buf_user = argp;
|
|
|
|
if (copy_from_user(&buf, argp, sizeof(buf)))
|
|
ret = -EFAULT;
|
|
else
|
|
ret = mtd_do_writeoob(file, mtd, buf.start,
|
|
buf.length, compat_ptr(buf.ptr),
|
|
&buf_user->length);
|
|
break;
|
|
}
|
|
|
|
case MEMREADOOB32:
|
|
{
|
|
struct mtd_oob_buf32 buf;
|
|
struct mtd_oob_buf32 __user *buf_user = argp;
|
|
|
|
/* NOTE: writes return length to buf->start */
|
|
if (copy_from_user(&buf, argp, sizeof(buf)))
|
|
ret = -EFAULT;
|
|
else
|
|
ret = mtd_do_readoob(mtd, buf.start,
|
|
buf.length, compat_ptr(buf.ptr),
|
|
&buf_user->start);
|
|
break;
|
|
}
|
|
default:
|
|
ret = mtd_ioctl(file, cmd, (unsigned long)argp);
|
|
}
|
|
|
|
mutex_unlock(&mtd_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
/*
|
|
* try to determine where a shared mapping can be made
|
|
* - only supported for NOMMU at the moment (MMU can't doesn't copy private
|
|
* mappings)
|
|
*/
|
|
#ifndef CONFIG_MMU
|
|
static unsigned long mtd_get_unmapped_area(struct file *file,
|
|
unsigned long addr,
|
|
unsigned long len,
|
|
unsigned long pgoff,
|
|
unsigned long flags)
|
|
{
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
|
|
if (mtd->get_unmapped_area) {
|
|
unsigned long offset;
|
|
|
|
if (addr != 0)
|
|
return (unsigned long) -EINVAL;
|
|
|
|
if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
|
|
return (unsigned long) -EINVAL;
|
|
|
|
offset = pgoff << PAGE_SHIFT;
|
|
if (offset > mtd->size - len)
|
|
return (unsigned long) -EINVAL;
|
|
|
|
return mtd->get_unmapped_area(mtd, len, offset, flags);
|
|
}
|
|
|
|
/* can't map directly */
|
|
return (unsigned long) -ENOSYS;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* set up a mapping for shared memory segments
|
|
*/
|
|
static int mtd_mmap(struct file *file, struct vm_area_struct *vma)
|
|
{
|
|
#ifdef CONFIG_MMU
|
|
struct mtd_file_info *mfi = file->private_data;
|
|
struct mtd_info *mtd = mfi->mtd;
|
|
struct map_info *map = mtd->priv;
|
|
unsigned long start;
|
|
unsigned long off;
|
|
u32 len;
|
|
|
|
if (mtd->type == MTD_RAM || mtd->type == MTD_ROM) {
|
|
off = vma->vm_pgoff << PAGE_SHIFT;
|
|
start = map->phys;
|
|
len = PAGE_ALIGN((start & ~PAGE_MASK) + map->size);
|
|
start &= PAGE_MASK;
|
|
if ((vma->vm_end - vma->vm_start + off) > len)
|
|
return -EINVAL;
|
|
|
|
off += start;
|
|
vma->vm_pgoff = off >> PAGE_SHIFT;
|
|
vma->vm_flags |= VM_IO | VM_RESERVED;
|
|
|
|
#ifdef pgprot_noncached
|
|
if (file->f_flags & O_DSYNC || off >= __pa(high_memory))
|
|
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
|
#endif
|
|
if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
|
|
vma->vm_end - vma->vm_start,
|
|
vma->vm_page_prot))
|
|
return -EAGAIN;
|
|
|
|
return 0;
|
|
}
|
|
return -ENOSYS;
|
|
#else
|
|
return vma->vm_flags & VM_SHARED ? 0 : -ENOSYS;
|
|
#endif
|
|
}
|
|
|
|
static const struct file_operations mtd_fops = {
|
|
.owner = THIS_MODULE,
|
|
.llseek = mtd_lseek,
|
|
.read = mtd_read,
|
|
.write = mtd_write,
|
|
.unlocked_ioctl = mtd_unlocked_ioctl,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_ioctl = mtd_compat_ioctl,
|
|
#endif
|
|
.open = mtd_open,
|
|
.release = mtd_close,
|
|
.mmap = mtd_mmap,
|
|
#ifndef CONFIG_MMU
|
|
.get_unmapped_area = mtd_get_unmapped_area,
|
|
#endif
|
|
};
|
|
|
|
static struct dentry *mtd_inodefs_mount(struct file_system_type *fs_type,
|
|
int flags, const char *dev_name, void *data)
|
|
{
|
|
return mount_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC);
|
|
}
|
|
|
|
static struct file_system_type mtd_inodefs_type = {
|
|
.name = "mtd_inodefs",
|
|
.mount = mtd_inodefs_mount,
|
|
.kill_sb = kill_anon_super,
|
|
};
|
|
|
|
static void mtdchar_notify_add(struct mtd_info *mtd)
|
|
{
|
|
}
|
|
|
|
static void mtdchar_notify_remove(struct mtd_info *mtd)
|
|
{
|
|
struct inode *mtd_ino = ilookup(mtd_inode_mnt->mnt_sb, mtd->index);
|
|
|
|
if (mtd_ino) {
|
|
/* Destroy the inode if it exists */
|
|
mtd_ino->i_nlink = 0;
|
|
iput(mtd_ino);
|
|
}
|
|
}
|
|
|
|
static struct mtd_notifier mtdchar_notifier = {
|
|
.add = mtdchar_notify_add,
|
|
.remove = mtdchar_notify_remove,
|
|
};
|
|
|
|
static int __init init_mtdchar(void)
|
|
{
|
|
int ret;
|
|
|
|
ret = __register_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS,
|
|
"mtd", &mtd_fops);
|
|
if (ret < 0) {
|
|
pr_notice("Can't allocate major number %d for "
|
|
"Memory Technology Devices.\n", MTD_CHAR_MAJOR);
|
|
return ret;
|
|
}
|
|
|
|
ret = register_filesystem(&mtd_inodefs_type);
|
|
if (ret) {
|
|
pr_notice("Can't register mtd_inodefs filesystem: %d\n", ret);
|
|
goto err_unregister_chdev;
|
|
}
|
|
|
|
mtd_inode_mnt = kern_mount(&mtd_inodefs_type);
|
|
if (IS_ERR(mtd_inode_mnt)) {
|
|
ret = PTR_ERR(mtd_inode_mnt);
|
|
pr_notice("Error mounting mtd_inodefs filesystem: %d\n", ret);
|
|
goto err_unregister_filesystem;
|
|
}
|
|
register_mtd_user(&mtdchar_notifier);
|
|
|
|
return ret;
|
|
|
|
err_unregister_filesystem:
|
|
unregister_filesystem(&mtd_inodefs_type);
|
|
err_unregister_chdev:
|
|
__unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
|
|
return ret;
|
|
}
|
|
|
|
static void __exit cleanup_mtdchar(void)
|
|
{
|
|
unregister_mtd_user(&mtdchar_notifier);
|
|
mntput_long(mtd_inode_mnt);
|
|
unregister_filesystem(&mtd_inodefs_type);
|
|
__unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
|
|
}
|
|
|
|
module_init(init_mtdchar);
|
|
module_exit(cleanup_mtdchar);
|
|
|
|
MODULE_ALIAS_CHARDEV_MAJOR(MTD_CHAR_MAJOR);
|
|
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
|
|
MODULE_DESCRIPTION("Direct character-device access to MTD devices");
|
|
MODULE_ALIAS_CHARDEV_MAJOR(MTD_CHAR_MAJOR);
|