linux_dsm_epyc7002/fs/pstore/inode.c

492 lines
11 KiB
C
Raw Normal View History

/*
* Persistent Storage - ramfs parts.
*
* Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/ramfs.h>
#include <linux/parser.h>
#include <linux/sched.h>
#include <linux/magic.h>
#include <linux/pstore.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/syslog.h>
#include "internal.h"
#define PSTORE_NAMELEN 64
static DEFINE_SPINLOCK(allpstore_lock);
static LIST_HEAD(allpstore);
struct pstore_private {
struct list_head list;
struct pstore_info *psi;
enum pstore_type_id type;
u64 id;
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 07:07:44 +07:00
int count;
ssize_t size;
char data[];
};
struct pstore_ftrace_seq_data {
const void *ptr;
size_t off;
size_t size;
};
#define REC_SIZE sizeof(struct pstore_ftrace_record)
static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos)
{
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
data->off = ps->size % REC_SIZE;
data->off += *pos * REC_SIZE;
if (data->off + REC_SIZE > ps->size) {
kfree(data);
return NULL;
}
return data;
}
static void pstore_ftrace_seq_stop(struct seq_file *s, void *v)
{
kfree(v);
}
static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data = v;
data->off += REC_SIZE;
if (data->off + REC_SIZE > ps->size)
return NULL;
(*pos)++;
return data;
}
static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
{
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data = v;
struct pstore_ftrace_record *rec = (void *)(ps->data + data->off);
seq_printf(s, "%d %08lx %08lx %pf <- %pF\n",
pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip,
(void *)rec->ip, (void *)rec->parent_ip);
return 0;
}
static const struct seq_operations pstore_ftrace_seq_ops = {
.start = pstore_ftrace_seq_start,
.next = pstore_ftrace_seq_next,
.stop = pstore_ftrace_seq_stop,
.show = pstore_ftrace_seq_show,
};
static int pstore_check_syslog_permissions(struct pstore_private *ps)
{
switch (ps->type) {
case PSTORE_TYPE_DMESG:
case PSTORE_TYPE_CONSOLE:
return check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
SYSLOG_FROM_READER);
default:
return 0;
}
}
static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
struct seq_file *sf = file->private_data;
struct pstore_private *ps = sf->private;
if (ps->type == PSTORE_TYPE_FTRACE)
return seq_read(file, userbuf, count, ppos);
return simple_read_from_buffer(userbuf, count, ppos, ps->data, ps->size);
}
static int pstore_file_open(struct inode *inode, struct file *file)
{
struct pstore_private *ps = inode->i_private;
struct seq_file *sf;
int err;
const struct seq_operations *sops = NULL;
err = pstore_check_syslog_permissions(ps);
if (err)
return err;
if (ps->type == PSTORE_TYPE_FTRACE)
sops = &pstore_ftrace_seq_ops;
err = seq_open(file, sops);
if (err < 0)
return err;
sf = file->private_data;
sf->private = ps;
return 0;
}
static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
{
struct seq_file *sf = file->private_data;
if (sf->op)
return seq_lseek(file, off, whence);
return default_llseek(file, off, whence);
}
static const struct file_operations pstore_file_operations = {
.open = pstore_file_open,
.read = pstore_file_read,
.llseek = pstore_file_llseek,
.release = seq_release,
};
/*
* When a file is unlinked from our file system we call the
* platform driver to erase the record from persistent store.
*/
static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = d_inode(dentry)->i_private;
int err;
err = pstore_check_syslog_permissions(p);
if (err)
return err;
if (p->psi->erase)
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 07:07:44 +07:00
p->psi->erase(p->type, p->id, p->count,
d_inode(dentry)->i_ctime, p->psi);
else
return -EPERM;
return simple_unlink(dir, dentry);
}
static void pstore_evict_inode(struct inode *inode)
{
struct pstore_private *p = inode->i_private;
unsigned long flags;
clear_inode(inode);
if (p) {
spin_lock_irqsave(&allpstore_lock, flags);
list_del(&p->list);
spin_unlock_irqrestore(&allpstore_lock, flags);
kfree(p);
}
}
static const struct inode_operations pstore_dir_inode_operations = {
.lookup = simple_lookup,
.unlink = pstore_unlink,
};
static struct inode *pstore_get_inode(struct super_block *sb)
{
struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
}
return inode;
}
enum {
Opt_kmsg_bytes, Opt_err
};
static const match_table_t tokens = {
{Opt_kmsg_bytes, "kmsg_bytes=%u"},
{Opt_err, NULL}
};
static void parse_options(char *options)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
if (!options)
return;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_kmsg_bytes:
if (!match_int(&args[0], &option))
pstore_set_kmsg_bytes(option);
break;
}
}
}
static int pstore_remount(struct super_block *sb, int *flags, char *data)
{
fs: push sync_filesystem() down to the file system's remount_fs() Previously, the no-op "mount -o mount /dev/xxx" operation when the file system is already mounted read-write causes an implied, unconditional syncfs(). This seems pretty stupid, and it's certainly documented or guaraunteed to do this, nor is it particularly useful, except in the case where the file system was mounted rw and is getting remounted read-only. However, it's possible that there might be some file systems that are actually depending on this behavior. In most file systems, it's probably fine to only call sync_filesystem() when transitioning from read-write to read-only, and there are some file systems where this is not needed at all (for example, for a pseudo-filesystem or something like romfs). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig <hch@infradead.org> Cc: Artem Bityutskiy <dedekind1@gmail.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Evgeniy Dushistov <dushistov@mail.ru> Cc: Jan Kara <jack@suse.cz> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Anders Larsen <al@alarsen.net> Cc: Phillip Lougher <phillip@squashfs.org.uk> Cc: Kees Cook <keescook@chromium.org> Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> Cc: Petr Vandrovec <petr@vandrovec.name> Cc: xfs@oss.sgi.com Cc: linux-btrfs@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: codalist@coda.cs.cmu.edu Cc: linux-ext4@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: fuse-devel@lists.sourceforge.net Cc: cluster-devel@redhat.com Cc: linux-mtd@lists.infradead.org Cc: jfs-discussion@lists.sourceforge.net Cc: linux-nfs@vger.kernel.org Cc: linux-nilfs@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Cc: reiserfs-devel@vger.kernel.org
2014-03-13 21:14:33 +07:00
sync_filesystem(sb);
parse_options(data);
return 0;
}
static const struct super_operations pstore_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.evict_inode = pstore_evict_inode,
.remount_fs = pstore_remount,
.show_options = generic_show_options,
};
static struct super_block *pstore_sb;
bool pstore_is_mounted(void)
{
return pstore_sb != NULL;
}
/*
* Make a regular file in the root directory of our file system.
* Load it up with "size" bytes of data from "buf".
* Set the mtime & ctime to the date that this record was originally stored.
*/
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 07:07:44 +07:00
int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
char *data, bool compressed, size_t size,
struct timespec time, struct pstore_info *psi)
{
struct dentry *root = pstore_sb->s_root;
struct dentry *dentry;
struct inode *inode;
int rc = 0;
char name[PSTORE_NAMELEN];
struct pstore_private *private, *pos;
unsigned long flags;
spin_lock_irqsave(&allpstore_lock, flags);
list_for_each_entry(pos, &allpstore, list) {
if (pos->type == type &&
pos->id == id &&
pos->psi == psi) {
rc = -EEXIST;
break;
}
}
spin_unlock_irqrestore(&allpstore_lock, flags);
if (rc)
return rc;
rc = -ENOMEM;
inode = pstore_get_inode(pstore_sb);
if (!inode)
goto fail;
inode->i_mode = S_IFREG | 0444;
inode->i_fop = &pstore_file_operations;
private = kmalloc(sizeof *private + size, GFP_KERNEL);
if (!private)
goto fail_alloc;
private->type = type;
private->id = id;
efi_pstore: Add a sequence counter to a variable name [Issue] Currently, a variable name, which identifies each entry, consists of type, id and ctime. But if multiple events happens in a short time, a second/third event may fail to log because efi_pstore can't distinguish each event with current variable name. [Solution] A reasonable way to identify all events precisely is introducing a sequence counter to the variable name. The sequence counter has already supported in a pstore layer with "oopscount". So, this patch adds it to a variable name. Also, it is passed to read/erase callbacks of platform drivers in accordance with the modification of the variable name. <before applying this patch> a variable name of first event: dump-type0-1-12345678 a variable name of second event: dump-type0-1-12345678 type:0 id:1 ctime:12345678 If multiple events happen in a short time, efi_pstore can't distinguish them because variable names are same among them. <after applying this patch> it can be distinguishable by adding a sequence counter as follows. a variable name of first event: dump-type0-1-1-12345678 a variable name of Second event: dump-type0-1-2-12345678 type:0 id:1 sequence counter: 1(first event), 2(second event) ctime:12345678 In case of a write callback executed in pstore_console_write(), "0" is added to an argument of the write callback because it just logs all kernel messages and doesn't need to care about multiple events. Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Mike Waychison <mikew@google.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
2012-11-27 07:07:44 +07:00
private->count = count;
private->psi = psi;
switch (type) {
case PSTORE_TYPE_DMESG:
scnprintf(name, sizeof(name), "dmesg-%s-%lld%s",
psname, id, compressed ? ".enc.z" : "");
break;
case PSTORE_TYPE_CONSOLE:
scnprintf(name, sizeof(name), "console-%s-%lld", psname, id);
break;
case PSTORE_TYPE_FTRACE:
scnprintf(name, sizeof(name), "ftrace-%s-%lld", psname, id);
break;
case PSTORE_TYPE_MCE:
scnprintf(name, sizeof(name), "mce-%s-%lld", psname, id);
break;
case PSTORE_TYPE_PPC_RTAS:
scnprintf(name, sizeof(name), "rtas-%s-%lld", psname, id);
break;
case PSTORE_TYPE_PPC_OF:
scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld",
psname, id);
break;
case PSTORE_TYPE_PPC_COMMON:
scnprintf(name, sizeof(name), "powerpc-common-%s-%lld",
psname, id);
break;
case PSTORE_TYPE_PMSG:
scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id);
break;
case PSTORE_TYPE_PPC_OPAL:
sprintf(name, "powerpc-opal-%s-%lld", psname, id);
break;
case PSTORE_TYPE_UNKNOWN:
scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id);
break;
default:
scnprintf(name, sizeof(name), "type%d-%s-%lld",
type, psname, id);
break;
}
inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry)
goto fail_lockedalloc;
memcpy(private->data, data, size);
inode->i_size = private->size = size;
inode->i_private = private;
if (time.tv_sec)
inode->i_mtime = inode->i_ctime = time;
d_add(dentry, inode);
spin_lock_irqsave(&allpstore_lock, flags);
list_add(&private->list, &allpstore);
spin_unlock_irqrestore(&allpstore_lock, flags);
inode_unlock(d_inode(root));
return 0;
fail_lockedalloc:
inode_unlock(d_inode(root));
kfree(private);
fail_alloc:
iput(inode);
fail:
return rc;
}
static int pstore_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
save_mount_options(sb, data);
pstore_sb = sb;
sb->s_maxbytes = MAX_LFS_FILESIZE;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = PSTOREFS_MAGIC;
sb->s_op = &pstore_ops;
sb->s_time_gran = 1;
parse_options(data);
inode = pstore_get_inode(sb);
if (inode) {
inode->i_mode = S_IFDIR | 0755;
inode->i_op = &pstore_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
inc_nlink(inode);
}
sb->s_root = d_make_root(inode);
if (!sb->s_root)
return -ENOMEM;
pstore_get_records(0);
return 0;
}
static struct dentry *pstore_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_single(fs_type, flags, data, pstore_fill_super);
}
static void pstore_kill_sb(struct super_block *sb)
{
kill_litter_super(sb);
pstore_sb = NULL;
}
static struct file_system_type pstore_fs_type = {
.owner = THIS_MODULE,
.name = "pstore",
.mount = pstore_mount,
.kill_sb = pstore_kill_sb,
};
static int __init init_pstore_fs(void)
{
int err;
/* Create a convenient mount point for people to access pstore */
err = sysfs_create_mount_point(fs_kobj, "pstore");
if (err)
goto out;
err = register_filesystem(&pstore_fs_type);
if (err < 0)
sysfs_remove_mount_point(fs_kobj, "pstore");
out:
return err;
}
module_init(init_pstore_fs)
static void __exit exit_pstore_fs(void)
{
unregister_filesystem(&pstore_fs_type);
sysfs_remove_mount_point(fs_kobj, "pstore");
}
module_exit(exit_pstore_fs)
MODULE_AUTHOR("Tony Luck <tony.luck@intel.com>");
MODULE_LICENSE("GPL");