linux_dsm_epyc7002/fs/nfs/fscache.c
David Howells f1fe29b4a0 NFS: Use i_writecount to control whether to get an fscache cookie in nfs_open()
Use i_writecount to control whether to get an fscache cookie in nfs_open() as
NFS does not do write caching yet.  I *think* this is the cause of a problem
encountered by Mark Moseley whereby __fscache_uncache_page() gets a NULL
pointer dereference because cookie->def is NULL:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
IP: [<ffffffff812a1903>] __fscache_uncache_page+0x23/0x160
PGD 0
Thread overran stack, or stack corrupted
Oops: 0000 [#1] SMP
Modules linked in: ...
CPU: 7 PID: 18993 Comm: php Not tainted 3.11.1 #1
Hardware name: Dell Inc. PowerEdge R420/072XWF, BIOS 1.3.5 08/21/2012
task: ffff8804203460c0 ti: ffff880420346640
RIP: 0010:[<ffffffff812a1903>] __fscache_uncache_page+0x23/0x160
RSP: 0018:ffff8801053af878 EFLAGS: 00210286
RAX: 0000000000000000 RBX: ffff8800be2f8780 RCX: ffff88022ffae5e8
RDX: 0000000000004c66 RSI: ffffea00055ff440 RDI: ffff8800be2f8780
RBP: ffff8801053af898 R08: 0000000000000001 R09: 0000000000000003
R10: 0000000000000000 R11: 0000000000000000 R12: ffffea00055ff440
R13: 0000000000001000 R14: ffff8800c50be538 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffff88042fc60000(0063) knlGS:00000000e439c700
CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
CR2: 0000000000000010 CR3: 0000000001d8f000 CR4: 00000000000607f0
Stack:
...
Call Trace:
[<ffffffff81365a72>] __nfs_fscache_invalidate_page+0x42/0x70
[<ffffffff813553d5>] nfs_invalidate_page+0x75/0x90
[<ffffffff811b8f5e>] truncate_inode_page+0x8e/0x90
[<ffffffff811b90ad>] truncate_inode_pages_range.part.12+0x14d/0x620
[<ffffffff81d6387d>] ? __mutex_lock_slowpath+0x1fd/0x2e0
[<ffffffff811b95d3>] truncate_inode_pages_range+0x53/0x70
[<ffffffff811b969d>] truncate_inode_pages+0x2d/0x40
[<ffffffff811b96ff>] truncate_pagecache+0x4f/0x70
[<ffffffff81356840>] nfs_setattr_update_inode+0xa0/0x120
[<ffffffff81368de4>] nfs3_proc_setattr+0xc4/0xe0
[<ffffffff81357f78>] nfs_setattr+0xc8/0x150
[<ffffffff8122d95b>] notify_change+0x1cb/0x390
[<ffffffff8120a55b>] do_truncate+0x7b/0xc0
[<ffffffff8121f96c>] do_last+0xa4c/0xfd0
[<ffffffff8121ffbc>] path_openat+0xcc/0x670
[<ffffffff81220a0e>] do_filp_open+0x4e/0xb0
[<ffffffff8120ba1f>] do_sys_open+0x13f/0x2b0
[<ffffffff8126aaf6>] compat_SyS_open+0x36/0x50
[<ffffffff81d7204c>] sysenter_dispatch+0x7/0x24

The code at the instruction pointer was disassembled:

> (gdb) disas __fscache_uncache_page
> Dump of assembler code for function __fscache_uncache_page:
> ...
> 0xffffffff812a18ff <+31>: mov 0x48(%rbx),%rax
> 0xffffffff812a1903 <+35>: cmpb $0x0,0x10(%rax)
> 0xffffffff812a1907 <+39>: je 0xffffffff812a19cd <__fscache_uncache_page+237>

These instructions make up:

	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);

That cmpb is the faulting instruction (%rax is 0).  So cookie->def is NULL -
which presumably means that the cookie has already been at least partway
through __fscache_relinquish_cookie().

What I think may be happening is something like a three-way race on the same
file:

	PROCESS 1	PROCESS 2	PROCESS 3
	===============	===============	===============
	open(O_TRUNC|O_WRONLY)
			open(O_RDONLY)
					open(O_WRONLY)
	-->nfs_open()
	-->nfs_fscache_set_inode_cookie()
	nfs_fscache_inode_lock()
	nfs_fscache_disable_inode_cookie()
	__fscache_relinquish_cookie()
	nfs_inode->fscache = NULL
	<--nfs_fscache_set_inode_cookie()

			-->nfs_open()
			-->nfs_fscache_set_inode_cookie()
			nfs_fscache_inode_lock()
			nfs_fscache_enable_inode_cookie()
			__fscache_acquire_cookie()
			nfs_inode->fscache = cookie
			<--nfs_fscache_set_inode_cookie()
	<--nfs_open()
	-->nfs_setattr()
	...
	...
	-->nfs_invalidate_page()
	-->__nfs_fscache_invalidate_page()
	cookie = nfsi->fscache
					-->nfs_open()
					-->nfs_fscache_set_inode_cookie()
					nfs_fscache_inode_lock()
					nfs_fscache_disable_inode_cookie()
					-->__fscache_relinquish_cookie()
	-->__fscache_uncache_page(cookie)
	<crash>
					<--__fscache_relinquish_cookie()
					nfs_inode->fscache = NULL
					<--nfs_fscache_set_inode_cookie()

What is needed is something to prevent process #2 from reacquiring the cookie
- and I think checking i_writecount should do the trick.

It's also possible to have a two-way race on this if the file is opened
O_TRUNC|O_RDONLY instead.

Reported-by: Mark Moseley <moseleymark@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
2013-09-27 18:40:25 +01:00

440 lines
12 KiB
C

/* NFS filesystem cache interface
*
* Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include <linux/in6.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#define NFSDBG_FACILITY NFSDBG_FSCACHE
/*
 * Tree of the superblock cache keys currently in use.  Keys are linked in
 * by nfs_fscache_get_super_cookie() and erased again by
 * nfs_fscache_release_super_cookie(); the tree is what lets a second
 * superblock with an identical key be refused.
 */
static struct rb_root nfs_fscache_keys = RB_ROOT;
/* Held around every lookup, insertion and removal on nfs_fscache_keys. */
static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
/*
 * Acquire the fscache index cookie for an NFS client.
 *
 * The cookie is requested as a child of the NFS netfs primary index, using
 * nfs_fscache_server_index_def as its definition and the client record
 * itself as the netfs data.  Whatever fscache_acquire_cookie() returns is
 * stored in clp->fscache without further checking.
 *
 * This is done unconditionally for every client; superblock cookies are
 * later acquired underneath it (see nfs_fscache_get_super_cookie()).
 *
 * NOTE(review): the trailing `true` is presumably the "enable immediately"
 * argument of fscache_acquire_cookie() - confirm against the fscache API
 * used by this tree.
 */
void nfs_fscache_get_client_cookie(struct nfs_client *clp)
{
	struct fscache_cookie *server_index;

	/* create a cache index for looking up filehandles */
	server_index = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
					      &nfs_fscache_server_index_def,
					      clp, true);
	clp->fscache = server_index;

	dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
		 clp, server_index);
}
/*
* Dispose of a per-client cookie
*/
void nfs_fscache_release_client_cookie(struct nfs_client *clp)
{
dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n",
clp, clp->fscache);
fscache_relinquish_cookie(clp->fscache, 0);
clp->fscache = NULL;
}
/*
* Get the cache cookie for an NFS superblock. We have to handle
* uniquification here because the cache doesn't do it for us.
*
* The default uniquifier is just an empty string, but it may be overridden
* either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
* superblock across an automount point of some nature.
*/
void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
{
struct nfs_fscache_key *key, *xkey;
struct nfs_server *nfss = NFS_SB(sb);
struct rb_node **p, *parent;
int diff;
if (!uniq) {
uniq = "";
ulen = 1;
}
key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
if (!key)
return;
key->nfs_client = nfss->nfs_client;
key->key.super.s_flags = sb->s_flags & NFS_MS_MASK;
key->key.nfs_server.flags = nfss->flags;
key->key.nfs_server.rsize = nfss->rsize;
key->key.nfs_server.wsize = nfss->wsize;
key->key.nfs_server.acregmin = nfss->acregmin;
key->key.nfs_server.acregmax = nfss->acregmax;
key->key.nfs_server.acdirmin = nfss->acdirmin;
key->key.nfs_server.acdirmax = nfss->acdirmax;
key->key.nfs_server.fsid = nfss->fsid;
key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor;
key->key.uniq_len = ulen;
memcpy(key->key.uniquifier, uniq, ulen);
spin_lock(&nfs_fscache_keys_lock);
p = &nfs_fscache_keys.rb_node;
parent = NULL;
while (*p) {
parent = *p;
xkey = rb_entry(parent, struct nfs_fscache_key, node);
if (key->nfs_client < xkey->nfs_client)
goto go_left;
if (key->nfs_client > xkey->nfs_client)
goto go_right;
diff = memcmp(&key->key, &xkey->key, sizeof(key->key));
if (diff < 0)
goto go_left;
if (diff > 0)
goto go_right;
if (key->key.uniq_len == 0)
goto non_unique;
diff = memcmp(key->key.uniquifier,
xkey->key.uniquifier,
key->key.uniq_len);
if (diff < 0)
goto go_left;
if (diff > 0)
goto go_right;
goto non_unique;
go_left:
p = &(*p)->rb_left;
continue;
go_right:
p = &(*p)->rb_right;
}
rb_link_node(&key->node, parent, p);
rb_insert_color(&key->node, &nfs_fscache_keys);
spin_unlock(&nfs_fscache_keys_lock);
nfss->fscache_key = key;
/* create a cache index for looking up filehandles */
nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
&nfs_fscache_super_index_def,
nfss, true);
dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
return;
non_unique:
spin_unlock(&nfs_fscache_keys_lock);
kfree(key);
nfss->fscache_key = NULL;
nfss->fscache = NULL;
printk(KERN_WARNING "NFS:"
" Cache request denied due to non-unique superblock keys\n");
}
/*
* release a per-superblock cookie
*/
void nfs_fscache_release_super_cookie(struct super_block *sb)
{
struct nfs_server *nfss = NFS_SB(sb);
dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
fscache_relinquish_cookie(nfss->fscache, 0);
nfss->fscache = NULL;
if (nfss->fscache_key) {
spin_lock(&nfs_fscache_keys_lock);
rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys);
spin_unlock(&nfs_fscache_keys_lock);
kfree(nfss->fscache_key);
nfss->fscache_key = NULL;
}
}
/*
 * Set up the cache cookie pointer of a freshly initialised NFS inode.
 *
 * The pointer always starts out NULL.  Only regular files go on to acquire
 * a data cookie, parented on the superblock's index cookie; every other
 * file type is left without one.
 *
 * NOTE(review): the trailing `false` is presumably the "enable" argument of
 * fscache_acquire_cookie(), i.e. the cookie is created disabled and only
 * switched on later by nfs_fscache_open_file() - confirm against the
 * fscache API used by this tree.
 */
void nfs_fscache_init_inode(struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	nfsi->fscache = NULL;

	if (S_ISREG(inode->i_mode)) {
		struct fscache_cookie *sb_index = NFS_SB(inode->i_sb)->fscache;

		nfsi->fscache = fscache_acquire_cookie(sb_index,
						       &nfs_fscache_inode_object_def,
						       nfsi, false);
	}
}
/*
 * Hand an inode's cookie back to fscache and clear the inode's pointer to
 * it.  The cookie is relinquished with `false` as the second argument.
 */
void nfs_fscache_clear_inode(struct inode *inode)
{
	struct fscache_cookie *cookie = nfs_i_fscache(inode);
	struct nfs_inode *nfsi = NFS_I(inode);

	dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);

	fscache_relinquish_cookie(cookie, false);
	nfsi->fscache = NULL;
}
static bool nfs_fscache_can_enable(void *data)
{
struct inode *inode = data;
return !inode_is_open_for_write(inode);
}
/*
 * Switch caching on or off for a file at open time.
 *
 * The inode's cookie is obtained when the inode is initialised; this
 * function only toggles it.  Nothing is done if the inode has no valid
 * cookie.
 *
 * - If the inode is currently open for writing (by this or any other
 *   opener), NFS_INO_FSCACHE is cleared, the cookie is disabled and all of
 *   the inode's pages are uncached.
 * - Otherwise an attempt is made to enable the cookie, with
 *   nfs_fscache_can_enable() re-checking the open-for-write state, and
 *   NFS_INO_FSCACHE is set only if the cookie really ended up enabled.
 *
 * @filp is not used by the function body.
 *
 * The original notes for this function state that the caller pins
 * i_writecount via the file struct before calling when opening for write,
 * and that parallel nfs_open() calls may run this concurrently; neither can
 * be verified from this file alone.
 */
void nfs_fscache_open_file(struct inode *inode, struct file *filp)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct fscache_cookie *cookie = nfs_i_fscache(inode);

	if (!fscache_cookie_valid(cookie))
		return;

	if (!inode_is_open_for_write(inode)) {
		dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
		fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode);
		/* The enable can be vetoed, so only flag the inode on success. */
		if (fscache_cookie_enabled(cookie))
			set_bit(NFS_INO_FSCACHE, &nfsi->flags);
		return;
	}

	dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
	clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
	fscache_disable_cookie(cookie, true);
	fscache_uncache_all_inode_pages(cookie, inode);
}
EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
/*
 * Try to detach a page from the cache so that the page can be released.
 *
 * Pages not marked PG_fscache need no work.  For marked pages, fscache is
 * asked whether the page may be released; if it refuses, the page is
 * reported as busy.  A successful release is counted in the
 * NFSIOS_FSCACHE_PAGES_UNCACHED statistic.
 *
 * Returns 1 if the page can be released, 0 if it is still busy.
 */
int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
	struct inode *host;
	struct fscache_cookie *cookie;

	if (!PageFsCache(page))
		return 1;

	host = page->mapping->host;
	cookie = nfs_i_fscache(host);
	/* A page flagged as cached must belong to an inode with a cookie. */
	BUG_ON(cookie == NULL);

	dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
		 cookie, page, NFS_I(host));

	if (!fscache_maybe_release_page(cookie, page, gfp))
		return 0;

	nfs_add_fscache_stats(host, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
	return 1;
}
/*
* Release the caching state associated with a page if undergoing complete page
* invalidation.
*/
void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
{
struct fscache_cookie *cookie = nfs_i_fscache(inode);
BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
cookie, page, NFS_I(inode));
fscache_wait_on_page_write(cookie, page);
BUG_ON(!PageLocked(page));
fscache_uncache_page(cookie, page);
nfs_add_fscache_stats(page->mapping->host,
NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
}
/*
 * Completion handler for a page read that was submitted to the cache.
 *
 * @context is the value that was passed to the fscache read call (the NFS
 * open context) and @error is the outcome of the cache read.
 *
 * - On success the page is marked up to date and unlocked.
 * - On failure the read is retried through nfs_readpage_async(); the page
 *   is unlocked here only if starting that fallback read fails as well.
 *
 * NOTE(review): historically documented as running in process (keventd)
 * context - not verifiable from this file.
 */
static void nfs_readpage_from_fscache_complete(struct page *page,
					       void *context,
					       int error)
{
	dfprintk(FSCACHE,
		 "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
		 page, context, error);

	if (error == 0) {
		SetPageUptodate(page);
		unlock_page(page);
		return;
	}

	/* The cache could not supply the data: go to the server instead. */
	if (nfs_readpage_async(context, page->mapping->host, page) != 0)
		unlock_page(page);
}
/*
 * Ask the cache to supply a single page.
 *
 * The read is submitted with nfs_readpage_from_fscache_complete() as the
 * completion handler and @ctx as its context.
 *
 * Returns:
 *   0        - the read was submitted to the cache (counted as READ_OK);
 *   1        - the cache reported -ENOBUFS or -ENODATA, i.e. it does not
 *              hold the data (counted as READ_FAIL);
 *   other    - any other value from fscache_read_or_alloc_page() is passed
 *              through unchanged (also counted as READ_FAIL).
 */
int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
				struct inode *inode, struct page *page)
{
	struct fscache_cookie *cookie = nfs_i_fscache(inode);
	int ret;

	dfprintk(FSCACHE,
		 "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
		 cookie, page, page->index, page->flags, inode);

	ret = fscache_read_or_alloc_page(cookie, page,
					 nfs_readpage_from_fscache_complete,
					 ctx, GFP_KERNEL);

	if (ret == 0) {
		/* read BIO submitted (page in fscache) */
		dfprintk(FSCACHE,
			 "NFS: readpage_from_fscache: BIO submitted\n");
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1);
		return 0;
	}

	if (ret == -ENOBUFS || ret == -ENODATA) {
		/* inode not in cache / page not in cache */
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
		dfprintk(FSCACHE,
			 "NFS: readpage_from_fscache %d\n", ret);
		return 1;
	}

	dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret);
	nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
	return ret;
}
/*
 * Ask the cache to supply a list of pages.
 *
 * @pages/@nr_pages describe the pages wanted; on return *nr_pages holds the
 * number of pages the cache did not take (it is 0 when every page was
 * submitted, as asserted in the success branch below).  Reads are submitted
 * with nfs_readpage_from_fscache_complete() as the completion handler and
 * @ctx as its context.
 *
 * Statistics: the pages the cache accepted (the drop in *nr_pages) are
 * counted as READ_OK and the pages left over as READ_FAIL, so the two
 * counters together account for each requested page exactly once.
 *
 * Returns:
 *   0        - reads were submitted for all pages;
 *   1        - the cache reported -ENOBUFS or -ENODATA, i.e. at least some
 *              pages are not available from the cache;
 *   other    - any other value from fscache_read_or_alloc_pages() is passed
 *              through unchanged.
 */
int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
				 struct inode *inode,
				 struct address_space *mapping,
				 struct list_head *pages,
				 unsigned *nr_pages)
{
	unsigned npages = *nr_pages;
	int ret;

	dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
		 nfs_i_fscache(inode), npages, inode);

	ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode),
					  mapping, pages, nr_pages,
					  nfs_readpage_from_fscache_complete,
					  ctx,
					  mapping_gfp_mask(mapping));

	/*
	 * Only the pages actually taken by the cache count as successful
	 * reads; the remainder is accounted as failed just below.
	 */
	if (*nr_pages < npages)
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK,
				      npages - *nr_pages);
	if (*nr_pages > 0)
		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL,
				      *nr_pages);

	switch (ret) {
	case 0: /* read submitted to the cache for all pages */
		BUG_ON(!list_empty(pages));
		BUG_ON(*nr_pages != 0);
		dfprintk(FSCACHE,
			 "NFS: nfs_getpages_from_fscache: submitted\n");
		return ret;

	case -ENOBUFS: /* some pages aren't cached and can't be */
	case -ENODATA: /* some pages aren't cached */
		dfprintk(FSCACHE,
			 "NFS: nfs_getpages_from_fscache: no page: %d\n", ret);
		return 1;

	default:
		dfprintk(FSCACHE,
			 "NFS: nfs_getpages_from_fscache: ret %d\n", ret);
	}

	return ret;
}
/*
 * Push a page that has just been fetched into the cache.
 *
 * The page is handed to fscache_write_page().  If that succeeds the
 * WRITTEN_OK statistic is bumped; if it fails the page is uncached again
 * and both WRITTEN_FAIL and UNCACHED are bumped.  @sync is only reported in
 * the debug output.
 *
 * The original notes require PG_fscache to be set on the page; that is not
 * checked here.
 */
void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
{
	struct fscache_cookie *cookie = nfs_i_fscache(inode);
	int ret;

	dfprintk(FSCACHE,
		 "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
		 cookie, page, page->index, page->flags, sync);

	ret = fscache_write_page(cookie, page, GFP_KERNEL);

	dfprintk(FSCACHE,
		 "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
		 page, page->index, page->flags, ret);

	if (ret == 0) {
		nfs_add_fscache_stats(inode,
				      NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1);
		return;
	}

	/* The cache refused the page: drop its association with the cache. */
	fscache_uncache_page(cookie, page);
	nfs_add_fscache_stats(inode,
			      NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
	nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
}