linux_dsm_epyc7002/fs/fuse/readdir.c
Miklos Szeredi 69e3455115 fuse: allow caching readdir
This patch just adds the cache filling functions, which are invoked if
FOPEN_CACHE_DIR flag is set in the OPENDIR reply.

Cache reading and cache invalidation are added by subsequent patches.

The directory cache uses the page cache.  Directory entries are packed into
a page in the same format as in the READDIR reply.  A page only contains
whole entries, the space at the end of the page is cleared.  The page is
locked while being modified.

Multiple parallel readdirs on the same directory can fill the cache; the
only constraint is that continuity must be maintained (d_off of last entry
points to position of current entry).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2018-10-01 10:07:04 +02:00

354 lines
8.2 KiB
C

/*
FUSE: Filesystem in Userspace
Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
*/
#include "fuse_i.h"
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_inode *fi = get_fuse_inode(dir);
if (!fc->do_readdirplus)
return false;
if (!fc->readdirplus_auto)
return true;
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
return true;
if (ctx->pos == 0)
return true;
return false;
}
static void fuse_add_dirent_to_cache(struct file *file,
struct fuse_dirent *dirent, loff_t pos)
{
struct fuse_inode *fi = get_fuse_inode(file_inode(file));
size_t reclen = FUSE_DIRENT_SIZE(dirent);
pgoff_t index;
struct page *page;
loff_t size;
unsigned int offset;
void *addr;
spin_lock(&fi->rdc.lock);
/*
* Is cache already completed? Or this entry does not go at the end of
* cache?
*/
if (fi->rdc.cached || pos != fi->rdc.pos) {
spin_unlock(&fi->rdc.lock);
return;
}
size = fi->rdc.size;
offset = size & ~PAGE_MASK;
index = size >> PAGE_SHIFT;
/* Dirent doesn't fit in current page? Jump to next page. */
if (offset + reclen > PAGE_SIZE) {
index++;
offset = 0;
}
spin_unlock(&fi->rdc.lock);
if (offset) {
page = find_lock_page(file->f_mapping, index);
} else {
page = find_or_create_page(file->f_mapping, index,
mapping_gfp_mask(file->f_mapping));
}
if (!page)
return;
spin_lock(&fi->rdc.lock);
/* Raced with another readdir */
if (fi->rdc.size != size || WARN_ON(fi->rdc.pos != pos))
goto unlock;
addr = kmap_atomic(page);
if (!offset)
clear_page(addr);
memcpy(addr + offset, dirent, reclen);
kunmap_atomic(addr);
fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
fi->rdc.pos = dirent->off;
unlock:
spin_unlock(&fi->rdc.lock);
unlock_page(page);
put_page(page);
}
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
struct fuse_inode *fi = get_fuse_inode(file_inode(file));
loff_t end;
spin_lock(&fi->rdc.lock);
/* does cache end position match current position? */
if (fi->rdc.pos != pos) {
spin_unlock(&fi->rdc.lock);
return;
}
fi->rdc.cached = true;
end = ALIGN(fi->rdc.size, PAGE_SIZE);
spin_unlock(&fi->rdc.lock);
/* truncate unused tail of cache */
truncate_inode_pages(file->f_mapping, end);
}
static bool fuse_emit(struct file *file, struct dir_context *ctx,
struct fuse_dirent *dirent)
{
struct fuse_file *ff = file->private_data;
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_add_dirent_to_cache(file, dirent, ctx->pos);
return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
dirent->type);
}
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx)
{
while (nbytes >= FUSE_NAME_OFFSET) {
struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!fuse_emit(file, ctx, dirent))
break;
buf += reclen;
nbytes -= reclen;
ctx->pos = dirent->off;
}
return 0;
}
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
u64 attr_version)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
struct dentry *parent = file->f_path.dentry;
struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
* Unlike in the case of fuse_lookup, zero nodeid does not mean
* ENOENT. Instead, it only means the userspace filesystem did
* not want to return attributes/handle for this entry.
*
* So do nothing.
*/
return 0;
}
if (name.name[0] == '.') {
/*
* We could potentially refresh the attributes of the directory
* and its parent?
*/
if (name.len == 1)
return 0;
if (name.name[1] == '.' && name.len == 2)
return 0;
}
if (invalid_nodeid(o->nodeid))
return -EIO;
if (!fuse_valid_type(o->attr.mode))
return -EIO;
fc = get_fuse_conn(dir);
name.hash = full_name_hash(parent, name.name, name.len);
dentry = d_lookup(parent, &name);
if (!dentry) {
retry:
dentry = d_alloc_parallel(parent, &name, &wq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (!d_in_lookup(dentry)) {
struct fuse_inode *fi;
inode = d_inode(dentry);
if (!inode ||
get_node_id(inode) != o->nodeid ||
((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
d_invalidate(dentry);
dput(dentry);
goto retry;
}
if (is_bad_inode(inode)) {
dput(dentry);
return -EIO;
}
fi = get_fuse_inode(inode);
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
forget_all_cached_acls(inode);
fuse_change_attributes(inode, &o->attr,
entry_attr_timeout(o),
attr_version);
/*
* The other branch comes via fuse_iget()
* which bumps nlookup inside
*/
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, entry_attr_timeout(o),
attr_version);
if (!inode)
inode = ERR_PTR(-ENOMEM);
alias = d_splice_alias(inode, dentry);
d_lookup_done(dentry);
if (alias) {
dput(dentry);
dentry = alias;
}
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
fuse_change_entry_timeout(dentry, o);
dput(dentry);
return 0;
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx, u64 attr_version)
{
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
size_t reclen;
int over = 0;
int ret;
while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
direntplus = (struct fuse_direntplus *) buf;
dirent = &direntplus->dirent;
reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!over) {
/* We fill entries into dstbuf only as much as
it can hold. But we still continue iterating
over remaining entries to link them. If not,
we need to send a FORGET for each of those
which we did not link.
*/
over = !fuse_emit(file, ctx, dirent);
if (!over)
ctx->pos = dirent->off;
}
buf += reclen;
nbytes -= reclen;
ret = fuse_direntplus_link(file, direntplus, attr_version);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
return 0;
}
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
int plus, err;
size_t nbytes;
struct page *page;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
u64 attr_version = 0;
bool locked;
if (is_bad_inode(inode))
return -EIO;
req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
page = alloc_page(GFP_KERNEL);
if (!page) {
fuse_put_request(fc, req);
return -ENOMEM;
}
plus = fuse_use_readdirplus(inode, ctx);
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE;
if (plus) {
attr_version = fuse_get_attr_version(fc);
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
fuse_request_send(fc, req);
fuse_unlock_inode(inode, locked);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) {
if (!nbytes) {
struct fuse_file *ff = file->private_data;
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
err = parse_dirplusfile(page_address(page), nbytes,
file, ctx, attr_version);
} else {
err = parse_dirfile(page_address(page), nbytes, file,
ctx);
}
}
__free_page(page);
fuse_invalidate_atime(inode);
return err;
}