linux_dsm_epyc7002/fs/reiserfs/dir.c
Jeff Mahoney 278f6679f4 reiserfs: locking, handle nested locks properly
The reiserfs write lock replaced the BKL and uses similar semantics.

Frederic's locking code makes a distinction between when the lock is nested
and when it's being acquired/released, but I don't think that's the right
distinction to make.

The right distinction is between the lock being released at end-of-use and
the lock being released for a schedule. The unlock should return the depth
and the lock should restore it, rather than the other way around as it is now.

This patch implements that and adds a number of places where the lock
should be dropped.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
2013-08-08 17:34:46 -04:00

317 lines
9.0 KiB
C

/*
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include "reiserfs.h"
#include <linux/stat.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
extern const struct reiserfs_key MIN_KEY;
static int reiserfs_readdir(struct file *, struct dir_context *);
static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
int datasync);
const struct file_operations reiserfs_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate = reiserfs_readdir,
.fsync = reiserfs_dir_fsync,
.unlocked_ioctl = reiserfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl,
#endif
};
static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
int datasync)
{
struct inode *inode = filp->f_mapping->host;
int err;
err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;
mutex_lock(&inode->i_mutex);
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
mutex_unlock(&inode->i_mutex);
if (err < 0)
return err;
return 0;
}
#define store_ih(where,what) copy_item_head (where, what)
static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *deh)
{
struct dentry *privroot = REISERFS_SB(dir->i_sb)->priv_root;
return (privroot->d_inode &&
deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
}
int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
{
struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
INITIALIZE_PATH(path_to_entry);
struct buffer_head *bh;
int item_num, entry_num;
const struct reiserfs_key *rkey;
struct item_head *ih, tmp_ih;
int search_res;
char *local_buf;
loff_t next_pos;
char small_buf[32]; /* avoid kmalloc if we can */
struct reiserfs_dir_entry de;
int ret = 0;
int depth;
reiserfs_write_lock(inode->i_sb);
reiserfs_check_lock_depth(inode->i_sb, "readdir");
/* form key for search the next directory entry using f_pos field of
file structure */
make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
next_pos = cpu_key_k_offset(&pos_key);
path_to_entry.reada = PATH_READA;
while (1) {
research:
/* search the directory item, containing entry with specified key */
search_res =
search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
&de);
if (search_res == IO_ERROR) {
// FIXME: we could just skip part of directory which could
// not be read
ret = -EIO;
goto out;
}
entry_num = de.de_entry_num;
bh = de.de_bh;
item_num = de.de_item_num;
ih = de.de_ih;
store_ih(&tmp_ih, ih);
/* we must have found item, that is item of this directory, */
RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
"vs-9000: found item %h does not match to dir we readdir %K",
ih, &pos_key);
RFALSE(item_num > B_NR_ITEMS(bh) - 1,
"vs-9005 item_num == %d, item amount == %d",
item_num, B_NR_ITEMS(bh));
/* and entry must be not more than number of entries in the item */
RFALSE(I_ENTRY_COUNT(ih) < entry_num,
"vs-9010: entry number is too big %d (%d)",
entry_num, I_ENTRY_COUNT(ih));
if (search_res == POSITION_FOUND
|| entry_num < I_ENTRY_COUNT(ih)) {
/* go through all entries in the directory item beginning from the entry, that has been found */
struct reiserfs_de_head *deh =
B_I_DEH(bh, ih) + entry_num;
for (; entry_num < I_ENTRY_COUNT(ih);
entry_num++, deh++) {
int d_reclen;
char *d_name;
ino_t d_ino;
if (!de_visible(deh))
/* it is hidden entry */
continue;
d_reclen = entry_length(bh, ih, entry_num);
d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
if (d_reclen <= 0 ||
d_name + d_reclen > bh->b_data + bh->b_size) {
/* There is corrupted data in entry,
* We'd better stop here */
pathrelse(&path_to_entry);
ret = -EIO;
goto out;
}
if (!d_name[d_reclen - 1])
d_reclen = strlen(d_name);
if (d_reclen >
REISERFS_MAX_NAME(inode->i_sb->
s_blocksize)) {
/* too big to send back to VFS */
continue;
}
/* Ignore the .reiserfs_priv entry */
if (is_privroot_deh(inode, deh))
continue;
ctx->pos = deh_offset(deh);
d_ino = deh_objectid(deh);
if (d_reclen <= 32) {
local_buf = small_buf;
} else {
local_buf = kmalloc(d_reclen,
GFP_NOFS);
if (!local_buf) {
pathrelse(&path_to_entry);
ret = -ENOMEM;
goto out;
}
if (item_moved(&tmp_ih, &path_to_entry)) {
kfree(local_buf);
goto research;
}
}
// Note, that we copy name to user space via temporary
// buffer (local_buf) because filldir will block if
// user space buffer is swapped out. At that time
// entry can move to somewhere else
memcpy(local_buf, d_name, d_reclen);
/*
* Since filldir might sleep, we can release
* the write lock here for other waiters
*/
depth = reiserfs_write_unlock_nested(inode->i_sb);
if (!dir_emit
(ctx, local_buf, d_reclen, d_ino,
DT_UNKNOWN)) {
reiserfs_write_lock_nested(inode->i_sb, depth);
if (local_buf != small_buf) {
kfree(local_buf);
}
goto end;
}
reiserfs_write_lock_nested(inode->i_sb, depth);
if (local_buf != small_buf) {
kfree(local_buf);
}
// next entry should be looked for with such offset
next_pos = deh_offset(deh) + 1;
if (item_moved(&tmp_ih, &path_to_entry)) {
set_cpu_key_k_offset(&pos_key,
next_pos);
goto research;
}
} /* for */
}
if (item_num != B_NR_ITEMS(bh) - 1)
// end of directory has been reached
goto end;
/* item we went through is last item of node. Using right
delimiting key check is it directory end */
rkey = get_rkey(&path_to_entry, inode->i_sb);
if (!comp_le_keys(rkey, &MIN_KEY)) {
/* set pos_key to key, that is the smallest and greater
that key of the last entry in the item */
set_cpu_key_k_offset(&pos_key, next_pos);
continue;
}
if (COMP_SHORT_KEYS(rkey, &pos_key)) {
// end of directory has been reached
goto end;
}
/* directory continues in the right neighboring block */
set_cpu_key_k_offset(&pos_key,
le_key_k_offset(KEY_FORMAT_3_5, rkey));
} /* while */
end:
ctx->pos = next_pos;
pathrelse(&path_to_entry);
reiserfs_check_path(&path_to_entry);
out:
reiserfs_write_unlock(inode->i_sb);
return ret;
}
static int reiserfs_readdir(struct file *file, struct dir_context *ctx)
{
return reiserfs_readdir_inode(file_inode(file), ctx);
}
/* compose directory item containing "." and ".." entries (entries are
not aligned to 4 byte boundary) */
/* the last four params are LE */
void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
__le32 par_dirid, __le32 par_objid)
{
struct reiserfs_de_head *deh;
memset(body, 0, EMPTY_DIR_SIZE_V1);
deh = (struct reiserfs_de_head *)body;
/* direntry header of "." */
put_deh_offset(&(deh[0]), DOT_OFFSET);
/* these two are from make_le_item_head, and are are LE */
deh[0].deh_dir_id = dirid;
deh[0].deh_objectid = objid;
deh[0].deh_state = 0; /* Endian safe if 0 */
put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen("."));
mark_de_visible(&(deh[0]));
/* direntry header of ".." */
put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
/* key of ".." for the root directory */
/* these two are from the inode, and are are LE */
deh[1].deh_dir_id = par_dirid;
deh[1].deh_objectid = par_objid;
deh[1].deh_state = 0; /* Endian safe if 0 */
put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen(".."));
mark_de_visible(&(deh[1]));
/* copy ".." and "." */
memcpy(body + deh_location(&(deh[0])), ".", 1);
memcpy(body + deh_location(&(deh[1])), "..", 2);
}
/* compose directory item containing "." and ".." entries */
void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
__le32 par_dirid, __le32 par_objid)
{
struct reiserfs_de_head *deh;
memset(body, 0, EMPTY_DIR_SIZE);
deh = (struct reiserfs_de_head *)body;
/* direntry header of "." */
put_deh_offset(&(deh[0]), DOT_OFFSET);
/* these two are from make_le_item_head, and are are LE */
deh[0].deh_dir_id = dirid;
deh[0].deh_objectid = objid;
deh[0].deh_state = 0; /* Endian safe if 0 */
put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
mark_de_visible(&(deh[0]));
/* direntry header of ".." */
put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
/* key of ".." for the root directory */
/* these two are from the inode, and are are LE */
deh[1].deh_dir_id = par_dirid;
deh[1].deh_objectid = par_objid;
deh[1].deh_state = 0; /* Endian safe if 0 */
put_deh_location(&(deh[1]),
deh_location(&(deh[0])) - ROUND_UP(strlen("..")));
mark_de_visible(&(deh[1]));
/* copy ".." and "." */
memcpy(body + deh_location(&(deh[0])), ".", 1);
memcpy(body + deh_location(&(deh[1])), "..", 2);
}