linux_dsm_epyc7002/fs/overlayfs/namei.c
Amir Goldstein fa0096e3ba ovl: do not cleanup unsupported index entries
With index=on, ovl_indexdir_cleanup() tries to clean up invalid index
entries (e.g. bad index name). This behavior could result in cleaning of
entries created by newer kernels and is therefore undesirable.
Instead, abort mount if such entries are encountered. We still clean up
'stale' entries and 'orphan' entries; both of those cases can be a result
of offline changes to lower and upper dirs.

When encountering an index entry of type directory or whiteout, kernel
was supposed to fall back to read-only mount, but the fill_super()
operation returns EROFS in this case instead of returning success with
read-only mount flag, so mount fails when encountering directory or
whiteout index entries. Bless this behavior by returning -EINVAL on
directory and whiteout index entries as we do for all unsupported index
entries.

Fixes: 61b674710c ("ovl: do not cleanup directory and whiteout index..")
Cc: <stable@vger.kernel.org> # v4.13
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
2017-10-24 16:06:17 +02:00

799 lines
18 KiB
C

/*
* Copyright (C) 2011 Novell Inc.
* Copyright (C) 2016 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
#include "ovl_entry.h"
/*
 * State carried through the per-layer lookup helpers while one overlay
 * name is being resolved by ovl_lookup().
 */
struct ovl_lookup_data {
	/* Name to look up; repointed at @redirect once a redirect is found */
	struct qstr name;
	/* Set by ovl_lookup_single() once a directory has been found */
	bool is_dir;
	/* Set when a whiteout or an opaque directory has been found */
	bool opaque;
	/* Set when the remaining layers must not be searched */
	bool stop;
	/* Set by ovl_lookup() while the layer being searched is the last one */
	bool last;
	/* Allocated redirect path backing @name, or NULL; released with kfree() */
	char *redirect;
};
/*
 * Read the redirect xattr of @dentry and, if it holds a usable value, make
 * it the name that the lookup continues with.
 *
 * An absolute redirect (leading '/') must not contain empty path elements
 * and replaces the lookup name as a whole.  A relative redirect must not
 * contain '/' at all and is prefixed with the first @prelen bytes of the
 * current lookup name.  In both cases @post is appended to the result.
 *
 * On success the new path is owned by @d (d->redirect, also installed as
 * d->name) and any previous d->redirect is freed.
 *
 * Returns -ENOMEM if the path buffer cannot be allocated and 0 otherwise.
 * A missing, unreadable or malformed redirect is not an error: it is
 * logged (except when the xattr is absent or unsupported) and @d is left
 * untouched.
 */
static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
			      size_t prelen, const char *post)
{
	char *path = NULL;
	char *cur, *sep;
	int size;

	/* First call only probes for the size of the xattr value */
	size = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
	if (size == -ENODATA || size == -EOPNOTSUPP)
		return 0;
	if (size < 0)
		goto fail;

	/* Room for prefix + redirect value + suffix + terminating NUL */
	path = kzalloc(prelen + size + strlen(post) + 1, GFP_KERNEL);
	if (!path)
		return -ENOMEM;
	if (!size)
		goto invalid;

	size = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, path, size);
	if (size < 0)
		goto fail;
	if (!size)
		goto invalid;

	if (path[0] != '/') {
		/* Relative redirect: a single name replacing the last element */
		if (strchr(path, '/'))
			goto invalid;
		memmove(path + prelen, path, size);
		memcpy(path, d->name.name, prelen);
	} else {
		/* Absolute redirect: every '/' must be followed by a name */
		cur = path;
		while (*cur == '/') {
			cur++;
			sep = strchrnul(cur, '/');
			if (sep == cur)
				goto invalid;
			cur = sep;
		}
	}

	strcat(path, post);
	kfree(d->redirect);
	d->redirect = path;
	d->name.name = path;
	d->name.len = strlen(path);
	return 0;

fail:
	pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", size);
	goto err_free;
invalid:
	pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", path);
err_free:
	kfree(path);
	return 0;
}
/*
 * Acceptability callback handed to exportfs_decode_fh(): every decoded
 * dentry is accepted, so both arguments are ignored.
 */
static int ovl_acceptable(void *ctx, struct dentry *dentry)
{
	return 1;
}
/*
 * Read and sanity check the origin file handle stored in the
 * OVL_XATTR_ORIGIN xattr of @dentry.
 *
 * Returns an allocated file handle that the caller must kfree(),
 * ERR_PTR(-ENOMEM) if the buffer cannot be allocated, or NULL when there is
 * no usable origin: xattr absent or unsupported, empty value, read failure,
 * malformed handle, unknown version/flags or endianness mismatch.
 */
static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
{
	struct ovl_fh *fh = NULL;
	int res;

	/* Probe for the size of the stored handle */
	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
	if (res == -ENODATA || res == -EOPNOTSUPP)
		return NULL;
	if (res < 0)
		goto fail;
	/* Zero size value means "copied up but origin unknown" */
	if (!res)
		return NULL;

	fh = kzalloc(res, GFP_KERNEL);
	if (!fh)
		return ERR_PTR(-ENOMEM);

	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
	if (res < 0)
		goto fail;

	/* The value must hold a whole header, the declared length and the magic */
	if (res < sizeof(struct ovl_fh) || res < fh->len ||
	    fh->magic != OVL_FH_MAGIC)
		goto invalid;

	/* Treat larger version and unknown flags as "origin unknown" */
	if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
		goto out;

	/* Treat endianness mismatch as "origin unknown" */
	if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
		goto out;

	return fh;

fail:
	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
	goto out;
invalid:
	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
out:
	kfree(fh);
	return NULL;
}
/*
 * Decode the origin file handle stored on @dentry into a dentry on the
 * filesystem mounted at @mnt.
 *
 * Returns the decoded dentry (a reference the caller must dput()), NULL
 * when the origin is unknown (no usable stored handle, uuid of @mnt does
 * not match, stale handle, or the decoded dentry is unsuitable), or an
 * ERR_PTR() passed on from reading or decoding the handle.
 */
static struct dentry *ovl_get_origin(struct dentry *dentry,
				     struct vfsmount *mnt)
{
	struct ovl_fh *fh = ovl_get_origin_fh(dentry);
	struct dentry *origin = NULL;
	int fid_bytes;

	/* NULL and ERR_PTR() values are handed to the caller unchanged */
	if (IS_ERR_OR_NULL(fh))
		return (struct dentry *)fh;

	/*
	 * Make sure that the stored uuid matches the uuid of the lower
	 * layer where file handle will be decoded.
	 */
	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
		goto out;

	/* exportfs_decode_fh() takes the fid length in 4-byte words */
	fid_bytes = (fh->len - offsetof(struct ovl_fh, fid));
	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
				    fid_bytes >> 2, (int)fh->type,
				    ovl_acceptable, NULL);
	if (IS_ERR(origin)) {
		/* Treat stale file handle as "origin unknown" */
		if (origin == ERR_PTR(-ESTALE))
			origin = NULL;
		goto out;
	}

	/* The origin must be a sane dentry of the same file type as @dentry */
	if (ovl_dentry_weird(origin) ||
	    (d_inode(origin)->i_mode & S_IFMT) !=
	    (d_inode(dentry)->i_mode & S_IFMT)) {
		pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
		dput(origin);
		origin = NULL;
	}
out:
	kfree(fh);
	return origin;
}
/* Report whether @dentry passes the OVL_XATTR_OPAQUE directory xattr check. */
static bool ovl_is_opaquedir(struct dentry *dentry)
{
	bool opaque = ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);

	return opaque;
}
/*
 * Look up one path element (@name, @namelen) under @base and update the
 * lookup state in @d according to what was found.
 *
 * @prelen and @post are only forwarded to ovl_check_redirect() when a
 * directory is found.
 *
 * On return value 0, *@ret is either a referenced dentry or NULL:
 *  - lookup failed with -ENOENT/-ENAMETOOLONG, or dentry is negative: NULL
 *  - whiteout: NULL, d->stop and d->opaque are set
 *  - non-directory: d->stop is set; the dentry is returned unless a
 *    directory was already found (d->is_dir), in which case NULL
 *  - directory: d->is_dir is set; if this is not the last layer and the
 *    directory is opaque, d->stop and d->opaque are set; otherwise its
 *    redirect is checked
 *
 * Returns -EREMOTE for a "weird" dentry, or the error from the lookup or
 * from ovl_check_redirect(); *@ret is not written in those cases.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
			     const char *name, unsigned int namelen,
			     size_t prelen, const char *post,
			     struct dentry **ret)
{
	struct dentry *found;
	int err;

	found = lookup_one_len_unlocked(name, base, namelen);
	if (IS_ERR(found)) {
		err = PTR_ERR(found);
		/* Anything but "no such name" is a real failure */
		if (err != -ENOENT && err != -ENAMETOOLONG)
			return err;
		found = NULL;
		goto done;
	}

	if (!found->d_inode)
		goto drop;

	if (ovl_dentry_weird(found)) {
		/* Don't support traversing automounts and other weirdness */
		err = -EREMOTE;
		goto fail;
	}

	if (ovl_is_whiteout(found)) {
		d->opaque = true;
		d->stop = true;
		goto drop;
	}

	if (!d_can_lookup(found)) {
		/* A non-directory always ends the search through the layers */
		d->stop = true;
		if (d->is_dir)
			goto drop;
		goto done;
	}

	d->is_dir = true;
	if (!d->last && ovl_is_opaquedir(found)) {
		d->opaque = true;
		d->stop = true;
		goto done;
	}

	err = ovl_check_redirect(found, d, prelen, post);
	if (err)
		goto fail;

done:
	*ret = found;
	return 0;

drop:
	dput(found);
	found = NULL;
	goto done;

fail:
	dput(found);
	return err;
}
/*
 * Look up d->name in one layer, starting at @base.
 *
 * A name that does not start with '/' is looked up as a single element.
 * A name starting with '/' is walked one element at a time for as long as
 * the current dentry is a directory that can be looked up in.
 *
 * Returns 0 with the resulting dentry (or NULL) in *@ret, -EIO if the walk
 * loses track of the path, or the error from ovl_lookup_single().
 */
static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
			    struct dentry **ret)
{
	/* Counting down from the end, since the prefix can change */
	size_t rem = d->name.len - 1;
	struct dentry *dentry = NULL;
	int err;

	if (d->name.name[0] != '/')
		return ovl_lookup_single(base, d, d->name.name, d->name.len,
					 0, "", ret);

	for (;;) {
		const char *elem, *rest;
		size_t elemlen;
		bool final;

		if (IS_ERR_OR_NULL(base) || !d_can_lookup(base))
			break;

		elem = d->name.name + d->name.len - rem;
		rest = strchrnul(elem, '/');
		elemlen = rest - elem;
		/* Latched now: the lookup below may replace d->name */
		final = !rest[0];

		/* Verify we did not go off the rails */
		if (WARN_ON(elem[-1] != '/'))
			return -EIO;

		err = ovl_lookup_single(base, d, elem, elemlen,
					d->name.len - rem, rest, &base);
		/* Drop the element we just walked through */
		dput(dentry);
		if (err)
			return err;

		dentry = base;
		if (final)
			break;

		/* Skip this element and the '/' that follows it */
		rem -= elemlen + 1;
		if (WARN_ON(rem >= d->name.len))
			return -EIO;
	}

	*ret = dentry;
	return 0;
}
/*
 * Try to decode the origin file handle of @upperdentry on each of the
 * @numlower layers in @lowerstack, in order, and record the first hit.
 *
 * On a hit the origin path is stored in the first slot of *@stackp (the
 * slot is allocated here when *@stackp is NULL) and *@ctrp is set to 1;
 * *@ctrp must be 0 on entry.  When no layer yields an origin, *@stackp and
 * *@ctrp are left alone.
 *
 * Returns 0 on success (with or without a hit), -ENOMEM if the slot cannot
 * be allocated, or the error from ovl_get_origin().
 */
static int ovl_check_origin(struct dentry *upperdentry,
			    struct path *lowerstack, unsigned int numlower,
			    struct path **stackp, unsigned int *ctrp)
{
	struct dentry *origin = NULL;
	struct vfsmount *mnt;
	struct path *slot;
	int layer;

	/* Stop at the first layer that decodes the handle */
	for (layer = 0; layer < numlower && !origin; layer++) {
		mnt = lowerstack[layer].mnt;
		origin = ovl_get_origin(upperdentry, mnt);
		if (IS_ERR(origin))
			return PTR_ERR(origin);
	}
	if (!origin)
		return 0;

	BUG_ON(*ctrp);

	slot = *stackp;
	if (!slot) {
		slot = kmalloc(sizeof(struct path), GFP_KERNEL);
		if (!slot) {
			dput(origin);
			return -ENOMEM;
		}
		*stackp = slot;
	}

	*slot = (struct path) { .dentry = origin, .mnt = mnt };
	*ctrp = 1;
	return 0;
}
/*
 * Compare @fh against the origin file handle stored in OVL_XATTR_ORIGIN of
 * @dentry.
 *
 * Return 0 on match, -ESTALE on mismatch, -ENODATA when no usable handle
 * is stored, or another negative error from reading the stored handle.
 */
static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
{
	struct ovl_fh *stored = ovl_get_origin_fh(dentry);
	int err;

	if (!stored)
		return -ENODATA;
	if (IS_ERR(stored))
		return PTR_ERR(stored);

	/* Handles match only if both length and content are identical */
	if (fh->len == stored->len && !memcmp(fh, stored, fh->len))
		err = 0;
	else
		err = -ESTALE;

	kfree(stored);
	return err;
}
/*
 * Verify that an inode matches the origin file handle stored in upper inode.
 *
 * @dentry:   dentry whose OVL_XATTR_ORIGIN is checked (and possibly set)
 * @mnt:      not used by this function
 * @origin:   dentry whose encoded file handle is the expected origin
 * @is_upper: passed through to ovl_encode_fh()
 * @set:      if true and there is no stored file handle, encode and store
 *            the origin file handle in OVL_XATTR_ORIGIN
 *
 * Return 0 on match, -ESTALE on mismatch, < 0 on error.  Every failure is
 * logged with the origin's name and inode number.
 */
int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
		      struct dentry *origin, bool is_upper, bool set)
{
	struct inode *inode;
	struct ovl_fh *fh;
	int err;

	fh = ovl_encode_fh(origin, is_upper);
	if (IS_ERR(fh)) {
		err = PTR_ERR(fh);
		/*
		 * The common exit path frees fh; it must never be handed an
		 * ERR_PTR() value, so drop the error pointer here.
		 */
		fh = NULL;
		goto fail;
	}

	err = ovl_verify_origin_fh(dentry, fh);
	/* Nothing stored yet: optionally record the origin now */
	if (set && err == -ENODATA)
		err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
	if (err)
		goto fail;

out:
	kfree(fh);
	return err;

fail:
	inode = d_inode(origin);
	pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
			    origin, inode ? inode->i_ino : 0, err);
	goto out;
}
/*
 * Verify that an index entry name matches the origin file handle stored in
 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
 *
 * Return 0 on match (or when @index is negative), -ESTALE on mismatch or
 * stale origin, -EINVAL for an unsupported or malformed index entry,
 * -ENOENT for an orphan index entry, < 0 on other errors.  All failures
 * except the orphan case are logged.
 */
int ovl_verify_index(struct dentry *index, struct path *lowerstack,
		     unsigned int numlower)
{
	struct path origin = { };
	struct path *stack = &origin;
	struct ovl_fh *fh = NULL;
	unsigned int ctr = 0;
	size_t len;
	int err;

	if (!d_inode(index))
		return 0;

	/*
	 * Directory index entries are going to be used for looking up
	 * redirected upper dirs by lower dir fh when decoding an overlay
	 * file handle of a merge dir. Whiteout index entries are going to be
	 * used as an indication that an exported overlay file handle should
	 * be treated as stale (i.e. after unlink of the overlay inode).
	 * We don't know the verification rules for directory and whiteout
	 * index entries, because they have not been implemented yet, so return
	 * EINVAL if those entries are found to abort the mount to avoid
	 * corrupting an index that was created by a newer kernel.
	 *
	 * A name too short to hold a hex encoded file handle header is
	 * rejected the same way.
	 */
	if (d_is_dir(index) || ovl_is_whiteout(index) ||
	    index->d_name.len < sizeof(struct ovl_fh)*2) {
		err = -EINVAL;
		goto fail;
	}

	/* Two hex digits in the name per file handle byte */
	len = index->d_name.len / 2;
	fh = kzalloc(len, GFP_KERNEL);
	if (!fh) {
		err = -ENOMEM;
		goto fail;
	}

	if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) {
		err = -EINVAL;
		goto fail;
	}

	err = ovl_verify_origin_fh(index, fh);
	if (err)
		goto fail;

	/* The local 'origin' is the one-slot stack filled by the lookup */
	err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
	if (!err && !ctr)
		err = -ESTALE;
	if (err)
		goto fail;

	/* Check if index is orphan and don't warn before cleaning it */
	if (d_inode(index)->i_nlink == 1 &&
	    ovl_get_nlink(index, origin.dentry, 0) == 0)
		err = -ENOENT;

	dput(origin.dentry);
out:
	kfree(fh);
	return err;

fail:
	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
			    index, d_inode(index)->i_mode & S_IFMT, err);
	goto out;
}
/*
 * Lookup in indexdir for the index entry of a lower real inode or a copy up
 * origin inode. The index entry name is the hex representation of the lower
 * inode file handle.
 *
 * If the index dentry is negative, then either no lower aliases have been
 * copied up yet, or aliases have been copied up in older kernels and are
 * not indexed.
 *
 * If the index dentry for a copy up origin inode is positive, but points
 * to an inode different than the upper inode, then either the upper inode
 * has been copied up and not indexed or it was indexed, but since then
 * index dir was cleared. Either way, that index cannot be used to identify
 * the overlay inode.
 *
 * This helper only builds the index name for @origin: on success *@name
 * refers to a newly allocated buffer holding the hex digits, which the
 * caller must kfree().  The buffer is sized for the digits alone, with no
 * room for a terminating NUL, so it must always be used together with
 * name->len.
 *
 * Returns 0 on success, -ENOMEM if the name cannot be allocated, or the
 * error from ovl_encode_fh().
 */
int ovl_get_index_name(struct dentry *origin, struct qstr *name)
{
	struct ovl_fh *fh;
	char *hex, *end;
	int err = -ENOMEM;

	fh = ovl_encode_fh(origin, false);
	if (IS_ERR(fh))
		return PTR_ERR(fh);

	/* Two hex digits per file handle byte */
	hex = kzalloc(fh->len * 2, GFP_KERNEL);
	if (!hex)
		goto out;

	end = bin2hex(hex, fh, fh->len);
	*name = (struct qstr) QSTR_INIT(hex, end - hex);
	err = 0;
out:
	kfree(fh);
	return err;
}
/*
 * Look up the index entry of @origin in the index directory of the overlay
 * that @dentry belongs to.
 *
 * @dentry: overlay dentry being looked up; only used to reach the overlay
 *          filesystem info
 * @upper:  upper dentry found by the lookup, or NULL
 * @origin: lower dentry whose file handle names the index entry
 *
 * Returns a referenced index dentry, NULL when there is no usable index
 * (no such entry, negative entry, or entry whose inode differs from the
 * inode of @upper), ERR_PTR(-EIO) for a bad index entry (weird dentry,
 * whiteout, or file type different from @origin), or an ERR_PTR() from
 * building the index name or from the lookup itself.
 */
static struct dentry *ovl_lookup_index(struct dentry *dentry,
				       struct dentry *upper,
				       struct dentry *origin)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *index;
	struct inode *inode;
	struct qstr name;
	int err;

	err = ovl_get_index_name(origin, &name);
	if (err)
		return ERR_PTR(err);

	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
		if (err == -ENOENT) {
			index = NULL;
			goto out;
		}
		/*
		 * The index name buffer is not NUL terminated, so it must be
		 * printed with an explicit precision ("%.*s"); a plain field
		 * width ("%*s") would let printk read past the allocation.
		 */
		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
				    d_inode(origin)->i_ino, (int)name.len,
				    name.name, err);
		goto out;
	}

	inode = d_inode(index);
	/* No index, or an index that does not belong to this upper inode */
	if (d_is_negative(index) || (upper && d_inode(upper) != inode))
		goto out_dput;

	if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
	    ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
		/*
		 * Index should always be of the same file type as origin
		 * except for the case of a whiteout index. A whiteout
		 * index should only exist if all lower aliases have been
		 * unlinked, which means that finding a lower origin on lookup
		 * whose index is a whiteout should be treated as an error.
		 */
		pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
				    index, d_inode(index)->i_mode & S_IFMT,
				    d_inode(origin)->i_mode & S_IFMT);
		goto fail;
	}

out:
	kfree(name.name);
	return index;

out_dput:
	dput(index);
	index = NULL;
	goto out;

fail:
	dput(index);
	index = ERR_PTR(-EIO);
	goto out;
}
/*
 * Layer iterator for @dentry, starting from the top of the stack.
 *
 * @idx 0 selects the upper layer if @dentry has one, otherwise the first
 * lower layer; @idx N > 0 selects lower layer N - 1.  The selected path is
 * stored in *@path.
 *
 * Returns the index to pass in for the next layer, or -1 if this is the
 * last layer.
 */
int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	BUG_ON(idx < 0);

	if (!idx) {
		ovl_path_upper(dentry, path);
		if (path->dentry) {
			if (oe->numlower)
				return 1;
			return -1;
		}
		/* No upper layer: fall through to the first lower layer */
		idx = 1;
	}

	BUG_ON(idx > oe->numlower);
	*path = oe->lowerstack[idx - 1];

	if (idx < oe->numlower)
		return idx + 1;
	return -1;
}
/*
 * Look up @dentry in the overlay: search the upper layer first, then the
 * lower layers of the parent (or of the root, after an absolute redirect),
 * optionally look up the inode index, and finally attach the resulting
 * overlay entry and inode to @dentry.
 *
 * @dir and @flags are not used by this function.
 *
 * Returns NULL on success, after @dentry has been added to the dcache
 * (with a NULL inode when nothing was found in any layer), or an ERR_PTR()
 * on failure.  The lookup in the layers runs with the credentials
 * installed by ovl_override_creds(), which are reverted on every path
 * after that point.
 */
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
{
	struct ovl_entry *oe;
	const struct cred *old_cred;
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
	struct path *stack = NULL;
	struct dentry *upperdir, *upperdentry = NULL;
	struct dentry *index = NULL;
	unsigned int ctr = 0;
	struct inode *inode = NULL;
	bool upperopaque = false;
	char *upperredirect = NULL;
	struct dentry *this;
	unsigned int i;
	int err;
	struct ovl_lookup_data d = {
		.name = dentry->d_name,
		.is_dir = false,
		.opaque = false,
		.stop = false,
		.last = !poe->numlower,
		.redirect = NULL,
	};

	if (dentry->d_name.len > ofs->namelen)
		return ERR_PTR(-ENAMETOOLONG);

	old_cred = ovl_override_creds(dentry->d_sb);
	upperdir = ovl_dentry_upper(dentry->d_parent);
	if (upperdir) {
		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
		if (err)
			goto out;

		if (upperdentry && unlikely(ovl_dentry_remote(upperdentry))) {
			dput(upperdentry);
			err = -EREMOTE;
			goto out;
		}
		if (upperdentry && !d.is_dir) {
			BUG_ON(!d.stop || d.redirect);
			/*
			 * Lookup copy up origin by decoding origin file handle.
			 * We may get a disconnected dentry, which is fine,
			 * because we only need to hold the origin inode in
			 * cache and use its inode number. We may even get a
			 * connected dentry, that is not under any of the lower
			 * layers root. That is also fine for using its inode
			 * number - it's the same as if we held a reference
			 * to a dentry in lower layer that was moved under us.
			 */
			err = ovl_check_origin(upperdentry, roe->lowerstack,
					       roe->numlower, &stack, &ctr);
			/*
			 * We still hold the reference on upperdentry here, so
			 * leave through the label that drops it.
			 */
			if (err)
				goto out_put_upper;
		}

		if (d.redirect) {
			/* Keep a private copy: d.redirect may change below */
			err = -ENOMEM;
			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
			if (!upperredirect)
				goto out_put_upper;
			/* An absolute redirect is resolved from the root */
			if (d.redirect[0] == '/')
				poe = roe;
		}
		upperopaque = d.opaque;
	}

	if (!d.stop && poe->numlower) {
		err = -ENOMEM;
		stack = kcalloc(ofs->numlower, sizeof(struct path),
				GFP_KERNEL);
		if (!stack)
			goto out_put_upper;
	}

	for (i = 0; !d.stop && i < poe->numlower; i++) {
		struct path lowerpath = poe->lowerstack[i];

		d.last = i == poe->numlower - 1;
		err = ovl_lookup_layer(lowerpath.dentry, &d, &this);
		if (err)
			goto out_put;

		if (!this)
			continue;

		stack[ctr].dentry = this;
		stack[ctr].mnt = lowerpath.mnt;
		ctr++;

		if (d.stop)
			break;

		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
			poe = roe;

			/* Find the current layer on the root dentry */
			for (i = 0; i < poe->numlower; i++)
				if (poe->lowerstack[i].mnt == lowerpath.mnt)
					break;
			if (WARN_ON(i == poe->numlower))
				break;
		}
	}

	/* Lookup index by lower inode and verify it matches upper inode */
	if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
		struct dentry *origin = stack[0].dentry;

		index = ovl_lookup_index(dentry, upperdentry, origin);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			index = NULL;
			goto out_put;
		}
	}

	oe = ovl_alloc_entry(ctr);
	err = -ENOMEM;
	if (!oe)
		goto out_put;

	oe->opaque = upperopaque;
	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
	dentry->d_fsdata = oe;

	if (upperdentry)
		ovl_dentry_set_upper_alias(dentry);
	else if (index)
		/* No upper alias found: use the index as the upper dentry */
		upperdentry = dget(index);

	if (upperdentry || ctr) {
		inode = ovl_get_inode(dentry, upperdentry, index);
		err = PTR_ERR(inode);
		if (IS_ERR(inode))
			goto out_free_oe;

		/* The inode takes over the redirect string from here on */
		OVL_I(inode)->redirect = upperredirect;
		if (index)
			ovl_set_flag(OVL_INDEX, inode);
	}

	revert_creds(old_cred);
	dput(index);
	kfree(stack);
	kfree(d.redirect);
	d_add(dentry, inode);

	return NULL;

out_free_oe:
	dentry->d_fsdata = NULL;
	kfree(oe);
out_put:
	dput(index);
	for (i = 0; i < ctr; i++)
		dput(stack[i].dentry);
	kfree(stack);
out_put_upper:
	dput(upperdentry);
	kfree(upperredirect);
out:
	kfree(d.redirect);
	revert_creds(old_cred);
	return ERR_PTR(err);
}
/*
 * Tell whether something exists under @dentry's name in the lower layers
 * of its parent.
 *
 * Returns true when the topmost lower layer holding the name has a
 * non-whiteout entry there, or when a lookup fails with an unexpected
 * error (existence is then assumed); false when a whiteout is found first
 * or no lower layer has the name.
 */
bool ovl_lower_positive(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
	const struct qstr *name = &dentry->d_name;
	unsigned int layer;

	/*
	 * If dentry is negative, then lower is positive iff this is a
	 * whiteout.
	 */
	if (!dentry->d_inode)
		return oe->opaque;

	/* Negative upper -> positive lower */
	if (!ovl_dentry_upper(dentry))
		return true;

	/* Positive upper -> have to look up lower to see whether it exists */
	for (layer = 0; layer < poe->numlower; layer++) {
		struct dentry *lowerdir = poe->lowerstack[layer].dentry;
		struct dentry *found;
		bool whiteout;

		found = lookup_one_len_unlocked(name->name, lowerdir,
						name->len);
		if (IS_ERR(found)) {
			long err = PTR_ERR(found);

			/* Name cannot exist in this layer: try the next one */
			if (err == -ENOENT || err == -ENAMETOOLONG)
				continue;
			/*
			 * Assume something is there, we just couldn't
			 * access it.
			 */
			return true;
		}

		if (!found->d_inode) {
			dput(found);
			continue;
		}

		/* First layer that has the name decides the answer */
		whiteout = ovl_is_whiteout(found);
		dput(found);
		return !whiteout;
	}

	return false;
}