linux_dsm_epyc7002/net/sunrpc/auth_gss/auth_gss.c

2119 lines
54 KiB
C
Raw Normal View History

/*
* linux/net/sunrpc/auth_gss/auth_gss.c
*
* RPCSEC_GSS client authentication.
*
* Copyright (c) 2000 The Regents of the University of Michigan.
* All rights reserved.
*
* Dug Song <dugsong@monkey.org>
* Andy Adamson <andros@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/auth_gss.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/gss_err.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/uaccess.h>
#include <linux/hashtable.h>
#include "../netns.h"
/*
 * Tentative declarations of the operation tables used by this file.
 * NOTE(review): their initialisers are not in the visible part of the
 * file - presumably defined further down.
 */
static const struct rpc_authops authgss_ops;
static const struct rpc_credops gss_credops;
static const struct rpc_credops gss_nullops;
/*
 * Default value for gss_expired_cred_retry_delay.
 * NOTE(review): the unit is not visible here - presumably seconds;
 * confirm at the place the variable is consumed.
 */
#define GSS_RETRY_EXPIRED 5
static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
SUNRPC: new rpc_credops to test credential expiry. This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO, which defaults to 240 seconds and can be set via a module parameter, as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
/*
 * Default value for gss_key_expire_timeo.
 * NOTE(review): presumably the key-expiry watermark in seconds; the code
 * that consumes it is outside the visible part of the file - confirm there.
 */
#define GSS_KEY_EXPIRE_TIMEO 240
static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
/* Debug facility selector, only defined when CONFIG_SUNRPC_DEBUG is enabled. */
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
/*
 * Twice the maximum auth size.
 * NOTE(review): presumably the slack reserved for the encoded credential;
 * the user of this macro is not visible here.
 */
#define GSS_CRED_SLACK (RPC_MAX_AUTH_SIZE * 2)
/* length of a krb5 verifier (48), plus data added before arguments when
* using integrity (two 4-byte integers): */
#define GSS_VERF_SLACK 100
/*
 * Hash table declared with 4 hash bits, and a spinlock declared next to it.
 * NOTE(review): presumably the table holds struct gss_auth entries (which
 * embed an hlist_node) and the lock guards it - confirm at the users.
 */
static DEFINE_HASHTABLE(gss_auth_hash_table, 4);
static DEFINE_SPINLOCK(gss_auth_hash_lock);
/*
 * One upcall pipe together with the bookkeeping attached to it.
 * NOTE(review): apart from ->pipe, the field roles below are inferred from
 * their types and names; the code that fills them in is not visible here.
 */
struct gss_pipe {
/* presumably the pipe-directory object used to create/destroy the pipe */
struct rpc_pipe_dir_object pdo;
/* the rpc pipe that upcall messages are queued on (see gss_alloc_msg) */
struct rpc_pipe *pipe;
/* presumably the RPC client this pipe was created for */
struct rpc_clnt *clnt;
/* presumably the pipe's file name */
const char *name;
/* reference count */
struct kref kref;
};
/*
 * Per-authenticator RPCSEC_GSS state. The embedded rpc_auth is what the
 * generic RPC code sees; container_of() recovers the gss_auth from it.
 */
struct gss_auth {
/* reference count; each upcall message holds one (see gss_alloc_msg) */
struct kref kref;
/* hash list linkage (presumably for gss_auth_hash_table) */
struct hlist_node hash;
/* embedded generic authenticator */
struct rpc_auth rpc_auth;
/* GSS mechanism; its name and enctypes are sent in v1 upcalls */
struct gss_api_mech *mech;
/* GSS service; compared when looking for a matching pending upcall */
enum rpc_gss_svc service;
struct rpc_clnt *client;
/* network namespace whose pipe version governs the upcall format */
struct net *net;
/*
* There are two upcall pipes; gss_pipe[1], named "gssd", is used
* for the new text-based upcall; gss_pipe[0] is named after the
* mechanism (for example, "krb5") and exists for
* backwards-compatibility with older gssd's.
*/
struct gss_pipe *gss_pipe[2];
/* optional target principal, sent as "target=" in v1 upcalls */
const char *target_name;
};
/* pipe_version >= 0 if and only if someone has a pipe open. */
/* Taken around every read/update of the per-net pipe_version in this file. */
static DEFINE_SPINLOCK(pipe_version_lock);
/* RPC tasks sleep here while no pipe version is known (gss_refresh_upcall). */
static struct rpc_wait_queue pipe_version_rpc_waitqueue;
/* Processes sleep here while no pipe version is known (gss_create_upcall). */
static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
/* Forward declarations of helpers and pipe operation tables used in this file. */
static void gss_put_auth(struct gss_auth *gss_auth);
static void gss_free_ctx(struct gss_cl_ctx *);
static const struct rpc_pipe_ops gss_upcall_ops_v0;
static const struct rpc_pipe_ops gss_upcall_ops_v1;
/*
 * gss_get_ctx - take an additional reference on a GSS context
 *
 * Increments ctx->count and returns the same pointer, so the call can be
 * used directly in an assignment.
 */
static inline struct gss_cl_ctx *
gss_get_ctx(struct gss_cl_ctx *ctx)
{
refcount_inc(&ctx->count);
return ctx;
}
/*
 * gss_put_ctx - drop one reference on a GSS context
 *
 * The context is handed to gss_free_ctx() when the last reference goes away.
 */
static inline void gss_put_ctx(struct gss_cl_ctx *ctx)
{
	if (!refcount_dec_and_test(&ctx->count))
		return;
	gss_free_ctx(ctx);
}
/*
 * gss_cred_set_ctx - install a GSS context on a credential
 *
 * Called from gss_handle_downcall_result and gss_create_upcall in order
 * to set the gss context. The actual exchange of an old context
 * and a new one is protected by the pipe->lock.
 *
 * Nothing is done unless the credential is still flagged RPCAUTH_CRED_NEW.
 */
static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
{
	struct gss_cred *gc = container_of(cred, struct gss_cred, gc_base);

	if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) {
		/* The credential keeps its own reference on the context. */
		gss_get_ctx(ctx);
		rcu_assign_pointer(gc->gc_ctx, ctx);
		set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
		/* Barrier between setting UPTODATE and clearing NEW. */
		smp_mb__before_atomic();
		clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
	}
}
/*
 * simple_get_bytes - copy @len bytes out of a bounded buffer
 * @p: current read position
 * @end: one past the last valid byte
 * @res: destination for the copied bytes
 * @len: number of bytes to copy
 *
 * Returns the position just after the copied bytes, or ERR_PTR(-EFAULT)
 * when the read would run past @end or the position would wrap around.
 */
static const void *
simple_get_bytes(const void *p, const void *end, void *res, size_t len)
{
	const char *start = p;
	const void *next = start + len;

	if (unlikely(next < p || next > end))
		return ERR_PTR(-EFAULT);
	memcpy(res, start, len);
	return next;
}
/*
 * simple_get_netobj - read a length-prefixed opaque object
 * @p: current read position
 * @end: one past the last valid byte
 * @dest: netobj that receives a freshly allocated copy of the data
 *
 * The object is encoded as an unsigned int length followed by that many
 * bytes. On success dest->data holds a kmemdup()'d copy and dest->len the
 * length.
 *
 * Returns the position after the object, or an ERR_PTR: -EFAULT when the
 * encoding does not fit in the buffer, -ENOMEM when the copy cannot be
 * allocated.
 */
static inline const void *
simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
{
	unsigned int len;
	const void *data;
	const void *next;

	data = simple_get_bytes(p, end, &len, sizeof(len));
	if (IS_ERR(data))
		return data;

	next = (const char *)data + len;
	if (unlikely(next < data || next > end))
		return ERR_PTR(-EFAULT);

	dest->data = kmemdup(data, len, GFP_NOFS);
	if (unlikely(!dest->data))
		return ERR_PTR(-ENOMEM);
	dest->len = len;
	return next;
}
/*
 * gss_cred_get_ctx - get a referenced pointer to a credential's context
 *
 * The context pointer is read under rcu_read_lock() and, if non-NULL, a
 * reference is taken before the read-side section ends.
 *
 * Returns the referenced context, or NULL when the credential has none.
 */
static struct gss_cl_ctx *gss_cred_get_ctx(struct rpc_cred *cred)
{
	struct gss_cred *gc = container_of(cred, struct gss_cred, gc_base);
	struct gss_cl_ctx *ctx;

	rcu_read_lock();
	ctx = rcu_dereference(gc->gc_ctx);
	if (ctx != NULL)
		gss_get_ctx(ctx);
	rcu_read_unlock();

	return ctx;
}
/*
 * gss_alloc_context - allocate and initialise an empty GSS client context
 *
 * Returns a zeroed context with one reference held, gc_proc set to
 * RPC_GSS_PROC_DATA and the sequence number starting at 1, or NULL when
 * the allocation fails.
 */
static struct gss_cl_ctx *gss_alloc_context(void)
{
	struct gss_cl_ctx *ctx = kzalloc(sizeof(*ctx), GFP_NOFS);

	if (ctx == NULL)
		return NULL;

	ctx->gc_proc = RPC_GSS_PROC_DATA;
	ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */
	spin_lock_init(&ctx->gc_seq_lock);
	refcount_set(&ctx->count, 1);
	return ctx;
}
/* Lifetime (in seconds) substituted when the downcall reports 0. */
#define GSSD_MIN_TIMEOUT (60 * 60)
/*
 * gss_fill_context - parse the body of a downcall into a GSS context
 * @p: current read position in the downcall buffer
 * @end: one past the last valid byte of the buffer
 * @ctx: context to populate
 * @gm: mechanism used to import the opaque security context
 *
 * Fields are consumed in this order: remaining lifetime in seconds,
 * sequence window, wire context (netobj), security context (length plus
 * blob) and, if any bytes remain, the acceptor name (netobj). A sequence
 * window of 0 means an error is being reported, and an error code follows
 * instead of the contexts.
 *
 * Returns the position after the consumed data, or an ERR_PTR on failure.
 */
static const void *
gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct gss_api_mech *gm)
{
	unsigned long now = jiffies;
	const void *sec_end;
	unsigned int timeout;
	unsigned int seclen;
	u32 window_size;
	int ret;

	/* First unsigned int gives the remaining lifetime in seconds of the
	 * credential - e.g. the remaining TGT lifetime for Kerberos or
	 * the -t value passed to GSSD.
	 */
	p = simple_get_bytes(p, end, &timeout, sizeof(timeout));
	if (IS_ERR(p))
		goto err;
	if (timeout == 0)
		timeout = GSSD_MIN_TIMEOUT;
	ctx->gc_expiry = now + ((unsigned long)timeout * HZ);

	/* Sequence number window. Determines the maximum number of
	 * simultaneous requests
	 */
	p = simple_get_bytes(p, end, &window_size, sizeof(window_size));
	if (IS_ERR(p))
		goto err;
	ctx->gc_win = window_size;

	/* gssd signals an error by passing ctx->gc_win = 0: */
	if (ctx->gc_win == 0) {
		/*
		 * in which case, p points to an error code. Anything other
		 * than -EKEYEXPIRED gets converted to -EACCES.
		 */
		p = simple_get_bytes(p, end, &ret, sizeof(ret));
		if (IS_ERR(p))
			goto err;
		if (ret == -EKEYEXPIRED)
			p = ERR_PTR(-EKEYEXPIRED);
		else
			p = ERR_PTR(-EACCES);
		goto err;
	}

	/* copy the opaque wire context */
	p = simple_get_netobj(p, end, &ctx->gc_wire_ctx);
	if (IS_ERR(p))
		goto err;

	/* import the opaque security context */
	p = simple_get_bytes(p, end, &seclen, sizeof(seclen));
	if (IS_ERR(p))
		goto err;
	sec_end = (const char *)p + seclen;
	if (unlikely(sec_end < p || sec_end > end)) {
		p = ERR_PTR(-EFAULT);
		goto err;
	}
	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
	if (ret < 0) {
		p = ERR_PTR(ret);
		goto err;
	}

	if (sec_end != end) {
		/* trailing data: pull in acceptor name */
		p = simple_get_netobj(sec_end, end, &ctx->gc_acceptor);
		if (IS_ERR(p))
			goto err;
	} else {
		p = sec_end;
	}

	dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
		__func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
		ctx->gc_acceptor.data);
	return p;

err:
	dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p));
	return p;
}
/* Size in bytes of the buffer that holds an encoded upcall (databuf below). */
#define UPCALL_BUF_LEN 128
/*
 * One pending upcall to the user-space daemon, shared by everything that
 * waits for the same answer.
 */
struct gss_upcall_msg {
/* reference count; the message is freed in gss_release_msg() at zero */
refcount_t count;
/* uid the upcall is made for; used to match the downcall */
kuid_t uid;
/* pipe message: data/len describe databuf, errno carries the result */
struct rpc_pipe_msg msg;
/* linkage on pipe->in_downcall while an answer is outstanding */
struct list_head list;
/* authenticator this upcall belongs to; a kref is held on it */
struct gss_auth *auth;
/* pipe the message is queued on; its lock protects the list above */
struct rpc_pipe *pipe;
/* RPC tasks waiting for the result (gss_refresh_upcall) */
struct rpc_wait_queue rpc_waitqueue;
/* processes waiting for the result (gss_create_upcall) */
wait_queue_head_t waitqueue;
/* context delivered by the downcall, NULL until then */
struct gss_cl_ctx *ctx;
/* encoded upcall payload */
char databuf[UPCALL_BUF_LEN];
};
/*
 * get_pipe_version - pin and return the upcall pipe version for @net
 *
 * When a version is established (>= 0) the pipe user count is raised and
 * the version returned; the caller balances this with put_pipe_version().
 * Returns -EAGAIN when no pipe is open yet.
 */
static int get_pipe_version(struct net *net)
{
	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
	int vers = -EAGAIN;

	spin_lock(&pipe_version_lock);
	if (sn->pipe_version >= 0) {
		atomic_inc(&sn->pipe_users);
		vers = sn->pipe_version;
	}
	spin_unlock(&pipe_version_lock);

	return vers;
}
/*
 * put_pipe_version - drop one pipe user for @net
 *
 * When the last user goes away the pipe version is reset to -1 under
 * pipe_version_lock.
 */
static void put_pipe_version(struct net *net)
{
	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);

	/* Returns with the lock held only when the count dropped to zero. */
	if (!atomic_dec_and_lock(&sn->pipe_users, &pipe_version_lock))
		return;
	sn->pipe_version = -1;
	spin_unlock(&pipe_version_lock);
}
static void
gss_release_msg(struct gss_upcall_msg *gss_msg)
{
struct net *net = gss_msg->auth->net;
if (!refcount_dec_and_test(&gss_msg->count))
return;
put_pipe_version(net);
BUG_ON(!list_empty(&gss_msg->list));
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
gss_put_auth(gss_msg->auth);
kfree(gss_msg);
}
/*
 * __gss_find_upcall - look for a pending upcall on a pipe
 * @pipe: pipe whose in_downcall list is searched
 * @uid: uid the upcall must belong to
 * @auth: if non-NULL, the upcall's service must also match auth->service
 *
 * Returns the first match with its reference count raised, or NULL.
 * NOTE(review): the callers in view hold pipe->lock around this call.
 */
static struct gss_upcall_msg *
__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
{
	struct gss_upcall_msg *pos;

	list_for_each_entry(pos, &pipe->in_downcall, list) {
		if (!uid_eq(pos->uid, uid) ||
		    (auth && pos->auth->service != auth->service))
			continue;
		refcount_inc(&pos->count);
		dprintk("RPC: %s found msg %p\n", __func__, pos);
		return pos;
	}

	dprintk("RPC: %s found nothing\n", __func__);
	return NULL;
}
/*
 * gss_add_msg - try to add an upcall to the pipefs queue
 *
 * If an upcall for the same uid and service already exists, a reference
 * to that upcall is returned instead and the new one is left untouched.
 * Otherwise the new upcall gains a reference for the in_downcall list, is
 * linked onto it and is returned.
 */
static inline struct gss_upcall_msg *
gss_add_msg(struct gss_upcall_msg *gss_msg)
{
	struct rpc_pipe *pipe = gss_msg->pipe;
	struct gss_upcall_msg *found;

	spin_lock(&pipe->lock);
	found = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
	if (found != NULL) {
		gss_msg = found;
	} else {
		refcount_inc(&gss_msg->count);
		list_add(&gss_msg->list, &pipe->in_downcall);
	}
	spin_unlock(&pipe->lock);

	return gss_msg;
}
/*
 * __gss_unhash_msg - take an upcall off the in_downcall list and wake waiters
 *
 * Unlinks the message, wakes the RPC tasks (passing msg.errno as status)
 * and the sleeping processes, then drops one reference - the counterpart
 * of the one gss_add_msg() takes when it links the message.
 * The callers in view hold pipe->lock.
 */
static void
__gss_unhash_msg(struct gss_upcall_msg *gss_msg)
{
list_del_init(&gss_msg->list);
rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
wake_up_all(&gss_msg->waitqueue);
/* uses refcount_dec(), not gss_release_msg(): this path never frees the message */
refcount_dec(&gss_msg->count);
}
/*
 * gss_unhash_msg - unlink an upcall from its pipe's in_downcall list
 *
 * A lock-free emptiness check skips the lock when the message is already
 * unlinked; the check is repeated under pipe->lock before unhashing.
 */
static void gss_unhash_msg(struct gss_upcall_msg *gss_msg)
{
	struct rpc_pipe *pipe = gss_msg->pipe;

	if (!list_empty(&gss_msg->list)) {
		spin_lock(&pipe->lock);
		if (!list_empty(&gss_msg->list))
			__gss_unhash_msg(gss_msg);
		spin_unlock(&pipe->lock);
	}
}
static void
gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg)
{
switch (gss_msg->msg.errno) {
case 0:
if (gss_msg->ctx == NULL)
break;
clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx);
break;
case -EKEYEXPIRED:
set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
}
gss_cred->gc_upcall_timestamp = jiffies;
gss_cred->gc_upcall = NULL;
rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
}
/*
 * gss_upcall_callback - RPC wake-up callback for a task that slept on an upcall
 *
 * Applies the downcall result to the task's credential under pipe->lock,
 * copies the upcall's errno into the task status and drops the reference
 * that gss_refresh_upcall() took for this callback.
 */
static void gss_upcall_callback(struct rpc_task *task)
{
	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
	struct gss_cred *gc = container_of(cred, struct gss_cred, gc_base);
	struct gss_upcall_msg *upcall = gc->gc_upcall;
	struct rpc_pipe *pipe = upcall->pipe;

	spin_lock(&pipe->lock);
	gss_handle_downcall_result(gc, upcall);
	spin_unlock(&pipe->lock);

	task->tk_status = upcall->msg.errno;
	gss_release_msg(upcall);
}
/*
 * gss_encode_v0_msg - build a version 0 upcall
 *
 * The payload is just the uid (translated through init_user_ns) copied in
 * binary form into the message's data buffer.
 */
static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
{
	uid_t raw_uid = from_kuid(&init_user_ns, gss_msg->uid);

	/* Compile-time guarantee that the uid fits in the buffer. */
	BUILD_BUG_ON(sizeof(raw_uid) > sizeof(gss_msg->databuf));

	memcpy(gss_msg->databuf, &raw_uid, sizeof(raw_uid));
	gss_msg->msg.len = sizeof(raw_uid);
	gss_msg->msg.data = gss_msg->databuf;
}
/*
 * gss_encode_v1_msg - build a version 1 (text) upcall
 * @gss_msg: message whose databuf receives the text
 * @service_name: optional value for "service=", may be NULL
 * @target_name: optional value for "target=", may be NULL
 *
 * The line consists of space-terminated "key=value" fields (mech, uid and,
 * when available, target, service and enctypes) followed by a newline.
 *
 * Returns 0 on success, or -ENOMEM (after a one-time warning) when no
 * room was left in the buffer for the terminating newline.
 */
static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
		const char *service_name,
		const char *target_name)
{
	struct gss_api_mech *mech = gss_msg->auth->mech;
	size_t room = sizeof(gss_msg->databuf);
	char *cur = gss_msg->databuf;
	int n;

	n = scnprintf(cur, room, "mech=%s uid=%d ", mech->gm_name,
			from_kuid(&init_user_ns, gss_msg->uid));
	room -= n;
	cur += n;
	gss_msg->msg.len = n;

	if (target_name != NULL) {
		n = scnprintf(cur, room, "target=%s ", target_name);
		room -= n;
		cur += n;
		gss_msg->msg.len += n;
	}

	if (service_name != NULL) {
		n = scnprintf(cur, room, "service=%s ", service_name);
		room -= n;
		cur += n;
		gss_msg->msg.len += n;
	}

	if (mech->gm_upcall_enctypes != NULL) {
		n = scnprintf(cur, room, "enctypes=%s ",
				mech->gm_upcall_enctypes);
		room -= n;
		cur += n;
		gss_msg->msg.len += n;
	}

	/* Nothing written for the newline is treated as buffer overflow. */
	n = scnprintf(cur, room, "\n");
	if (n == 0) {
		WARN_ON_ONCE(1);
		return -ENOMEM;
	}
	gss_msg->msg.len += n;

	gss_msg->msg.data = gss_msg->databuf;
	return 0;
}
/*
 * gss_alloc_msg - allocate and encode a new upcall message
 * @gss_auth: authenticator the upcall is made for
 * @uid: uid the upcall is made for
 * @service_name: optional service principal for v1 upcalls
 *
 * Pins the current pipe version, selects the matching pipe and encodes
 * the payload in that version's format. On success the message holds one
 * reference on itself and one on @gss_auth.
 *
 * Returns the new message, or an ERR_PTR: -ENOMEM on allocation failure,
 * the error from get_pipe_version() when no pipe version is available, or
 * the error from gss_encode_v1_msg().
 */
static struct gss_upcall_msg *
gss_alloc_msg(struct gss_auth *gss_auth,
		kuid_t uid, const char *service_name)
{
	struct gss_upcall_msg *gss_msg;
	int vers;
	int err;

	gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
	if (!gss_msg)
		return ERR_PTR(-ENOMEM);

	vers = get_pipe_version(gss_auth->net);
	if (vers < 0) {
		err = vers;
		goto out_free;
	}

	gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe;
	INIT_LIST_HEAD(&gss_msg->list);
	rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
	init_waitqueue_head(&gss_msg->waitqueue);
	refcount_set(&gss_msg->count, 1);
	gss_msg->uid = uid;
	gss_msg->auth = gss_auth;

	if (vers == 0) {
		gss_encode_v0_msg(gss_msg);
	} else {
		err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
		if (err)
			goto out_put_version;
	}

	kref_get(&gss_auth->kref);
	return gss_msg;

out_put_version:
	put_pipe_version(gss_auth->net);
out_free:
	kfree(gss_msg);
	return ERR_PTR(err);
}
static struct gss_upcall_msg *
gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_new, *gss_msg;
kuid_t uid = cred->cr_uid;
gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
if (IS_ERR(gss_new))
return gss_new;
gss_msg = gss_add_msg(gss_new);
if (gss_msg == gss_new) {
SUNRPC: fix refcounting problems with auth_gss messages. There are two problems with refcounting of auth_gss messages. First, the reference on the pipe->pipe list (taken by a call to rpc_queue_upcall()) is not counted. It seems to be assumed that a message in pipe->pipe will always also be in pipe->in_downcall, where it is correctly reference counted. However there is no guaranty of this. I have a report of a NULL dereferences in rpc_pipe_read() which suggests a msg that has been freed is still on the pipe->pipe list. One way I imagine this might happen is: - message is queued for uid=U and auth->service=S1 - rpc.gssd reads this message and starts processing. This removes the message from pipe->pipe - message is queued for uid=U and auth->service=S2 - rpc.gssd replies to the first message. gss_pipe_downcall() calls __gss_find_upcall(pipe, U, NULL) and it finds the *second* message, as new messages are placed at the head of ->in_downcall, and the service type is not checked. - This second message is removed from ->in_downcall and freed by gss_release_msg() (even though it is still on pipe->pipe) - rpc.gssd tries to read another message, and dereferences a pointer to this message that has just been freed. I fix this by incrementing the reference count before calling rpc_queue_upcall(), and decrementing it if that fails, or normally in gss_pipe_destroy_msg(). It seems strange that the reply doesn't target the message more precisely, but I don't know all the details. In any case, I think the reference counting irregularity became a measureable bug when the extra arg was added to __gss_find_upcall(), hence the Fixes: line below. The second problem is that if rpc_queue_upcall() fails, the new message is not freed. gss_alloc_msg() set the ->count to 1, gss_add_msg() increments this to 2, gss_unhash_msg() decrements to 1, then the pointer is discarded so the memory never gets freed. 
Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") Cc: stable@vger.kernel.org Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1011250 Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2016-12-05 11:10:11 +07:00
int res;
refcount_inc(&gss_msg->count);
SUNRPC: fix refcounting problems with auth_gss messages. There are two problems with refcounting of auth_gss messages. First, the reference on the pipe->pipe list (taken by a call to rpc_queue_upcall()) is not counted. It seems to be assumed that a message in pipe->pipe will always also be in pipe->in_downcall, where it is correctly reference counted. However there is no guaranty of this. I have a report of a NULL dereferences in rpc_pipe_read() which suggests a msg that has been freed is still on the pipe->pipe list. One way I imagine this might happen is: - message is queued for uid=U and auth->service=S1 - rpc.gssd reads this message and starts processing. This removes the message from pipe->pipe - message is queued for uid=U and auth->service=S2 - rpc.gssd replies to the first message. gss_pipe_downcall() calls __gss_find_upcall(pipe, U, NULL) and it finds the *second* message, as new messages are placed at the head of ->in_downcall, and the service type is not checked. - This second message is removed from ->in_downcall and freed by gss_release_msg() (even though it is still on pipe->pipe) - rpc.gssd tries to read another message, and dereferences a pointer to this message that has just been freed. I fix this by incrementing the reference count before calling rpc_queue_upcall(), and decrementing it if that fails, or normally in gss_pipe_destroy_msg(). It seems strange that the reply doesn't target the message more precisely, but I don't know all the details. In any case, I think the reference counting irregularity became a measureable bug when the extra arg was added to __gss_find_upcall(), hence the Fixes: line below. The second problem is that if rpc_queue_upcall() fails, the new message is not freed. gss_alloc_msg() set the ->count to 1, gss_add_msg() increments this to 2, gss_unhash_msg() decrements to 1, then the pointer is discarded so the memory never gets freed. 
Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") Cc: stable@vger.kernel.org Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1011250 Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2016-12-05 11:10:11 +07:00
res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg);
if (res) {
gss_unhash_msg(gss_new);
refcount_dec(&gss_msg->count);
SUNRPC: fix refcounting problems with auth_gss messages. There are two problems with refcounting of auth_gss messages. First, the reference on the pipe->pipe list (taken by a call to rpc_queue_upcall()) is not counted. It seems to be assumed that a message in pipe->pipe will always also be in pipe->in_downcall, where it is correctly reference counted. However there is no guaranty of this. I have a report of a NULL dereferences in rpc_pipe_read() which suggests a msg that has been freed is still on the pipe->pipe list. One way I imagine this might happen is: - message is queued for uid=U and auth->service=S1 - rpc.gssd reads this message and starts processing. This removes the message from pipe->pipe - message is queued for uid=U and auth->service=S2 - rpc.gssd replies to the first message. gss_pipe_downcall() calls __gss_find_upcall(pipe, U, NULL) and it finds the *second* message, as new messages are placed at the head of ->in_downcall, and the service type is not checked. - This second message is removed from ->in_downcall and freed by gss_release_msg() (even though it is still on pipe->pipe) - rpc.gssd tries to read another message, and dereferences a pointer to this message that has just been freed. I fix this by incrementing the reference count before calling rpc_queue_upcall(), and decrementing it if that fails, or normally in gss_pipe_destroy_msg(). It seems strange that the reply doesn't target the message more precisely, but I don't know all the details. In any case, I think the reference counting irregularity became a measureable bug when the extra arg was added to __gss_find_upcall(), hence the Fixes: line below. The second problem is that if rpc_queue_upcall() fails, the new message is not freed. gss_alloc_msg() set the ->count to 1, gss_add_msg() increments this to 2, gss_unhash_msg() decrements to 1, then the pointer is discarded so the memory never gets freed. 
Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") Cc: stable@vger.kernel.org Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1011250 Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2016-12-05 11:10:11 +07:00
gss_release_msg(gss_new);
gss_msg = ERR_PTR(res);
}
} else
gss_release_msg(gss_new);
return gss_msg;
}
/* Emit a debug message hinting that the user-space daemon may not be running. */
static void warn_gssd(void)
{
dprintk("AUTH_GSS upcall failed. Please check user daemon is running.\n");
}
/*
 * gss_refresh_upcall - start or join an upcall on behalf of an RPC task
 * @task: task whose request credential needs a context
 *
 * Never blocks: the task is either put to sleep on a wait queue (to be
 * woken when a pipe opens or the downcall arrives) or the already
 * available result is applied immediately.
 *
 * Returns 0 when the task was put to sleep on an upcall or the result was
 * applied successfully, -EAGAIN when no pipe version is known yet, or
 * another negative errno from gss_setup_upcall() or from the upcall itself.
 */
static inline int
gss_refresh_upcall(struct rpc_task *task)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_auth *gss_auth = container_of(cred->cr_auth,
struct gss_auth, rpc_auth);
struct gss_cred *gss_cred = container_of(cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_msg;
struct rpc_pipe *pipe;
int err = 0;
dprintk("RPC: %5u %s for uid %u\n",
task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
* shouldn't normally hit this case on a refresh. */
warn_gssd();
/* no pipe open yet: sleep (timeout 15*HZ) until gss_pipe_open() wakes the queue */
task->tk_timeout = 15*HZ;
rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
return -EAGAIN;
}
if (IS_ERR(gss_msg)) {
err = PTR_ERR(gss_msg);
goto out;
}
pipe = gss_msg->pipe;
spin_lock(&pipe->lock);
/* an upcall is already registered on this credential: just wait for it */
if (gss_cred->gc_upcall != NULL)
rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
/* no answer yet: register this message on the credential and sleep */
else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
task->tk_timeout = 0;
gss_cred->gc_upcall = gss_msg;
/* gss_upcall_callback will release the reference to gss_upcall_msg */
refcount_inc(&gss_msg->count);
rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
} else {
/* the downcall has already completed: apply its result now */
gss_handle_downcall_result(gss_cred, gss_msg);
err = gss_msg->msg.errno;
}
spin_unlock(&pipe->lock);
/* drop the reference returned by gss_setup_upcall() */
gss_release_msg(gss_msg);
out:
dprintk("RPC: %5u %s for uid %u result %d\n",
task->tk_pid, __func__,
from_kuid(&init_user_ns, cred->cr_uid), err);
return err;
}
/*
 * gss_create_upcall - perform an upcall and wait for the answer in process context
 * @gss_auth: authenticator the credential belongs to
 * @gss_cred: credential that needs a context
 *
 * Unlike gss_refresh_upcall() this sleeps in the calling process: first
 * (if necessary) until a pipe version is known, then until the downcall
 * delivers a context or an error.
 *
 * Returns 0 once a context was delivered, -EACCES when the daemon is not
 * running or no pipe is opened in time, -ERESTARTSYS when a fatal signal
 * is pending, or another negative errno from the upcall.
 */
static inline int
gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
{
struct net *net = gss_auth->net;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
struct rpc_cred *cred = &gss_cred->gc_base;
struct gss_upcall_msg *gss_msg;
DEFINE_WAIT(wait);
int err;
dprintk("RPC: %s for uid %u\n",
__func__, from_kuid(&init_user_ns, cred->cr_uid));
retry:
err = 0;
/* if gssd is down, just skip upcalling altogether */
if (!gssd_running(net)) {
warn_gssd();
return -EACCES;
}
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* no pipe version yet: wait up to 15*HZ for a pipe to be opened */
err = wait_event_interruptible_timeout(pipe_version_waitqueue,
sn->pipe_version >= 0, 15 * HZ);
if (sn->pipe_version < 0) {
warn_gssd();
err = -EACCES;
}
/* negative: interrupted, or still no pipe after the wait */
if (err < 0)
goto out;
goto retry;
}
if (IS_ERR(gss_msg)) {
err = PTR_ERR(gss_msg);
goto out;
}
pipe = gss_msg->pipe;
for (;;) {
/* queue ourselves before testing the condition so a wake-up is not missed */
prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE);
spin_lock(&pipe->lock);
if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
/* leaves the loop with pipe->lock still held */
break;
}
spin_unlock(&pipe->lock);
if (fatal_signal_pending(current)) {
err = -ERESTARTSYS;
goto out_intr;
}
schedule();
}
if (gss_msg->ctx)
gss_cred_set_ctx(cred, gss_msg->ctx);
else
err = gss_msg->msg.errno;
spin_unlock(&pipe->lock);
out_intr:
finish_wait(&gss_msg->waitqueue, &wait);
/* drop the reference returned by gss_setup_upcall() */
gss_release_msg(gss_msg);
out:
dprintk("RPC: %s for uid %u result %d\n",
__func__, from_kuid(&init_user_ns, cred->cr_uid), err);
return err;
}
/* Largest downcall, in bytes, that gss_pipe_downcall() accepts. */
#define MSG_BUF_MAXSIZE 1024
/*
 * gss_pipe_downcall - handle a reply written to an upcall pipe
 * @filp: the pipe file being written to
 * @src: user-space buffer holding the downcall
 * @mlen: number of bytes in @src
 *
 * The downcall starts with a uid, which selects the pending upcall on the
 * pipe's in_downcall list; the remainder is parsed by gss_fill_context()
 * into a freshly allocated context. The outcome (context or errno) is
 * stored in the upcall message and every waiter is woken.
 *
 * Returns @mlen when the downcall was consumed - this includes downcalls
 * that report -EACCES or -EKEYEXPIRED for the upcall - or a negative
 * errno: -EFBIG (message too large), -ENOMEM, -EFAULT (bad user buffer or
 * truncated message), -EINVAL (invalid uid), -ENOENT (no matching upcall),
 * or any other error returned by gss_fill_context().
 */
static ssize_t
gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
	const void *p, *end;
	void *buf;
	struct gss_upcall_msg *gss_msg;
	struct rpc_pipe *pipe = RPC_I(file_inode(filp))->pipe;
	struct gss_cl_ctx *ctx;
	uid_t id;
	kuid_t uid;
	ssize_t err = -EFBIG;

	if (mlen > MSG_BUF_MAXSIZE)
		goto out;
	err = -ENOMEM;
	buf = kmalloc(mlen, GFP_NOFS);
	if (!buf)
		goto out;

	err = -EFAULT;
	if (copy_from_user(buf, src, mlen))
		goto err;

	end = (const void *)((char *)buf + mlen);
	p = simple_get_bytes(buf, end, &id, sizeof(id));
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		goto err;
	}

	uid = make_kuid(&init_user_ns, id);
	if (!uid_valid(uid)) {
		err = -EINVAL;
		goto err;
	}

	err = -ENOMEM;
	ctx = gss_alloc_context();
	if (ctx == NULL)
		goto err;

	err = -ENOENT;
	/* Find a matching upcall */
	spin_lock(&pipe->lock);
	gss_msg = __gss_find_upcall(pipe, uid, NULL);
	if (gss_msg == NULL) {
		spin_unlock(&pipe->lock);
		goto err_put_ctx;
	}
	/* Unlink it now so a concurrent downcall cannot pick the same upcall. */
	list_del_init(&gss_msg->list);
	spin_unlock(&pipe->lock);

	p = gss_fill_context(p, end, ctx, gss_msg->auth->mech);
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		switch (err) {
		case -EACCES:
		case -EKEYEXPIRED:
			/* The daemon reported a failure: a valid, consumed reply. */
			gss_msg->msg.errno = err;
			err = mlen;
			break;
		case -EFAULT:
		case -ENOMEM:
		case -EINVAL:
		case -ENOSYS:
			gss_msg->msg.errno = -EAGAIN;
			break;
		default:
			/*
			 * gss_fill_context() can fail with many other errors
			 * (for example from the crypto layer when importing
			 * the context). None of them justifies a kernel panic,
			 * so log it and report a generic error to the waiters.
			 */
			printk(KERN_CRIT "%s: bad return from "
				"gss_fill_context: %zd\n", __func__, err);
			gss_msg->msg.errno = -EIO;
		}
		goto err_release_msg;
	}
	/* The message keeps its own reference on the context. */
	gss_msg->ctx = gss_get_ctx(ctx);
	err = mlen;

err_release_msg:
	/* Wake all waiters and drop the in_downcall list reference... */
	spin_lock(&pipe->lock);
	__gss_unhash_msg(gss_msg);
	spin_unlock(&pipe->lock);
	/* ...then the reference taken by __gss_find_upcall(). */
	gss_release_msg(gss_msg);
err_put_ctx:
	gss_put_ctx(ctx);
err:
	kfree(buf);
out:
	dprintk("RPC: %s returning %zd\n", __func__, err);
	return err;
}
/*
 * gss_pipe_open - account for an open of an upcall pipe
 * @inode: inode of the pipe being opened
 * @new_version: upcall protocol version the opened pipe speaks
 *
 * The first open fixes the pipe version for the network namespace and
 * wakes everything that was waiting for one. Later opens must use the
 * same version. Every successful open counts as one pipe user.
 *
 * Returns 0 on success, or -EBUSY when a different version is in use.
 */
static int gss_pipe_open(struct inode *inode, int new_version)
{
	struct net *net = inode->i_sb->s_fs_info;
	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
	int ret = 0;

	spin_lock(&pipe_version_lock);
	if (sn->pipe_version < 0) {
		/* First open of any gss pipe determines the version: */
		sn->pipe_version = new_version;
		rpc_wake_up(&pipe_version_rpc_waitqueue);
		wake_up(&pipe_version_waitqueue);
	} else if (sn->pipe_version != new_version) {
		/* Trying to open a pipe of a different version */
		ret = -EBUSY;
	}
	if (ret == 0)
		atomic_inc(&sn->pipe_users);
	spin_unlock(&pipe_version_lock);

	return ret;
}
/* Open handler for the version 0 pipe: gss_pipe_open() with version 0. */
static int gss_pipe_open_v0(struct inode *inode)
{
return gss_pipe_open(inode, 0);
}
/* Open handler for the version 1 pipe: gss_pipe_open() with version 1. */
static int gss_pipe_open_v1(struct inode *inode)
{
return gss_pipe_open(inode, 1);
}
/*
 * gss_pipe_release - fail outstanding upcalls when an upcall pipe is released
 * @inode: inode of the pipe
 *
 * Every message on in_downcall whose pipe-message list node is empty is
 * failed with -EPIPE, unhashed (which wakes its waiters) and released.
 * Finally the pipe-user count taken in gss_pipe_open() is dropped.
 */
static void
gss_pipe_release(struct inode *inode)
{
struct net *net = inode->i_sb->s_fs_info;
struct rpc_pipe *pipe = RPC_I(inode)->pipe;
struct gss_upcall_msg *gss_msg;
restart:
spin_lock(&pipe->lock);
list_for_each_entry(gss_msg, &pipe->in_downcall, list) {
/* NOTE(review): a non-empty msg.list presumably means the message is still queued on the pipe for reading - confirm */
if (!list_empty(&gss_msg->msg.list))
continue;
gss_msg->msg.errno = -EPIPE;
/* temporary reference so the message survives __gss_unhash_msg() */
refcount_inc(&gss_msg->count);
__gss_unhash_msg(gss_msg);
spin_unlock(&pipe->lock);
gss_release_msg(gss_msg);
/* the lock was dropped and the list modified: rescan from the head */
goto restart;
}
spin_unlock(&pipe->lock);
put_pipe_version(net);
}
/*
 * Destroy callback for an upcall message.
 *
 * If the message carries an error, it is unhashed (under a temporary
 * extra reference) and, for -ETIMEDOUT, a warning about gssd is issued.
 *
 * In every case the final gss_release_msg() drops the reference that is
 * taken before the message is handed to rpc_queue_upcall(), so that a
 * message sitting on the pipe's queue is always counted.
 */
static void
gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
	struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);

	if (msg->errno < 0) {
		dprintk("RPC: %s releasing msg %p\n",
			__func__, gss_msg);
		refcount_inc(&gss_msg->count);
		gss_unhash_msg(gss_msg);
		if (msg->errno == -ETIMEDOUT)
			warn_gssd();
		gss_release_msg(gss_msg);
	}
	/* Drop the reference held on behalf of the queued upcall */
	gss_release_msg(gss_msg);
}
/*
 * Pipe directory object ->destroy hook: unlink the pipe's dentry, if it
 * has one, and forget the pointer.
 */
static void gss_pipe_dentry_destroy(struct dentry *dir,
		struct rpc_pipe_dir_object *pdo)
{
	struct gss_pipe *gp = pdo->pdo_data;
	struct rpc_pipe *rpc_pipe = gp->pipe;

	if (rpc_pipe->dentry == NULL)
		return;

	rpc_unlink(rpc_pipe->dentry);
	rpc_pipe->dentry = NULL;
}
/*
 * Pipe directory object ->create hook: create the pipe's dentry under
 * @dir and record it in the rpc_pipe.
 *
 * Returns 0 on success or the error encoded by rpc_mkpipe_dentry().
 */
static int gss_pipe_dentry_create(struct dentry *dir,
		struct rpc_pipe_dir_object *pdo)
{
	struct gss_pipe *gp = pdo->pdo_data;
	struct dentry *pipe_dentry;

	pipe_dentry = rpc_mkpipe_dentry(dir, gp->name, gp->clnt, gp->pipe);
	if (IS_ERR(pipe_dentry))
		return PTR_ERR(pipe_dentry);

	gp->pipe->dentry = pipe_dentry;
	return 0;
}
/* Create/destroy hooks used for every gss pipe directory object. */
static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
	.create		= gss_pipe_dentry_create,
	.destroy	= gss_pipe_dentry_destroy,
};
/*
 * Allocate and initialise a gss_pipe named @name for @clnt.
 *
 * The pipe data is created with RPC_PIPE_WAIT_FOR_OPEN and the given
 * upcall operations; the kref starts at one and the embedded pipe
 * directory object points back at the new gss_pipe.
 *
 * Returns the new gss_pipe, ERR_PTR(-ENOMEM) if the allocation fails, or
 * the error reported by rpc_mkpipe_data().
 */
static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt,
		const char *name,
		const struct rpc_pipe_ops *upcall_ops)
{
	struct gss_pipe *gp;

	gp = kmalloc(sizeof(*gp), GFP_KERNEL);
	if (gp == NULL)
		return ERR_PTR(-ENOMEM);

	gp->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
	if (IS_ERR(gp->pipe)) {
		int err = PTR_ERR(gp->pipe);

		kfree(gp);
		return ERR_PTR(err);
	}

	gp->name = name;
	gp->clnt = clnt;
	kref_init(&gp->kref);
	rpc_init_pipe_dir_object(&gp->pdo, &gss_pipe_dir_object_ops, gp);

	return gp;
}
/*
 * Argument bundle handed through rpc_find_or_alloc_pipe_dir_object() to
 * the match and alloc callbacks.
 */
struct gss_alloc_pdo {
	struct rpc_clnt *clnt;			/* client the pipe belongs to */
	const char *name;			/* pipe name to match or create */
	const struct rpc_pipe_ops *upcall_ops;	/* ops for a newly created pipe */
};
/*
 * Match callback: does @pdo belong to a gss pipe with the requested name?
 *
 * Returns 1, with a new kref reference taken on the gss_pipe, when @pdo
 * is a gss pipe object whose name equals args->name and whose refcount
 * has not already dropped to zero.  Returns 0 otherwise.
 */
static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data)
{
	struct gss_alloc_pdo *wanted = data;
	struct gss_pipe *candidate;

	/* Only objects created by this file can be container_of()'d safely */
	if (pdo->pdo_ops != &gss_pipe_dir_object_ops)
		return 0;

	candidate = container_of(pdo, struct gss_pipe, pdo);
	if (strcmp(candidate->name, wanted->name) != 0)
		return 0;

	return kref_get_unless_zero(&candidate->kref) ? 1 : 0;
}
/*
 * Alloc callback: create a new gss_pipe from the gss_alloc_pdo arguments
 * and return its embedded directory object, or NULL if gss_pipe_alloc()
 * failed (the specific error is not propagated).
 */
static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data)
{
	struct gss_alloc_pdo *wanted = data;
	struct gss_pipe *created;

	created = gss_pipe_alloc(wanted->clnt, wanted->name, wanted->upcall_ops);
	if (IS_ERR(created))
		return NULL;

	return &created->pdo;
}
/*
 * Find the gss_pipe called @name among @clnt's pipe directory objects, or
 * create it with @upcall_ops if none matches.
 *
 * Returns the gss_pipe, or ERR_PTR(-ENOMEM) when no object could be found
 * or allocated.
 */
static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt,
		const char *name,
		const struct rpc_pipe_ops *upcall_ops)
{
	struct gss_alloc_pdo args = {
		.clnt = clnt,
		.name = name,
		.upcall_ops = upcall_ops,
	};
	struct net *net = rpc_net_ns(clnt);
	struct rpc_pipe_dir_object *pdo;

	pdo = rpc_find_or_alloc_pipe_dir_object(net,
			&clnt->cl_pipedir_objects,
			gss_pipe_match_pdo,
			gss_pipe_alloc_pdo,
			&args);
	if (pdo == NULL)
		return ERR_PTR(-ENOMEM);

	return container_of(pdo, struct gss_pipe, pdo);
}
/*
 * Tear down a gss_pipe: remove its directory object from the owning
 * client's list, destroy the pipe data and free the structure.
 */
static void __gss_pipe_free(struct gss_pipe *p)
{
	struct rpc_clnt *owner = p->clnt;

	rpc_remove_pipe_dir_object(rpc_net_ns(owner),
			&owner->cl_pipedir_objects,
			&p->pdo);
	rpc_destroy_pipe_data(p->pipe);
	kfree(p);
}
/* kref release callback: free the gss_pipe that embeds @kref. */
static void __gss_pipe_release(struct kref *kref)
{
	__gss_pipe_free(container_of(kref, struct gss_pipe, kref));
}
/* Drop one reference on @p; a NULL pointer is silently ignored. */
static void gss_pipe_free(struct gss_pipe *p)
{
	if (p == NULL)
		return;

	kref_put(&p->kref, __gss_pipe_release);
}
/*
 * NOTE: we have the opportunity to use different
 * parameters based on the input flavor (which must be a pseudoflavor)
 *
 * Allocate and fully initialise a new gss_auth for @clnt.
 *
 * A module reference is held for the lifetime of the auth.  On success
 * the auth owns: a copy of args->target_name (if given), a net
 * reference, a mechanism reference, a credential cache and both upcall
 * pipes.
 *
 * Returns the new gss_auth or an ERR_PTR:
 *   -ENOMEM  module reference or memory allocation failed
 *   -EINVAL  unknown pseudoflavor, no service for it, or gssd not running
 *   other    whatever rpcauth_init_credcache() or gss_pipe_get() reported
 */
static struct gss_auth *
gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
	rpc_authflavor_t flavor = args->pseudoflavor;
	struct gss_auth *gss_auth;
	struct gss_pipe *gss_pipe;
	struct rpc_auth * auth;
	int err = -ENOMEM; /* XXX? */

	dprintk("RPC: creating GSS authenticator for client %p\n", clnt);

	if (!try_module_get(THIS_MODULE))
		return ERR_PTR(err);
	gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL);
	if (!gss_auth)
		goto out_dec;
	INIT_HLIST_NODE(&gss_auth->hash);
	gss_auth->target_name = NULL;
	if (args->target_name) {
		gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL);
		if (gss_auth->target_name == NULL)
			goto err_free;
	}
	gss_auth->client = clnt;
	gss_auth->net = get_net(rpc_net_ns(clnt));
	err = -EINVAL;
	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
	if (!gss_auth->mech) {
		dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
		goto err_put_net;
	}
	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
	if (gss_auth->service == 0)
		goto err_put_mech;
	if (!gssd_running(gss_auth->net))
		goto err_put_mech;
	auth = &gss_auth->rpc_auth;
	auth->au_cslack = GSS_CRED_SLACK >> 2;
	auth->au_rslack = GSS_VERF_SLACK >> 2;
	/* Start with no auth flags; DATATOUCH is OR-ed in below if needed */
	auth->au_flags = 0;
	auth->au_ops = &authgss_ops;
	auth->au_flavor = flavor;
	if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
		auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
	atomic_set(&auth->au_count, 1);
	kref_init(&gss_auth->kref);

	err = rpcauth_init_credcache(auth);
	if (err)
		goto err_put_mech;
	/*
	 * Note: if we created the old pipe first, then someone who
	 * examined the directory at the right moment might conclude
	 * that we supported only the old pipe. So we instead create
	 * the new pipe first.
	 */
	gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1);
	if (IS_ERR(gss_pipe)) {
		err = PTR_ERR(gss_pipe);
		goto err_destroy_credcache;
	}
	gss_auth->gss_pipe[1] = gss_pipe;

	gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name,
			&gss_upcall_ops_v0);
	if (IS_ERR(gss_pipe)) {
		err = PTR_ERR(gss_pipe);
		goto err_destroy_pipe_1;
	}
	gss_auth->gss_pipe[0] = gss_pipe;

	return gss_auth;

	/* Unwind in the reverse order of acquisition */
err_destroy_pipe_1:
	gss_pipe_free(gss_auth->gss_pipe[1]);
err_destroy_credcache:
	rpcauth_destroy_credcache(auth);
err_put_mech:
	gss_mech_put(gss_auth->mech);
err_put_net:
	put_net(gss_auth->net);
err_free:
	kfree(gss_auth->target_name);
	kfree(gss_auth);
out_dec:
	module_put(THIS_MODULE);
	return ERR_PTR(err);
}
/*
 * Final teardown of a gss_auth: drop both pipes, the mechanism and net
 * references, free the target name and the structure itself, then give
 * back the module reference.
 */
static void
gss_free(struct gss_auth *gss_auth)
{
	/* Pipes first ... */
	gss_pipe_free(gss_auth->gss_pipe[0]);
	gss_pipe_free(gss_auth->gss_pipe[1]);

	/* ... then the references the auth was holding ... */
	gss_mech_put(gss_auth->mech);
	put_net(gss_auth->net);

	/* ... and finally the memory and the module pin. */
	kfree(gss_auth->target_name);
	kfree(gss_auth);
	module_put(THIS_MODULE);
}
/* kref release callback: free the gss_auth that embeds @kref. */
static void
gss_free_callback(struct kref *kref)
{
	gss_free(container_of(kref, struct gss_auth, kref));
}
/* Drop one kref reference on @gss_auth; the last put frees it. */
static void
gss_put_auth(struct gss_auth *gss_auth)
{
	struct kref *ref = &gss_auth->kref;

	kref_put(ref, gss_free_callback);
}
/*
 * Destroy a GSS authenticator: remove it from the auth hash table if it
 * is hashed, release both upcall pipes (clearing the pointers so the
 * later gss_free() sees NULL), destroy the credential cache and drop the
 * kref reference on the gss_auth.
 */
static void
gss_destroy(struct rpc_auth *auth)
{
	struct gss_auth *ga = container_of(auth, struct gss_auth, rpc_auth);

	dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
			auth, auth->au_flavor);

	if (hash_hashed(&ga->hash)) {
		spin_lock(&gss_auth_hash_lock);
		hash_del(&ga->hash);
		spin_unlock(&gss_auth_hash_lock);
	}

	gss_pipe_free(ga->gss_pipe[0]);
	ga->gss_pipe[0] = NULL;
	gss_pipe_free(ga->gss_pipe[1]);
	ga->gss_pipe[1] = NULL;

	rpcauth_destroy_credcache(auth);

	gss_put_auth(ga);
}
RPCSEC_GSS: fix crash on destroying gss auth This fixes a regression since eb6dc19d8e72ce3a957af5511d20c0db0a8bd007 "RPCSEC_GSS: Share all credential caches on a per-transport basis" which could cause an occasional oops in the nfsd code (see below). The problem was that an auth was left referencing a client that had been freed. To avoid this we need to ensure that auths are shared only between descendants of a common client; the fact that a clone of an rpc_client takes a reference on its parent then ensures that the parent client will last as long as the auth. Also add a comment explaining what I think was the intention of this code. general protection fault: 0000 [#1] PREEMPT SMP Modules linked in: rpcsec_gss_krb5 nfsd auth_rpcgss oid_registry nfs_acl lockd sunrpc CPU: 3 PID: 4071 Comm: kworker/u8:2 Not tainted 3.11.0-rc2-00182-g025145f #1665 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Workqueue: nfsd4_callbacks nfsd4_do_callback_rpc [nfsd] task: ffff88003e206080 ti: ffff88003c384000 task.ti: ffff88003c384000 RIP: 0010:[<ffffffffa00001f3>] [<ffffffffa00001f3>] rpc_net_ns+0x53/0x70 [sunrpc] RSP: 0000:ffff88003c385ab8 EFLAGS: 00010246 RAX: 6b6b6b6b6b6b6b6b RBX: ffff88003af9a800 RCX: 0000000000000002 RDX: ffffffffa00001a5 RSI: 0000000000000001 RDI: ffffffff81e284e0 RBP: ffff88003c385ad8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000015 R12: ffff88003c990840 R13: ffff88003c990878 R14: ffff88003c385ba8 R15: ffff88003e206080 FS: 0000000000000000(0000) GS:ffff88003fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fcdf737e000 CR3: 000000003ad2b000 CR4: 00000000000006e0 Stack: ffffffffa00001a5 0000000000000006 0000000000000006 ffff88003af9a800 ffff88003c385b08 ffffffffa00d52a4 ffff88003c385ba8 ffff88003c751bd8 ffff88003c751bc0 ffff88003e113600 ffff88003c385b18 ffffffffa00d530c Call Trace: [<ffffffffa00001a5>] ? 
rpc_net_ns+0x5/0x70 [sunrpc] [<ffffffffa00d52a4>] __gss_pipe_release+0x54/0x90 [auth_rpcgss] [<ffffffffa00d530c>] gss_pipe_free+0x2c/0x30 [auth_rpcgss] [<ffffffffa00d678b>] gss_destroy+0x9b/0xf0 [auth_rpcgss] [<ffffffffa000de63>] rpcauth_release+0x23/0x30 [sunrpc] [<ffffffffa0001e81>] rpc_release_client+0x51/0xb0 [sunrpc] [<ffffffffa00020d5>] rpc_shutdown_client+0xe5/0x170 [sunrpc] [<ffffffff81098a14>] ? cpuacct_charge+0xa4/0xb0 [<ffffffff81098975>] ? cpuacct_charge+0x5/0xb0 [<ffffffffa019556f>] nfsd4_process_cb_update.isra.17+0x2f/0x210 [nfsd] [<ffffffff819a4ac0>] ? _raw_spin_unlock_irq+0x30/0x60 [<ffffffff819a4acb>] ? _raw_spin_unlock_irq+0x3b/0x60 [<ffffffff810703ab>] ? process_one_work+0x15b/0x510 [<ffffffffa01957dd>] nfsd4_do_callback_rpc+0x8d/0xa0 [nfsd] [<ffffffff8107041e>] process_one_work+0x1ce/0x510 [<ffffffff810703ab>] ? process_one_work+0x15b/0x510 [<ffffffff810712ab>] worker_thread+0x11b/0x370 [<ffffffff81071190>] ? manage_workers.isra.24+0x2b0/0x2b0 [<ffffffff8107854b>] kthread+0xdb/0xe0 [<ffffffff819a4ac0>] ? _raw_spin_unlock_irq+0x30/0x60 [<ffffffff81078470>] ? __init_kthread_worker+0x70/0x70 [<ffffffff819ac7dc>] ret_from_fork+0x7c/0xb0 [<ffffffff81078470>] ? __init_kthread_worker+0x70/0x70 Code: a5 01 00 a0 31 d2 31 f6 48 c7 c7 e0 84 e2 81 e8 f4 91 0a e1 48 8b 43 60 48 c7 c2 a5 01 00 a0 be 01 00 00 00 48 c7 c7 e0 84 e2 81 <48> 8b 98 10 07 00 00 e8 91 8f 0a e1 e8 +3c 4e 07 e1 48 83 c4 18 RIP [<ffffffffa00001f3>] rpc_net_ns+0x53/0x70 [sunrpc] RSP <ffff88003c385ab8> Signed-off-by: J. Bruce Fields <bfields@redhat.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-09-18 22:16:03 +07:00
/*
 * Auths may be shared between rpc clients that were cloned from a
 * common client with the same xprt, if they also share the flavor and
 * target_name.
 *
 * The auth is looked up from the oldest parent sharing the same
 * cl_xprt, and the auth itself references only that common parent
 * (which is guaranteed to last as long as any of its descendants).
 *
 * Look for a hashed gss_auth matching @clnt, args->pseudoflavor and
 * args->target_name.  If one is found and its au_count could be raised
 * from a non-zero value, it is returned.  Otherwise @new, if non-NULL,
 * is added to the hash table, and @new (possibly NULL) is returned.
 */
static struct gss_auth *
gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
		struct rpc_clnt *clnt,
		struct gss_auth *new)
{
	struct gss_auth *gss_auth;
	unsigned long hashval = (unsigned long)clnt;

	spin_lock(&gss_auth_hash_lock);
	hash_for_each_possible(gss_auth_hash_table,
			gss_auth,
			hash,
			hashval) {
		/* Auths are only shared by users of the very same client */
		if (gss_auth->client != clnt)
			continue;
		if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor)
			continue;
		/* Same pointer (including both NULL) matches without strcmp */
		if (gss_auth->target_name != args->target_name) {
			if (gss_auth->target_name == NULL)
				continue;
			if (args->target_name == NULL)
				continue;
			if (strcmp(gss_auth->target_name, args->target_name))
				continue;
		}
		/* Skip an auth whose count has already reached zero */
		if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
			continue;
		goto out;
	}
	if (new)
		hash_add(gss_auth_hash_table, &new->hash, hashval);
	gss_auth = new;
out:
	spin_unlock(&gss_auth_hash_lock);
	return gss_auth;
}
/*
 * Return a shared gss_auth for @clnt, creating one if none is hashed yet.
 *
 * The hash table is consulted first.  On a miss a new auth is built and
 * offered to the table; if another matching auth turned up in the
 * meantime, the freshly built one is destroyed and the existing auth is
 * returned instead.  An ERR_PTR from gss_create_new() is passed through.
 */
static struct gss_auth *
gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
	struct gss_auth *found;
	struct gss_auth *fresh;

	found = gss_auth_find_or_add_hashed(args, clnt, NULL);
	if (found != NULL)
		return found;

	fresh = gss_create_new(args, clnt);
	if (IS_ERR(fresh))
		return fresh;

	found = gss_auth_find_or_add_hashed(args, clnt, fresh);
	if (found != fresh)
		gss_destroy(&fresh->rpc_auth);

	return found;
}
/*
 * Create (or share) the RPCSEC_GSS authenticator for @clnt.
 *
 * The client's parent chain is walked upwards for as long as the parent
 * uses the same transport switch, and the auth is created/looked up on
 * the topmost such client.  Returns the rpc_auth or an ERR_PTR.
 */
static struct rpc_auth *
gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
	struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
	struct rpc_clnt *parent;
	struct gss_auth *gss_auth;

	/* Find the original parent for this transport */
	for (parent = clnt->cl_parent; parent != clnt; parent = clnt->cl_parent) {
		if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps)
			break;
		clnt = parent;
	}

	gss_auth = gss_create_hashed(args, clnt);
	if (IS_ERR(gss_auth))
		return ERR_CAST(gss_auth);

	return &gss_auth->rpc_auth;
}
/*
 * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
 * to the server with the GSS control procedure field set to
 * RPC_GSS_PROC_DESTROY. This should normally cause the server to release
 * all RPCSEC_GSS state associated with that context.
 *
 * Returns 0 without doing anything if the cred is not flagged
 * RPCAUTH_CRED_UPTODATE, and 1 once the destroy call has been attempted.
 */
static int
gss_destroying_context(struct rpc_cred *cred)
{
	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
	struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
	struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
	struct rpc_task *null_call;

	if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags))
		return 0;

	ctx->gc_proc = RPC_GSS_PROC_DESTROY;
	cred->cr_ops = &gss_nullops;

	/* Take a reference to ensure the cred will be destroyed either
	 * by the RPC call or by the put_rpccred() below */
	get_rpccred(cred);

	null_call = rpc_call_null(gss_auth->client, cred,
			RPC_TASK_ASYNC|RPC_TASK_SOFT);
	if (!IS_ERR(null_call))
		rpc_put_task(null_call);

	put_rpccred(cred);
	return 1;
}
/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
 * to create a new cred or context, so they check that things have been
 * allocated before freeing them. */

/*
 * Release everything owned by @ctx: the GSS security context, the wire
 * context and acceptor buffers, and the structure itself.
 */
static void
gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
	dprintk("RPC: %s\n", __func__);

	gss_delete_sec_context(&ctx->gc_gss_ctx);

	kfree(ctx->gc_wire_ctx.data);
	kfree(ctx->gc_acceptor.data);
	kfree(ctx);
}
/* RCU callback: free the gss_cl_ctx that embeds @head. */
static void
gss_free_ctx_callback(struct rcu_head *head)
{
	gss_do_free_ctx(container_of(head, struct gss_cl_ctx, gc_rcu));
}
/* Queue @ctx to be freed by gss_free_ctx_callback() via call_rcu(). */
static void
gss_free_ctx(struct gss_cl_ctx *ctx)
{
	struct rcu_head *head = &ctx->gc_rcu;

	call_rcu(head, gss_free_ctx_callback);
}
/* Log and free the memory of @gss_cred. */
static void
gss_free_cred(struct gss_cred *gss_cred)
{
	dprintk("RPC: %s cred=%p\n", __func__, gss_cred);

	kfree(gss_cred);
}
/* RCU callback: free the gss_cred whose gc_base.cr_rcu is @head. */
static void
gss_free_cred_callback(struct rcu_head *head)
{
	gss_free_cred(container_of(head, struct gss_cred, gc_base.cr_rcu));
}
/*
 * Destroy @cred without contacting the server: detach its context, queue
 * the cred to be freed via call_rcu(), drop the context reference (if
 * there was a context) and drop the cred's reference on the gss_auth.
 */
static void
gss_destroy_nullcred(struct rpc_cred *cred)
{
	struct gss_cred *gc = container_of(cred, struct gss_cred, gc_base);
	struct gss_auth *ga = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
	struct gss_cl_ctx *old_ctx = rcu_dereference_protected(gc->gc_ctx, 1);

	RCU_INIT_POINTER(gc->gc_ctx, NULL);
	call_rcu(&cred->cr_rcu, gss_free_cred_callback);

	if (old_ctx != NULL)
		gss_put_ctx(old_ctx);

	gss_put_auth(ga);
}
/*
 * Destroy @cred.  If gss_destroying_context() reports that it handled the
 * cred (non-zero return), nothing more is done here; otherwise the cred
 * is torn down locally.
 */
static void
gss_destroy_cred(struct rpc_cred *cred)
{
	if (!gss_destroying_context(cred))
		gss_destroy_nullcred(cred);
}
static int
gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
{
return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits);
}
/*
* Lookup RPCSEC_GSS cred for the current process
*/
static struct rpc_cred *
gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
}
/*
 * Allocate a new, not-yet-established gss_cred for @acred.
 *
 * The cred is flagged RPCAUTH_CRED_NEW only, copies the service from the
 * gss_auth, records the principal for machine creds, and takes a kref
 * reference on the gss_auth.
 *
 * Returns the embedded rpc_cred, or ERR_PTR(-ENOMEM) if allocation fails.
 */
static struct rpc_cred *
gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
{
	struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
	struct gss_cred *gc;
	int err = -ENOMEM;

	dprintk("RPC: %s for uid %d, flavor %d\n",
			__func__, from_kuid(&init_user_ns, acred->uid),
			auth->au_flavor);

	gc = kzalloc(sizeof(*gc), gfp);
	if (gc == NULL) {
		dprintk("RPC: %s failed with error %d\n", __func__, err);
		return ERR_PTR(err);
	}

	rpcauth_init_cred(&gc->gc_base, acred, auth, &gss_credops);
	/*
	 * Note: in order to force a call to call_refresh(), we deliberately
	 * fail to flag the credential as RPCAUTH_CRED_UPTODATE.
	 */
	gc->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
	gc->gc_service = gss_auth->service;
	gc->gc_principal = NULL;
	if (acred->machine_cred)
		gc->gc_principal = acred->principal;

	/* The cred pins the gss_auth for as long as it exists */
	kref_get(&gss_auth->kref);
	return &gc->gc_base;
}
/*
 * Establish a context for @cred by performing the create upcall,
 * repeating it for as long as it reports -EAGAIN.  Returns the first
 * result that is not -EAGAIN.
 */
static int
gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
{
	struct gss_auth *ga = container_of(auth, struct gss_auth, rpc_auth);
	struct gss_cred *gc = container_of(cred, struct gss_cred, gc_base);

	for (;;) {
		int status = gss_create_upcall(ga, gc);

		if (status != -EAGAIN)
			return status;
	}
}
static char *
gss_stringify_acceptor(struct rpc_cred *cred)
{
char *string = NULL;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx;
sunrpc: fix sleeping under rcu_read_lock in gss_stringify_acceptor Bruce reported that he was seeing the following BUG pop: BUG: sleeping function called from invalid context at mm/slab.c:2846 in_atomic(): 0, irqs_disabled(): 0, pid: 4539, name: mount.nfs 2 locks held by mount.nfs/4539: #0: (nfs_clid_init_mutex){+.+.+.}, at: [<ffffffffa01c0a9a>] nfs4_discover_server_trunking+0x4a/0x2f0 [nfsv4] #1: (rcu_read_lock){......}, at: [<ffffffffa00e3185>] gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] Preemption disabled at:[<ffffffff81a4f082>] printk+0x4d/0x4f CPU: 3 PID: 4539 Comm: mount.nfs Not tainted 3.18.0-rc1-00013-g5b095e9 #3393 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff880021499390 ffff8800381476a8 ffffffff81a534cf 0000000000000001 0000000000000000 ffff8800381476c8 ffffffff81097854 00000000000000d0 0000000000000018 ffff880038147718 ffffffff8118e4f3 0000000020479f00 Call Trace: [<ffffffff81a534cf>] dump_stack+0x4f/0x7c [<ffffffff81097854>] __might_sleep+0x114/0x180 [<ffffffff8118e4f3>] __kmalloc+0x1a3/0x280 [<ffffffffa00e31d8>] gss_stringify_acceptor+0x58/0xb0 [auth_rpcgss] [<ffffffffa00e3185>] ? gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] [<ffffffffa006b438>] rpcauth_stringify_acceptor+0x18/0x30 [sunrpc] [<ffffffffa01b0469>] nfs4_proc_setclientid+0x199/0x380 [nfsv4] [<ffffffffa01b04d0>] ? nfs4_proc_setclientid+0x200/0x380 [nfsv4] [<ffffffffa01bdf1a>] nfs40_discover_server_trunking+0xda/0x150 [nfsv4] [<ffffffffa01bde45>] ? nfs40_discover_server_trunking+0x5/0x150 [nfsv4] [<ffffffffa01c0acf>] nfs4_discover_server_trunking+0x7f/0x2f0 [nfsv4] [<ffffffffa01c8e24>] nfs4_init_client+0x104/0x2f0 [nfsv4] [<ffffffffa01539b4>] nfs_get_client+0x314/0x3f0 [nfs] [<ffffffffa0153780>] ? nfs_get_client+0xe0/0x3f0 [nfs] [<ffffffffa01c83aa>] nfs4_set_client+0x8a/0x110 [nfsv4] [<ffffffffa0069708>] ? 
__rpc_init_priority_wait_queue+0xa8/0xf0 [sunrpc] [<ffffffffa01c9b2f>] nfs4_create_server+0x12f/0x390 [nfsv4] [<ffffffffa01c1472>] nfs4_remote_mount+0x32/0x60 [nfsv4] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffffa01c1396>] nfs_do_root_mount+0x86/0xc0 [nfsv4] [<ffffffffa01c1784>] nfs4_try_mount+0x44/0xc0 [nfsv4] [<ffffffffa01549b7>] ? get_nfs_version+0x27/0x90 [nfs] [<ffffffffa0161a2d>] nfs_fs_mount+0x47d/0xd60 [nfs] [<ffffffff81a59c5e>] ? mutex_unlock+0xe/0x10 [<ffffffffa01606a0>] ? nfs_remount+0x430/0x430 [nfs] [<ffffffffa01609c0>] ? nfs_clone_super+0x140/0x140 [nfs] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffff811b5830>] do_mount+0x210/0xbe0 [<ffffffff811b54ca>] ? copy_mount_options+0x3a/0x160 [<ffffffff811b651f>] SyS_mount+0x6f/0xb0 [<ffffffff81a5c852>] system_call_fastpath+0x12/0x17 Sleeping under the rcu_read_lock is bad. This patch fixes it by dropping the rcu_read_lock before doing the allocation and then reacquiring it and redoing the dereference before doing the copy. If we find that the string has somehow grown in the meantime, we'll reallocate and try again. Cc: <stable@vger.kernel.org> # v3.17+ Reported-by: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Jeff Layton <jlayton@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-11-13 19:30:46 +07:00
unsigned int len;
struct xdr_netobj *acceptor;
rcu_read_lock();
ctx = rcu_dereference(gss_cred->gc_ctx);
if (!ctx)
goto out;
sunrpc: fix sleeping under rcu_read_lock in gss_stringify_acceptor Bruce reported that he was seeing the following BUG pop: BUG: sleeping function called from invalid context at mm/slab.c:2846 in_atomic(): 0, irqs_disabled(): 0, pid: 4539, name: mount.nfs 2 locks held by mount.nfs/4539: #0: (nfs_clid_init_mutex){+.+.+.}, at: [<ffffffffa01c0a9a>] nfs4_discover_server_trunking+0x4a/0x2f0 [nfsv4] #1: (rcu_read_lock){......}, at: [<ffffffffa00e3185>] gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] Preemption disabled at:[<ffffffff81a4f082>] printk+0x4d/0x4f CPU: 3 PID: 4539 Comm: mount.nfs Not tainted 3.18.0-rc1-00013-g5b095e9 #3393 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff880021499390 ffff8800381476a8 ffffffff81a534cf 0000000000000001 0000000000000000 ffff8800381476c8 ffffffff81097854 00000000000000d0 0000000000000018 ffff880038147718 ffffffff8118e4f3 0000000020479f00 Call Trace: [<ffffffff81a534cf>] dump_stack+0x4f/0x7c [<ffffffff81097854>] __might_sleep+0x114/0x180 [<ffffffff8118e4f3>] __kmalloc+0x1a3/0x280 [<ffffffffa00e31d8>] gss_stringify_acceptor+0x58/0xb0 [auth_rpcgss] [<ffffffffa00e3185>] ? gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] [<ffffffffa006b438>] rpcauth_stringify_acceptor+0x18/0x30 [sunrpc] [<ffffffffa01b0469>] nfs4_proc_setclientid+0x199/0x380 [nfsv4] [<ffffffffa01b04d0>] ? nfs4_proc_setclientid+0x200/0x380 [nfsv4] [<ffffffffa01bdf1a>] nfs40_discover_server_trunking+0xda/0x150 [nfsv4] [<ffffffffa01bde45>] ? nfs40_discover_server_trunking+0x5/0x150 [nfsv4] [<ffffffffa01c0acf>] nfs4_discover_server_trunking+0x7f/0x2f0 [nfsv4] [<ffffffffa01c8e24>] nfs4_init_client+0x104/0x2f0 [nfsv4] [<ffffffffa01539b4>] nfs_get_client+0x314/0x3f0 [nfs] [<ffffffffa0153780>] ? nfs_get_client+0xe0/0x3f0 [nfs] [<ffffffffa01c83aa>] nfs4_set_client+0x8a/0x110 [nfsv4] [<ffffffffa0069708>] ? 
__rpc_init_priority_wait_queue+0xa8/0xf0 [sunrpc] [<ffffffffa01c9b2f>] nfs4_create_server+0x12f/0x390 [nfsv4] [<ffffffffa01c1472>] nfs4_remote_mount+0x32/0x60 [nfsv4] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffffa01c1396>] nfs_do_root_mount+0x86/0xc0 [nfsv4] [<ffffffffa01c1784>] nfs4_try_mount+0x44/0xc0 [nfsv4] [<ffffffffa01549b7>] ? get_nfs_version+0x27/0x90 [nfs] [<ffffffffa0161a2d>] nfs_fs_mount+0x47d/0xd60 [nfs] [<ffffffff81a59c5e>] ? mutex_unlock+0xe/0x10 [<ffffffffa01606a0>] ? nfs_remount+0x430/0x430 [nfs] [<ffffffffa01609c0>] ? nfs_clone_super+0x140/0x140 [nfs] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffff811b5830>] do_mount+0x210/0xbe0 [<ffffffff811b54ca>] ? copy_mount_options+0x3a/0x160 [<ffffffff811b651f>] SyS_mount+0x6f/0xb0 [<ffffffff81a5c852>] system_call_fastpath+0x12/0x17 Sleeping under the rcu_read_lock is bad. This patch fixes it by dropping the rcu_read_lock before doing the allocation and then reacquiring it and redoing the dereference before doing the copy. If we find that the string has somehow grown in the meantime, we'll reallocate and try again. Cc: <stable@vger.kernel.org> # v3.17+ Reported-by: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Jeff Layton <jlayton@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-11-13 19:30:46 +07:00
len = ctx->gc_acceptor.len;
rcu_read_unlock();
/* no point if there's no string */
sunrpc: fix sleeping under rcu_read_lock in gss_stringify_acceptor Bruce reported that he was seeing the following BUG pop: BUG: sleeping function called from invalid context at mm/slab.c:2846 in_atomic(): 0, irqs_disabled(): 0, pid: 4539, name: mount.nfs 2 locks held by mount.nfs/4539: #0: (nfs_clid_init_mutex){+.+.+.}, at: [<ffffffffa01c0a9a>] nfs4_discover_server_trunking+0x4a/0x2f0 [nfsv4] #1: (rcu_read_lock){......}, at: [<ffffffffa00e3185>] gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] Preemption disabled at:[<ffffffff81a4f082>] printk+0x4d/0x4f CPU: 3 PID: 4539 Comm: mount.nfs Not tainted 3.18.0-rc1-00013-g5b095e9 #3393 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff880021499390 ffff8800381476a8 ffffffff81a534cf 0000000000000001 0000000000000000 ffff8800381476c8 ffffffff81097854 00000000000000d0 0000000000000018 ffff880038147718 ffffffff8118e4f3 0000000020479f00 Call Trace: [<ffffffff81a534cf>] dump_stack+0x4f/0x7c [<ffffffff81097854>] __might_sleep+0x114/0x180 [<ffffffff8118e4f3>] __kmalloc+0x1a3/0x280 [<ffffffffa00e31d8>] gss_stringify_acceptor+0x58/0xb0 [auth_rpcgss] [<ffffffffa00e3185>] ? gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] [<ffffffffa006b438>] rpcauth_stringify_acceptor+0x18/0x30 [sunrpc] [<ffffffffa01b0469>] nfs4_proc_setclientid+0x199/0x380 [nfsv4] [<ffffffffa01b04d0>] ? nfs4_proc_setclientid+0x200/0x380 [nfsv4] [<ffffffffa01bdf1a>] nfs40_discover_server_trunking+0xda/0x150 [nfsv4] [<ffffffffa01bde45>] ? nfs40_discover_server_trunking+0x5/0x150 [nfsv4] [<ffffffffa01c0acf>] nfs4_discover_server_trunking+0x7f/0x2f0 [nfsv4] [<ffffffffa01c8e24>] nfs4_init_client+0x104/0x2f0 [nfsv4] [<ffffffffa01539b4>] nfs_get_client+0x314/0x3f0 [nfs] [<ffffffffa0153780>] ? nfs_get_client+0xe0/0x3f0 [nfs] [<ffffffffa01c83aa>] nfs4_set_client+0x8a/0x110 [nfsv4] [<ffffffffa0069708>] ? 
__rpc_init_priority_wait_queue+0xa8/0xf0 [sunrpc] [<ffffffffa01c9b2f>] nfs4_create_server+0x12f/0x390 [nfsv4] [<ffffffffa01c1472>] nfs4_remote_mount+0x32/0x60 [nfsv4] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffffa01c1396>] nfs_do_root_mount+0x86/0xc0 [nfsv4] [<ffffffffa01c1784>] nfs4_try_mount+0x44/0xc0 [nfsv4] [<ffffffffa01549b7>] ? get_nfs_version+0x27/0x90 [nfs] [<ffffffffa0161a2d>] nfs_fs_mount+0x47d/0xd60 [nfs] [<ffffffff81a59c5e>] ? mutex_unlock+0xe/0x10 [<ffffffffa01606a0>] ? nfs_remount+0x430/0x430 [nfs] [<ffffffffa01609c0>] ? nfs_clone_super+0x140/0x140 [nfs] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffff811b5830>] do_mount+0x210/0xbe0 [<ffffffff811b54ca>] ? copy_mount_options+0x3a/0x160 [<ffffffff811b651f>] SyS_mount+0x6f/0xb0 [<ffffffff81a5c852>] system_call_fastpath+0x12/0x17 Sleeping under the rcu_read_lock is bad. This patch fixes it by dropping the rcu_read_lock before doing the allocation and then reacquiring it and redoing the dereference before doing the copy. If we find that the string has somehow grown in the meantime, we'll reallocate and try again. Cc: <stable@vger.kernel.org> # v3.17+ Reported-by: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Jeff Layton <jlayton@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-11-13 19:30:46 +07:00
if (!len)
return NULL;
realloc:
string = kmalloc(len + 1, GFP_KERNEL);
if (!string)
sunrpc: fix sleeping under rcu_read_lock in gss_stringify_acceptor Bruce reported that he was seeing the following BUG pop: BUG: sleeping function called from invalid context at mm/slab.c:2846 in_atomic(): 0, irqs_disabled(): 0, pid: 4539, name: mount.nfs 2 locks held by mount.nfs/4539: #0: (nfs_clid_init_mutex){+.+.+.}, at: [<ffffffffa01c0a9a>] nfs4_discover_server_trunking+0x4a/0x2f0 [nfsv4] #1: (rcu_read_lock){......}, at: [<ffffffffa00e3185>] gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] Preemption disabled at:[<ffffffff81a4f082>] printk+0x4d/0x4f CPU: 3 PID: 4539 Comm: mount.nfs Not tainted 3.18.0-rc1-00013-g5b095e9 #3393 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff880021499390 ffff8800381476a8 ffffffff81a534cf 0000000000000001 0000000000000000 ffff8800381476c8 ffffffff81097854 00000000000000d0 0000000000000018 ffff880038147718 ffffffff8118e4f3 0000000020479f00 Call Trace: [<ffffffff81a534cf>] dump_stack+0x4f/0x7c [<ffffffff81097854>] __might_sleep+0x114/0x180 [<ffffffff8118e4f3>] __kmalloc+0x1a3/0x280 [<ffffffffa00e31d8>] gss_stringify_acceptor+0x58/0xb0 [auth_rpcgss] [<ffffffffa00e3185>] ? gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] [<ffffffffa006b438>] rpcauth_stringify_acceptor+0x18/0x30 [sunrpc] [<ffffffffa01b0469>] nfs4_proc_setclientid+0x199/0x380 [nfsv4] [<ffffffffa01b04d0>] ? nfs4_proc_setclientid+0x200/0x380 [nfsv4] [<ffffffffa01bdf1a>] nfs40_discover_server_trunking+0xda/0x150 [nfsv4] [<ffffffffa01bde45>] ? nfs40_discover_server_trunking+0x5/0x150 [nfsv4] [<ffffffffa01c0acf>] nfs4_discover_server_trunking+0x7f/0x2f0 [nfsv4] [<ffffffffa01c8e24>] nfs4_init_client+0x104/0x2f0 [nfsv4] [<ffffffffa01539b4>] nfs_get_client+0x314/0x3f0 [nfs] [<ffffffffa0153780>] ? nfs_get_client+0xe0/0x3f0 [nfs] [<ffffffffa01c83aa>] nfs4_set_client+0x8a/0x110 [nfsv4] [<ffffffffa0069708>] ? 
__rpc_init_priority_wait_queue+0xa8/0xf0 [sunrpc] [<ffffffffa01c9b2f>] nfs4_create_server+0x12f/0x390 [nfsv4] [<ffffffffa01c1472>] nfs4_remote_mount+0x32/0x60 [nfsv4] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffffa01c1396>] nfs_do_root_mount+0x86/0xc0 [nfsv4] [<ffffffffa01c1784>] nfs4_try_mount+0x44/0xc0 [nfsv4] [<ffffffffa01549b7>] ? get_nfs_version+0x27/0x90 [nfs] [<ffffffffa0161a2d>] nfs_fs_mount+0x47d/0xd60 [nfs] [<ffffffff81a59c5e>] ? mutex_unlock+0xe/0x10 [<ffffffffa01606a0>] ? nfs_remount+0x430/0x430 [nfs] [<ffffffffa01609c0>] ? nfs_clone_super+0x140/0x140 [nfs] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffff811b5830>] do_mount+0x210/0xbe0 [<ffffffff811b54ca>] ? copy_mount_options+0x3a/0x160 [<ffffffff811b651f>] SyS_mount+0x6f/0xb0 [<ffffffff81a5c852>] system_call_fastpath+0x12/0x17 Sleeping under the rcu_read_lock is bad. This patch fixes it by dropping the rcu_read_lock before doing the allocation and then reacquiring it and redoing the dereference before doing the copy. If we find that the string has somehow grown in the meantime, we'll reallocate and try again. Cc: <stable@vger.kernel.org> # v3.17+ Reported-by: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Jeff Layton <jlayton@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-11-13 19:30:46 +07:00
return NULL;
rcu_read_lock();
ctx = rcu_dereference(gss_cred->gc_ctx);
/* did the ctx disappear or was it replaced by one with no acceptor? */
if (!ctx || !ctx->gc_acceptor.len) {
kfree(string);
string = NULL;
goto out;
sunrpc: fix sleeping under rcu_read_lock in gss_stringify_acceptor Bruce reported that he was seeing the following BUG pop: BUG: sleeping function called from invalid context at mm/slab.c:2846 in_atomic(): 0, irqs_disabled(): 0, pid: 4539, name: mount.nfs 2 locks held by mount.nfs/4539: #0: (nfs_clid_init_mutex){+.+.+.}, at: [<ffffffffa01c0a9a>] nfs4_discover_server_trunking+0x4a/0x2f0 [nfsv4] #1: (rcu_read_lock){......}, at: [<ffffffffa00e3185>] gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] Preemption disabled at:[<ffffffff81a4f082>] printk+0x4d/0x4f CPU: 3 PID: 4539 Comm: mount.nfs Not tainted 3.18.0-rc1-00013-g5b095e9 #3393 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff880021499390 ffff8800381476a8 ffffffff81a534cf 0000000000000001 0000000000000000 ffff8800381476c8 ffffffff81097854 00000000000000d0 0000000000000018 ffff880038147718 ffffffff8118e4f3 0000000020479f00 Call Trace: [<ffffffff81a534cf>] dump_stack+0x4f/0x7c [<ffffffff81097854>] __might_sleep+0x114/0x180 [<ffffffff8118e4f3>] __kmalloc+0x1a3/0x280 [<ffffffffa00e31d8>] gss_stringify_acceptor+0x58/0xb0 [auth_rpcgss] [<ffffffffa00e3185>] ? gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss] [<ffffffffa006b438>] rpcauth_stringify_acceptor+0x18/0x30 [sunrpc] [<ffffffffa01b0469>] nfs4_proc_setclientid+0x199/0x380 [nfsv4] [<ffffffffa01b04d0>] ? nfs4_proc_setclientid+0x200/0x380 [nfsv4] [<ffffffffa01bdf1a>] nfs40_discover_server_trunking+0xda/0x150 [nfsv4] [<ffffffffa01bde45>] ? nfs40_discover_server_trunking+0x5/0x150 [nfsv4] [<ffffffffa01c0acf>] nfs4_discover_server_trunking+0x7f/0x2f0 [nfsv4] [<ffffffffa01c8e24>] nfs4_init_client+0x104/0x2f0 [nfsv4] [<ffffffffa01539b4>] nfs_get_client+0x314/0x3f0 [nfs] [<ffffffffa0153780>] ? nfs_get_client+0xe0/0x3f0 [nfs] [<ffffffffa01c83aa>] nfs4_set_client+0x8a/0x110 [nfsv4] [<ffffffffa0069708>] ? 
__rpc_init_priority_wait_queue+0xa8/0xf0 [sunrpc] [<ffffffffa01c9b2f>] nfs4_create_server+0x12f/0x390 [nfsv4] [<ffffffffa01c1472>] nfs4_remote_mount+0x32/0x60 [nfsv4] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffffa01c1396>] nfs_do_root_mount+0x86/0xc0 [nfsv4] [<ffffffffa01c1784>] nfs4_try_mount+0x44/0xc0 [nfsv4] [<ffffffffa01549b7>] ? get_nfs_version+0x27/0x90 [nfs] [<ffffffffa0161a2d>] nfs_fs_mount+0x47d/0xd60 [nfs] [<ffffffff81a59c5e>] ? mutex_unlock+0xe/0x10 [<ffffffffa01606a0>] ? nfs_remount+0x430/0x430 [nfs] [<ffffffffa01609c0>] ? nfs_clone_super+0x140/0x140 [nfs] [<ffffffff81196489>] mount_fs+0x39/0x1b0 [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20 [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150 [<ffffffff811b5830>] do_mount+0x210/0xbe0 [<ffffffff811b54ca>] ? copy_mount_options+0x3a/0x160 [<ffffffff811b651f>] SyS_mount+0x6f/0xb0 [<ffffffff81a5c852>] system_call_fastpath+0x12/0x17 Sleeping under the rcu_read_lock is bad. This patch fixes it by dropping the rcu_read_lock before doing the allocation and then reacquiring it and redoing the dereference before doing the copy. If we find that the string has somehow grown in the meantime, we'll reallocate and try again. Cc: <stable@vger.kernel.org> # v3.17+ Reported-by: "J. Bruce Fields" <bfields@fieldses.org> Signed-off-by: Jeff Layton <jlayton@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-11-13 19:30:46 +07:00
}
acceptor = &ctx->gc_acceptor;
/*
* Did we find a new acceptor that's longer than the original? Allocate
* a longer buffer and try again.
*/
if (len < acceptor->len) {
len = acceptor->len;
rcu_read_unlock();
kfree(string);
goto realloc;
}
memcpy(string, acceptor->data, acceptor->len);
string[acceptor->len] = '\0';
out:
rcu_read_unlock();
return string;
}
SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
/*
* Returns -EACCES if GSS context is NULL or will expire within the
* timeout (miliseconds)
*/
static int
gss_key_timeout(struct rpc_cred *rc)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx;
unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ);
int ret = 0;
SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
rcu_read_lock();
ctx = rcu_dereference(gss_cred->gc_ctx);
if (!ctx || time_after(timeout, ctx->gc_expiry))
ret = -EACCES;
rcu_read_unlock();
return ret;
SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
}
static int
gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx;
SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
int ret;
if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
goto out;
/* Don't match with creds that have expired. */
rcu_read_lock();
ctx = rcu_dereference(gss_cred->gc_ctx);
if (!ctx || time_after(jiffies, ctx->gc_expiry)) {
rcu_read_unlock();
return 0;
}
rcu_read_unlock();
if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags))
return 0;
out:
if (acred->principal != NULL) {
if (gss_cred->gc_principal == NULL)
return 0;
SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
goto check_expire;
}
if (gss_cred->gc_principal != NULL)
return 0;
SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
ret = uid_eq(rc->cr_uid, acred->uid);
check_expire:
if (ret == 0)
return ret;
/* Notify acred users of GSS context expiration timeout */
if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
(gss_key_timeout(rc) != 0)) {
/* test will now be done from generic cred */
test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
/* tell NFS layer that key will expire soon */
set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
}
return ret;
}
/*
 * Marshal credentials.
 * Maybe we should keep a cached credential for performance reasons.
 *
 * Writes the RPCSEC_GSS credential and verifier for @task's request at
 * the XDR position @p.
 *
 * Returns the XDR position just past the verifier, or NULL if
 * gss_get_mic() fails with anything other than GSS_S_CONTEXT_EXPIRED.
 * The context obtained via gss_cred_get_ctx() is handed back through
 * gss_put_ctx() on both exits (presumably a reference get/put pair -
 * confirm against their definitions).
 */
static __be32 *
gss_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *cred = req->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
__be32 *cred_len;
u32 maj_stat = 0;
struct xdr_netobj mic;
struct kvec iov;
struct xdr_buf verf_buf;
dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
/* credential flavor, then one word reserved for the credential length */
*p++ = htonl(RPC_AUTH_GSS);
cred_len = p++;
/* take the next sequence number for this request under gc_seq_lock */
spin_lock(&ctx->gc_seq_lock);
req->rq_seqno = ctx->gc_seq++;
spin_unlock(&ctx->gc_seq_lock);
/* credential body: version, procedure, seqno, service, wire context */
*p++ = htonl((u32) RPC_GSS_VERSION);
*p++ = htonl((u32) ctx->gc_proc);
*p++ = htonl((u32) req->rq_seqno);
*p++ = htonl((u32) gss_cred->gc_service);
p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
/* back-patch the length: 32-bit words written after the slot, in bytes */
*cred_len = htonl((p - (cred_len + 1)) << 2);
/* We compute the checksum for the verifier over the xdr-encoded bytes
* starting with the xid and ending at the end of the credential: */
iov.iov_base = xprt_skip_transport_header(req->rq_xprt,
req->rq_snd_buf.head[0].iov_base);
iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
xdr_buf_from_iov(&iov, &verf_buf);
/* set verifier flavor */
*p++ = htonl(RPC_AUTH_GSS);
/* the MIC is generated one word past p, leaving p's word free for the
 * opaque length that xdr_encode_opaque() is asked to encode below */
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
/* expired context: drop UPTODATE but still emit the verifier */
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
} else if (maj_stat != 0) {
printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
goto out_put_ctx;
}
/* NULL source: the MIC bytes are already in place at p + 1
 * (NOTE(review): assumes xdr_encode_opaque() skips the copy for NULL) */
p = xdr_encode_opaque(p, NULL, mic.len);
gss_put_ctx(ctx);
return p;
out_put_ctx:
gss_put_ctx(ctx);
return NULL;
}
/*
 * Swap the credential attached to @task's request for one looked up
 * afresh (RPCAUTH_LOOKUP_NEW) using the same uid and principal as the
 * current one.  The lookup is a machine-cred lookup exactly when the
 * current credential carries a principal.
 *
 * Returns 0 on success, after the old credential has been released with
 * put_rpccred(), or the error encoded in the lookup result, in which
 * case the request keeps its original credential.
 */
static int gss_renew_cred(struct rpc_task *task)
{
	struct rpc_cred *stale = task->tk_rqstp->rq_cred;
	struct gss_cred *stale_gss = container_of(stale, struct gss_cred,
						  gc_base);
	struct auth_cred acred = {
		.uid = stale->cr_uid,
		.principal = stale_gss->gc_principal,
		.machine_cred = (stale_gss->gc_principal != NULL ? 1 : 0),
	};
	struct rpc_cred *fresh;

	fresh = gss_lookup_cred(stale->cr_auth, &acred, RPCAUTH_LOOKUP_NEW);
	if (IS_ERR(fresh))
		return PTR_ERR(fresh);

	task->tk_rqstp->rq_cred = fresh;
	put_rpccred(stale);
	return 0;
}
/*
 * Report whether @cred is a negative cache entry that is still inside
 * its retry back-off window.
 *
 * Returns 1 when RPCAUTH_CRED_NEGATIVE is set and the current jiffies
 * value lies in the window that starts at gc_upcall_timestamp and lasts
 * gss_expired_cred_retry_delay * HZ jiffies (as tested by
 * time_in_range_open()); returns 0 otherwise.
 */
static int gss_cred_is_negative_entry(struct rpc_cred *cred)
{
	struct gss_cred *gss_cred;
	unsigned long now, window_start, window_end;

	if (!test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags))
		return 0;

	now = jiffies;
	gss_cred = container_of(cred, struct gss_cred, gc_base);
	window_start = gss_cred->gc_upcall_timestamp;
	window_end = window_start + gss_expired_cred_retry_delay * HZ;

	return time_in_range_open(now, window_start, window_end) ? 1 : 0;
}
/*
 * Refresh the credential attached to @task's request.  XXX - finish
 *
 * - A negative entry still in its back-off window fails with
 *   -EKEYEXPIRED.
 * - A credential that is neither NEW nor UPTODATE is replaced through
 *   gss_renew_cred(); a failure there is returned as is.
 * - If the (possibly replaced) credential is NEW, an upcall is issued
 *   via gss_refresh_upcall() and its result returned.
 */
static int
gss_refresh(struct rpc_task *task)
{
	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
	int ret = 0;

	if (gss_cred_is_negative_entry(cred))
		return -EKEYEXPIRED;

	if (!(test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) ||
	      test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags))) {
		ret = gss_renew_cred(task);
		if (ret < 0)
			return ret;
		/* the request now points at the replacement credential */
		cred = task->tk_rqstp->rq_cred;
	}

	if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
		ret = gss_refresh_upcall(task);

	return ret;
}
/*
 * No-op refresh routine, used only when destroying the context.
 * Ignores @task and always reports success (0).
 */
static int gss_refresh_null(struct rpc_task *task)
{
	return 0;
}
/*
 * Check the verifier found at XDR position @p in the reply to @task.
 *
 * The verifier must be of flavor RPC_AUTH_GSS, no longer than
 * RPC_MAX_AUTH_SIZE, and must be a valid MIC over the request's
 * sequence number (in network byte order).
 *
 * Returns the XDR position just past the verifier on success.  On
 * failure returns ERR_PTR(-EIO) for a bad flavor/length or a failed
 * allocation, and ERR_PTR(-EACCES) when gss_verify_mic() rejects the
 * MIC.  If the MIC check reports GSS_S_CONTEXT_EXPIRED the credential's
 * UPTODATE flag is cleared as well.
 */
static __be32 *
gss_validate(struct rpc_task *task, __be32 *p)
{
	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
	__be32 *seqbuf = NULL;
	__be32 *ret = ERR_PTR(-EIO);
	struct xdr_netobj mic;
	struct xdr_buf verf_buf;
	struct kvec iov;
	u32 flav, len;
	u32 maj_stat;

	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);

	/* both header words are consumed before either is validated */
	flav = ntohl(*p++);
	len = ntohl(*p++);
	if (len > RPC_MAX_AUTH_SIZE || flav != RPC_AUTH_GSS)
		goto out_bad;

	/* the signed data is just the request's sequence number */
	seqbuf = kmalloc(4, GFP_NOFS);
	if (seqbuf == NULL)
		goto out_bad;
	*seqbuf = htonl(task->tk_rqstp->rq_seqno);
	iov.iov_base = seqbuf;
	iov.iov_len = 4;
	xdr_buf_from_iov(&iov, &verf_buf);

	mic.data = (u8 *)p;
	mic.len = len;

	/* from here on a failure means the MIC was rejected */
	ret = ERR_PTR(-EACCES);
	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
	if (maj_stat != 0) {
		dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
			task->tk_pid, __func__, maj_stat);
		goto out_bad;
	}

	/* We leave it to unwrap to calculate au_rslack. For now we just
	 * calculate the length of the verifier: */
	cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
	gss_put_ctx(ctx);
	dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
		task->tk_pid, __func__);
	kfree(seqbuf);
	return p + XDR_QUADLEN(len);

out_bad:
	gss_put_ctx(ctx);
	dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
		PTR_ERR(ret));
	kfree(seqbuf);
	return ret;
}
/*
 * Run the caller's XDR @encode routine on @obj, writing into the request's
 * send buffer starting at @p.
 */
static void
gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
		    __be32 *p, void *obj)
{
	struct xdr_stream stream;

	xdr_init_encode(&stream, &rqstp->rq_snd_buf, p);
	encode(rqstp, &stream, obj);
}
/*
 * Encode a request for the integrity service: a length word, the sequence
 * number and the encoded arguments, followed by a MIC computed over the
 * sequence number and arguments.
 *
 * Returns 0 on success or -EIO on failure. An expired context only clears
 * RPCAUTH_CRED_UPTODATE; the request is still completed.
 */
static inline int
gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
		   kxdreproc_t encode, struct rpc_rqst *rqstp,
		   __be32 *p, void *obj)
{
	struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
	__be32 *len_slot = p;		/* filled in once the body is known */
	__be32 *seq_slot = p + 1;
	u32 body_off = (u8 *)seq_slot - (u8 *)snd_buf->head[0].iov_base;
	struct xdr_buf integ_buf;
	struct xdr_netobj mic;
	struct kvec *iov;
	__be32 *mic_start, *mic_end;
	u32 maj_stat;
	u32 grown;

	*seq_slot = htonl(rqstp->rq_seqno);
	gss_wrap_req_encode(encode, rqstp, seq_slot + 1, obj);

	/* The integrity-protected region runs from the seqno to the end. */
	if (xdr_buf_subsegment(snd_buf, &integ_buf, body_off,
			       snd_buf->len - body_off))
		return -EIO;
	*len_slot = htonl(integ_buf.len);

	/* guess whether we're in the head or the tail: */
	iov = (snd_buf->page_len || snd_buf->tail[0].iov_len) ?
		snd_buf->tail : snd_buf->head;
	mic_start = iov->iov_base + iov->iov_len;
	/* Leave one word in front of the MIC data for its length. */
	mic.data = (u8 *)(mic_start + 1);

	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
	else if (maj_stat)
		return -EIO; /* XXX? */

	/* Account for the MIC (as an XDR opaque) in the buffer lengths. */
	mic_end = xdr_encode_opaque(mic_start, NULL, mic.len);
	grown = (u8 *)mic_end - (u8 *)mic_start;
	iov->iov_len += grown;
	snd_buf->len += grown;
	return 0;
}
static void
priv_release_snd_buf(struct rpc_rqst *rqstp)
{
int i;
for (i=0; i < rqstp->rq_enc_pages_num; i++)
__free_page(rqstp->rq_enc_pages[i]);
kfree(rqstp->rq_enc_pages);
}
static int
alloc_enc_pages(struct rpc_rqst *rqstp)
{
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
int first, last, i;
if (snd_buf->page_len == 0) {
rqstp->rq_enc_pages_num = 0;
return 0;
}
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. 
Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
first = snd_buf->page_base >> PAGE_SHIFT;
last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT;
rqstp->rq_enc_pages_num = last - first + 1 + 1;
rqstp->rq_enc_pages
= kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
GFP_NOFS);
if (!rqstp->rq_enc_pages)
goto out;
for (i=0; i < rqstp->rq_enc_pages_num; i++) {
rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
if (rqstp->rq_enc_pages[i] == NULL)
goto out_free;
}
rqstp->rq_release_snd_buf = priv_release_snd_buf;
return 0;
out_free:
rqstp->rq_enc_pages_num = i;
priv_release_snd_buf(rqstp);
out:
return -EAGAIN;
}
/*
 * Encode a request for the privacy service: a length word followed by the
 * wrapped (sequence number + encoded arguments), padded to a multiple of
 * four bytes.
 *
 * Returns 0 on success, the error from alloc_enc_pages() if the encryption
 * pages cannot be allocated, or -EIO if wrapping fails or the wrapped data
 * does not fit in the send buffer. An expired context only clears
 * RPCAUTH_CRED_UPTODATE; the request is still completed.
 */
static inline int
gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
		  kxdreproc_t encode, struct rpc_rqst *rqstp,
		  __be32 *p, void *obj)
{
	struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
	u32 offset;
	u32 maj_stat;
	int status;
	__be32 *opaque_len;
	struct page **inpages;
	int first;
	int pad;
	struct kvec *iov;
	char *tmp;

	/* Reserve the length word; the wrapped region starts right after. */
	opaque_len = p++;
	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
	*p++ = htonl(rqstp->rq_seqno);

	gss_wrap_req_encode(encode, rqstp, p, obj);

	status = alloc_enc_pages(rqstp);
	if (status)
		return status;

	/*
	 * Remember the original (plaintext) pages as the wrap input and
	 * point the send buffer at the freshly allocated pages instead.
	 */
	first = snd_buf->page_base >> PAGE_SHIFT;
	inpages = snd_buf->pages + first;
	snd_buf->pages = rqstp->rq_enc_pages;
	snd_buf->page_base -= first << PAGE_SHIFT;
	/*
	 * Give the tail its own page, in case we need extra space in the
	 * head when wrapping:
	 *
	 * call_allocate() allocates twice the slack space required
	 * by the authentication flavor to rq_callsize.
	 * For GSS, slack is GSS_CRED_SLACK.
	 *
	 * NOTE(review): if page_len is 0 but the tail is non-empty,
	 * alloc_enc_pages() has allocated nothing and the index below is
	 * -1; confirm that combination cannot occur.
	 */
	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
		snd_buf->tail[0].iov_base = tmp;
	}
	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
	/*
	 * Slack space should prevent this ever happening. If it does, warn
	 * once and fail this request rather than bringing the machine down.
	 */
	if (WARN_ON_ONCE(snd_buf->len > snd_buf->buflen))
		return -EIO;
	status = -EIO;
	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
	 * done anyway, so it's safe to put the request on the wire: */
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
	else if (maj_stat)
		return status;

	*opaque_len = htonl(snd_buf->len - offset);
	/* guess whether we're in the head or the tail: */
	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
		iov = snd_buf->tail;
	else
		iov = snd_buf->head;
	p = iov->iov_base + iov->iov_len;
	/* Zero-pad the wrapped data up to the next 4-byte boundary. */
	pad = 3 - ((snd_buf->len - offset - 1) & 3);
	memset(p, 0, pad);
	iov->iov_len += pad;
	snd_buf->len += pad;

	return 0;
}
/*
 * Encode the arguments of an RPC call, applying the protection selected by
 * the credential's service.
 *
 * Returns 0 on success, the helper's error otherwise, or -EIO when the
 * service is not one of the three handled here.
 */
static int
gss_wrap_req(struct rpc_task *task,
	     kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
{
	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
						 gc_base);
	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
	int status = -EIO;

	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);

	/*
	 * Requests that are not RPC_GSS_PROC_DATA (context destruction) are
	 * sent unwrapped: the spec seems a little ambiguous here, but not
	 * wrapping them makes the most sense. RPC_GSS_SVC_NONE needs no
	 * wrapping either.
	 */
	if (ctx->gc_proc != RPC_GSS_PROC_DATA ||
	    gss_cred->gc_service == RPC_GSS_SVC_NONE) {
		gss_wrap_req_encode(encode, rqstp, p, obj);
		status = 0;
	} else if (gss_cred->gc_service == RPC_GSS_SVC_INTEGRITY) {
		status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj);
	} else if (gss_cred->gc_service == RPC_GSS_SVC_PRIVACY) {
		status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj);
	}

	gss_put_ctx(ctx);
	dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
	return status;
}
/*
 * Verify an integrity-protected reply. On entry *p points at the length
 * word of the protected data; on success *p points just past the sequence
 * number, i.e. at the start of the results.
 *
 * Returns 0 on success or -EIO if the reply is malformed, carries the wrong
 * sequence number, or fails MIC verification. An expired context also
 * clears RPCAUTH_CRED_UPTODATE.
 */
static inline int
gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
		      struct rpc_rqst *rqstp, __be32 **p)
{
	struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
	struct xdr_buf integ_buf;
	struct xdr_netobj mic;
	u32 data_offset, mic_offset;
	u32 integ_len;
	u32 maj_stat;
	int status = -EIO;

	integ_len = ntohl(*(*p)++);
	if (integ_len & 3)
		return status;
	data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
	/*
	 * integ_len comes straight off the wire. Compare it against the
	 * space remaining rather than adding it to data_offset first, since
	 * the sum could wrap around and slip past the bounds check.
	 */
	if (data_offset > rcv_buf->len ||
	    integ_len > rcv_buf->len - data_offset)
		return status;
	mic_offset = data_offset + integ_len;

	if (ntohl(*(*p)++) != rqstp->rq_seqno)
		return status;

	/* The MIC covers [data_offset, mic_offset) and follows that range. */
	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
		return status;

	if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
		return status;

	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
	if (maj_stat != GSS_S_COMPLETE)
		return status;
	return 0;
}
/*
 * Unwrap a privacy-protected reply in place. On entry *p points at the
 * length word of the wrapped data; on success *p points just past the
 * sequence number, i.e. at the start of the results.
 *
 * Returns 0 on success or -EIO if the reply is malformed, cannot be
 * unwrapped, or carries the wrong sequence number. An expired context also
 * clears RPCAUTH_CRED_UPTODATE.
 */
static inline int
gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
		     struct rpc_rqst *rqstp, __be32 **p)
{
	struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
	u32 offset;
	u32 opaque_len;
	u32 maj_stat;
	int status = -EIO;

	opaque_len = ntohl(*(*p)++);
	offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
	/*
	 * opaque_len comes straight off the wire. Compare it against the
	 * space remaining rather than adding it to offset first: the sum
	 * could wrap around, pass the check, and then be stored as a bogus
	 * buffer length below.
	 */
	if (offset > rcv_buf->len || opaque_len > rcv_buf->len - offset)
		return status;
	/* remove padding: */
	rcv_buf->len = offset + opaque_len;

	maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
	if (maj_stat != GSS_S_COMPLETE)
		return status;
	if (ntohl(*(*p)++) != rqstp->rq_seqno)
		return status;

	return 0;
}
/*
 * Run the caller's XDR @decode routine over the request's receive buffer,
 * starting at @p, and hand back whatever it returns.
 */
static int
gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
		      __be32 *p, void *obj)
{
	struct xdr_stream stream;

	xdr_init_decode(&stream, &rqstp->rq_rcv_buf, p);
	return decode(rqstp, &stream, obj);
}
/*
 * Strip the protection selected by the credential's service from a reply
 * and then decode the results.
 *
 * Returns the decode routine's result, or the unwrap helper's error if
 * integrity/privacy processing fails (in which case nothing is decoded).
 */
static int
gss_unwrap_resp(struct rpc_task *task,
		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
{
	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
						 gc_base);
	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
	struct rpc_rqst *req = rqstp;
	struct kvec *head = req->rq_rcv_buf.head;
	__be32 *savedp = p;
	int savedlen = head->iov_len;
	int status = -EIO;

	/* Replies that are not RPC_GSS_PROC_DATA go straight to decoding. */
	if (ctx->gc_proc == RPC_GSS_PROC_DATA) {
		status = 0;
		switch (gss_cred->gc_service) {
		case RPC_GSS_SVC_INTEGRITY:
			status = gss_unwrap_resp_integ(cred, ctx, req, &p);
			break;
		case RPC_GSS_SVC_PRIVACY:
			status = gss_unwrap_resp_priv(cred, ctx, req, &p);
			break;
		default:
			/* RPC_GSS_SVC_NONE and anything else: no unwrapping */
			break;
		}
		if (status)
			goto out;

		/* take into account extra slack for integrity and privacy cases: */
		cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
						+ (savedlen - head->iov_len);
	}

	status = gss_unwrap_req_decode(decode, req, p, obj);
out:
	gss_put_ctx(ctx);
	dprintk("RPC: %5u %s returning %d\n",
		task->tk_pid, __func__, status);
	return status;
}
/* Authentication-flavor operations registered for RPC_AUTH_GSS. */
static const struct rpc_authops authgss_ops = {
	/* identity */
	.owner			= THIS_MODULE,
	.au_name		= "RPCSEC_GSS",
	.au_flavor		= RPC_AUTH_GSS,

	/* auth handle lifetime */
	.create			= gss_create,
	.destroy		= gss_destroy,

	/* credential lookup and creation */
	.hash_cred		= gss_hash_cred,
	.lookup_cred		= gss_lookup_cred,
	.crcreate		= gss_create_cred,

	/* pseudoflavor handling, delegated to the gss_mech_* helpers */
	.list_pseudoflavors	= gss_mech_list_pseudoflavors,
	.info2flavor		= gss_mech_info2flavor,
	.flavor2info		= gss_mech_flavor2info,
};
/* Credential operations for ordinary GSS credentials. */
static const struct rpc_credops gss_credops = {
	.cr_name		= "AUTH_GSS",

	/* lifetime and lookup */
	.cr_init		= gss_cred_init,
	.crdestroy		= gss_destroy_cred,
	.crbind			= rpcauth_generic_bind_cred,
	.crmatch		= gss_match,

	/* per-call processing */
	.crmarshal		= gss_marshal,
	.crrefresh		= gss_refresh,
	.crvalidate		= gss_validate,
	.crwrap_req		= gss_wrap_req,
	.crunwrap_resp		= gss_unwrap_resp,

	/* queries */
	.crkey_timeout		= gss_key_timeout,
	.crstringify_acceptor	= gss_stringify_acceptor,
};
/*
 * Credential operations that use the no-op refresh (gss_refresh_null, which
 * is only meant for context destruction) and a dedicated destructor. No
 * cr_init or crkey_timeout callback is provided.
 */
static const struct rpc_credops gss_nullops = {
	.cr_name		= "AUTH_GSS",

	/* lifetime and lookup */
	.crdestroy		= gss_destroy_nullcred,
	.crbind			= rpcauth_generic_bind_cred,
	.crmatch		= gss_match,

	/* per-call processing */
	.crmarshal		= gss_marshal,
	.crrefresh		= gss_refresh_null,
	.crvalidate		= gss_validate,
	.crwrap_req		= gss_wrap_req,
	.crunwrap_resp		= gss_unwrap_resp,

	/* queries */
	.crstringify_acceptor	= gss_stringify_acceptor,
};
/* Upcall pipe operations, version 0: opened through gss_pipe_open_v0. */
static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
	.open_pipe	= gss_pipe_open_v0,
	.release_pipe	= gss_pipe_release,
	.upcall		= rpc_pipe_generic_upcall,
	.downcall	= gss_pipe_downcall,
	.destroy_msg	= gss_pipe_destroy_msg,
};
/* Upcall pipe operations, version 1: opened through gss_pipe_open_v1. */
static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
	.open_pipe	= gss_pipe_open_v1,
	.release_pipe	= gss_pipe_release,
	.upcall		= rpc_pipe_generic_upcall,
	.downcall	= gss_pipe_downcall,
	.destroy_msg	= gss_pipe_destroy_msg,
};
/* Per-network-namespace setup: delegates to gss_svc_init_net(). */
static __net_init int
rpcsec_gss_init_net(struct net *net)
{
	return gss_svc_init_net(net);
}
/* Per-network-namespace teardown: delegates to gss_svc_shutdown_net(). */
static __net_exit void
rpcsec_gss_exit_net(struct net *net)
{
	gss_svc_shutdown_net(net);
}
/* Per-network-namespace hooks registered by init_rpcsec_gss(). */
static struct pernet_operations rpcsec_gss_net_ops = {
	.init	= rpcsec_gss_init_net,
	.exit	= rpcsec_gss_exit_net,
};
/*
 * Module entry point for RPCSEC_GSS.
 *
 * Registers the auth flavor, initializes the server side, and registers the
 * per-namespace operations, undoing the earlier steps in reverse order if a
 * later one fails. Returns 0 on success or the failing step's error code.
 */
static int __init init_rpcsec_gss(void)
{
	int err;

	err = rpcauth_register(&authgss_ops);
	if (err)
		return err;

	err = gss_svc_init();
	if (err)
		goto undo_auth;

	err = register_pernet_subsys(&rpcsec_gss_net_ops);
	if (err)
		goto undo_svc;

	rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version");
	return 0;

undo_svc:
	gss_svc_shutdown();
undo_auth:
	rpcauth_unregister(&authgss_ops);
	return err;
}
/*
 * Module exit point: tear down everything init_rpcsec_gss() set up, in
 * reverse order of registration.
 */
static void __exit
exit_rpcsec_gss(void)
{
	unregister_pernet_subsys(&rpcsec_gss_net_ops);
	gss_svc_shutdown();
	rpcauth_unregister(&authgss_ops);

	/* Wait for completion of call_rcu()'s */
	rcu_barrier();
}
MODULE_ALIAS("rpc-auth-6");
MODULE_LICENSE("GPL");
module_param_named(expired_cred_retry_delay,
gss_expired_cred_retry_delay,
uint, 0644);
MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
"the RPC engine retries an expired credential");
SUNRPC new rpc_credops to test credential expiry This patch provides the RPC layer helper functions to allow NFS to manage data in the face of expired credentials - such as avoiding buffered WRITEs and COMMITs when the gss context will expire before the WRITEs are flushed and COMMITs are sent. These helper functions enable checking the expiration of an underlying credential key for a generic rpc credential, e.g. the gss_cred gss context gc_expiry which for Kerberos is set to the remaining TGT lifetime. A new rpc_authops key_timeout is only defined for the generic auth. A new rpc_credops crkey_to_expire is only defined for the generic cred. A new rpc_credops crkey_timeout is only defined for the gss cred. Set a credential key expiry watermark, RPC_KEY_EXPIRE_TIMEO set to 240 seconds as a default and can be set via a module parameter as we need to ensure there is time for any dirty data to be flushed. If key_timeout is called on a credential with an underlying credential key that will expire within watermark seconds, we set the RPC_CRED_KEY_EXPIRE_SOON flag in the generic_cred acred so that the NFS layer can clean up prior to key expiration. Checking a generic credential's underlying credential involves a cred lookup. To avoid this lookup in the normal case when the underlying credential has a key that is valid (before the watermark), a notify flag is set in the generic credential the first time the key_timeout is called. The generic credential then stops checking the underlying credential key expiry, and the underlying credential (gss_cred) match routine then checks the key expiration upon each normal use and sets a flag in the associated generic credential only when the key expiration is within the watermark. This in turn signals the generic credential key_timeout to perform the extra credential lookup thereafter. Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2013-08-14 22:59:15 +07:00
module_param_named(key_expire_timeo,
gss_key_expire_timeo,
uint, 0644);
MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a "
"credential keys lifetime where the NFS layer cleans up "
"prior to key expiration");
module_init(init_rpcsec_gss)
module_exit(exit_rpcsec_gss)