linux_dsm_epyc7002/net/sctp/proc.c
Vegard Nossum 54236ab09e net/sctp: always initialise sctp_ht_iter::start_fail
sctp_transport_seq_start() does not currently clear iter->start_fail on
success, but relies on it being zero when it is allocated (by
seq_open_net()).

This can be a problem in the following sequence:

    open() // allocates iter (and implicitly sets iter->start_fail = 0)
    read()
     - iter->start() // fails and sets iter->start_fail = 1
     - iter->stop() // doesn't call sctp_transport_walk_stop() (correct)
    read() again
     - iter->start() // succeeds, but doesn't change iter->start_fail
     - iter->stop() // doesn't call sctp_transport_walk_stop() (wrong)

We should initialize sctp_ht_iter::start_fail to zero if ->start()
succeeds, otherwise it's possible that we leave an old value of 1 there,
which will cause ->stop() to not call sctp_transport_walk_stop(), which
causes all sorts of problems like not calling rcu_read_unlock() (and
preempt_enable()), eventually leading to more warnings like this:

    BUG: sleeping function called from invalid context at mm/slab.h:388
    in_atomic(): 0, irqs_disabled(): 0, pid: 16551, name: trinity-c2
    Preemption disabled at:[<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150

     [<ffffffff81149abb>] preempt_count_add+0x1fb/0x280
     [<ffffffff83295892>] _raw_spin_lock+0x12/0x40
     [<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150
     [<ffffffff82ec665f>] sctp_transport_walk_start+0x2f/0x60
     [<ffffffff82edda1d>] sctp_transport_seq_start+0x4d/0x150
     [<ffffffff81439e50>] traverse+0x170/0x850
     [<ffffffff8143aeec>] seq_read+0x7cc/0x1180
     [<ffffffff814f996c>] proc_reg_read+0xbc/0x180
     [<ffffffff813d0384>] do_loop_readv_writev+0x134/0x210
     [<ffffffff813d2a95>] do_readv_writev+0x565/0x660
     [<ffffffff813d6857>] vfs_readv+0x67/0xa0
     [<ffffffff813d6c16>] do_preadv+0x126/0x170
     [<ffffffff813d710c>] SyS_preadv+0xc/0x10
     [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
     [<ffffffff83296225>] return_from_SYSCALL_64+0x0/0x6a
     [<ffffffffffffffff>] 0xffffffffffffffff

Notice that this is a subtly different stacktrace from the one in commit
5fc382d875 ("net/sctp: terminate rhashtable walk correctly").

Cc: Xin Long <lucien.xin@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-By: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-08-13 15:10:16 -07:00

523 lines
14 KiB
C

/* SCTP kernel implementation
* Copyright (c) 2003 International Business Machines, Corp.
*
* This file is part of the SCTP kernel implementation
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
* the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This SCTP implementation is distributed in the hope that it
* will be useful, but WITHOUT ANY WARRANTY; without even the implied
* ************************
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU CC; see the file COPYING. If not, see
* <http://www.gnu.org/licenses/>.
*
* Please send any bug reports or fixes you make to the
* email address(es):
* lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Sridhar Samudrala <sri@us.ibm.com>
*/
#include <linux/types.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/export.h>
#include <net/sctp/sctp.h>
#include <net/ip.h> /* for snmp_fold_field */
static const struct snmp_mib sctp_snmp_list[] = {
SNMP_MIB_ITEM("SctpCurrEstab", SCTP_MIB_CURRESTAB),
SNMP_MIB_ITEM("SctpActiveEstabs", SCTP_MIB_ACTIVEESTABS),
SNMP_MIB_ITEM("SctpPassiveEstabs", SCTP_MIB_PASSIVEESTABS),
SNMP_MIB_ITEM("SctpAborteds", SCTP_MIB_ABORTEDS),
SNMP_MIB_ITEM("SctpShutdowns", SCTP_MIB_SHUTDOWNS),
SNMP_MIB_ITEM("SctpOutOfBlues", SCTP_MIB_OUTOFBLUES),
SNMP_MIB_ITEM("SctpChecksumErrors", SCTP_MIB_CHECKSUMERRORS),
SNMP_MIB_ITEM("SctpOutCtrlChunks", SCTP_MIB_OUTCTRLCHUNKS),
SNMP_MIB_ITEM("SctpOutOrderChunks", SCTP_MIB_OUTORDERCHUNKS),
SNMP_MIB_ITEM("SctpOutUnorderChunks", SCTP_MIB_OUTUNORDERCHUNKS),
SNMP_MIB_ITEM("SctpInCtrlChunks", SCTP_MIB_INCTRLCHUNKS),
SNMP_MIB_ITEM("SctpInOrderChunks", SCTP_MIB_INORDERCHUNKS),
SNMP_MIB_ITEM("SctpInUnorderChunks", SCTP_MIB_INUNORDERCHUNKS),
SNMP_MIB_ITEM("SctpFragUsrMsgs", SCTP_MIB_FRAGUSRMSGS),
SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS),
SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS),
SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS),
SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS),
SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS),
SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS),
SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS),
SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS),
SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS),
SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS),
SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS),
SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ),
SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG),
SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS),
SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS),
SNMP_MIB_SENTINEL
};
/* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
int i;
for (i = 0; sctp_snmp_list[i].name != NULL; i++)
seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
snmp_fold_field(net->sctp.sctp_statistics,
sctp_snmp_list[i].entry));
return 0;
}
/* Initialize the seq file operations for 'snmp' object. */
static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
{
return single_open_net(inode, file, sctp_snmp_seq_show);
}
static const struct file_operations sctp_snmp_seq_fops = {
.owner = THIS_MODULE,
.open = sctp_snmp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release_net,
};
/* Set up the proc fs entry for 'snmp' object. */
int __net_init sctp_snmp_proc_init(struct net *net)
{
struct proc_dir_entry *p;
p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
&sctp_snmp_seq_fops);
if (!p)
return -ENOMEM;
return 0;
}
/* Cleanup the proc fs entry for 'snmp' object. */
void sctp_snmp_proc_exit(struct net *net)
{
remove_proc_entry("snmp", net->sctp.proc_net_sctp);
}
/* Dump local addresses of an association/endpoint. */
static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
{
struct sctp_association *asoc;
struct sctp_sockaddr_entry *laddr;
struct sctp_transport *peer;
union sctp_addr *addr, *primary = NULL;
struct sctp_af *af;
if (epb->type == SCTP_EP_TYPE_ASSOCIATION) {
asoc = sctp_assoc(epb);
peer = asoc->peer.primary_path;
if (unlikely(peer == NULL)) {
WARN(1, "Association %p with NULL primary path!\n", asoc);
return;
}
primary = &peer->saddr;
}
rcu_read_lock();
list_for_each_entry_rcu(laddr, &epb->bind_addr.address_list, list) {
if (!laddr->valid)
continue;
addr = &laddr->a;
af = sctp_get_af_specific(addr->sa.sa_family);
if (primary && af->cmp_addr(addr, primary)) {
seq_printf(seq, "*");
}
af->seq_dump_addr(seq, addr);
}
rcu_read_unlock();
}
/* Dump remote addresses of an association. */
static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_association *assoc)
{
struct sctp_transport *transport;
union sctp_addr *addr, *primary;
struct sctp_af *af;
primary = &assoc->peer.primary_addr;
list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
transports) {
addr = &transport->ipaddr;
af = sctp_get_af_specific(addr->sa.sa_family);
if (af->cmp_addr(addr, primary)) {
seq_printf(seq, "*");
}
af->seq_dump_addr(seq, addr);
}
}
static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
{
if (*pos >= sctp_ep_hashsize)
return NULL;
if (*pos < 0)
*pos = 0;
if (*pos == 0)
seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT UID INODE LADDRS\n");
return (void *)pos;
}
static void sctp_eps_seq_stop(struct seq_file *seq, void *v)
{
}
static void *sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
if (++*pos >= sctp_ep_hashsize)
return NULL;
return pos;
}
/* Display sctp endpoints (/proc/net/sctp/eps). */
static int sctp_eps_seq_show(struct seq_file *seq, void *v)
{
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
struct sctp_endpoint *ep;
struct sock *sk;
int hash = *(loff_t *)v;
if (hash >= sctp_ep_hashsize)
return -ENOMEM;
head = &sctp_ep_hashtable[hash];
local_bh_disable();
read_lock(&head->lock);
sctp_for_each_hentry(epb, &head->chain) {
ep = sctp_ep(epb);
sk = epb->sk;
if (!net_eq(sock_net(sk), seq_file_net(seq)))
continue;
seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk,
sctp_sk(sk)->type, sk->sk_state, hash,
epb->bind_addr.port,
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk));
sctp_seq_dump_local_addrs(seq, epb);
seq_printf(seq, "\n");
}
read_unlock(&head->lock);
local_bh_enable();
return 0;
}
static const struct seq_operations sctp_eps_ops = {
.start = sctp_eps_seq_start,
.next = sctp_eps_seq_next,
.stop = sctp_eps_seq_stop,
.show = sctp_eps_seq_show,
};
/* Initialize the seq file operations for 'eps' object. */
static int sctp_eps_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_eps_ops,
sizeof(struct seq_net_private));
}
static const struct file_operations sctp_eps_seq_fops = {
.open = sctp_eps_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
};
/* Set up the proc fs entry for 'eps' object. */
int __net_init sctp_eps_proc_init(struct net *net)
{
struct proc_dir_entry *p;
p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
&sctp_eps_seq_fops);
if (!p)
return -ENOMEM;
return 0;
}
/* Cleanup the proc fs entry for 'eps' object. */
void sctp_eps_proc_exit(struct net *net)
{
remove_proc_entry("eps", net->sctp.proc_net_sctp);
}
struct sctp_ht_iter {
struct seq_net_private p;
struct rhashtable_iter hti;
int start_fail;
};
static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
{
struct sctp_ht_iter *iter = seq->private;
int err = sctp_transport_walk_start(&iter->hti);
if (err) {
iter->start_fail = 1;
return ERR_PTR(err);
}
iter->start_fail = 0;
return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
}
static void sctp_transport_seq_stop(struct seq_file *seq, void *v)
{
struct sctp_ht_iter *iter = seq->private;
if (iter->start_fail)
return;
sctp_transport_walk_stop(&iter->hti);
}
static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sctp_ht_iter *iter = seq->private;
++*pos;
return sctp_transport_get_next(seq_file_net(seq), &iter->hti);
}
/* Display sctp associations (/proc/net/sctp/assocs). */
static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
{
struct sctp_transport *transport;
struct sctp_association *assoc;
struct sctp_ep_common *epb;
struct sock *sk;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, " ASSOC SOCK STY SST ST HBKT "
"ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
"RPORT LADDRS <-> RADDRS "
"HBINT INS OUTS MAXRT T1X T2X RTXC "
"wmema wmemq sndbuf rcvbuf\n");
return 0;
}
transport = (struct sctp_transport *)v;
if (!sctp_transport_hold(transport))
return 0;
assoc = transport->asoc;
epb = &assoc->base;
sk = epb->sk;
seq_printf(seq,
"%8pK %8pK %-3d %-3d %-2d %-4d "
"%4d %8d %8d %7u %5lu %-5d %5d ",
assoc, sk, sctp_sk(sk)->type, sk->sk_state,
assoc->state, 0,
assoc->assoc_id,
assoc->sndbuf_used,
atomic_read(&assoc->rmem_alloc),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk),
epb->bind_addr.port,
assoc->peer.port);
seq_printf(seq, " ");
sctp_seq_dump_local_addrs(seq, epb);
seq_printf(seq, "<-> ");
sctp_seq_dump_remote_addrs(seq, assoc);
seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
"%8d %8d %8d %8d",
assoc->hbinterval, assoc->c.sinit_max_instreams,
assoc->c.sinit_num_ostreams, assoc->max_retrans,
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
atomic_read(&sk->sk_wmem_alloc),
sk->sk_wmem_queued,
sk->sk_sndbuf,
sk->sk_rcvbuf);
seq_printf(seq, "\n");
sctp_transport_put(transport);
return 0;
}
static const struct seq_operations sctp_assoc_ops = {
.start = sctp_transport_seq_start,
.next = sctp_transport_seq_next,
.stop = sctp_transport_seq_stop,
.show = sctp_assocs_seq_show,
};
/* Initialize the seq file operations for 'assocs' object. */
static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_assoc_ops,
sizeof(struct sctp_ht_iter));
}
static const struct file_operations sctp_assocs_seq_fops = {
.open = sctp_assocs_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
};
/* Set up the proc fs entry for 'assocs' object. */
int __net_init sctp_assocs_proc_init(struct net *net)
{
struct proc_dir_entry *p;
p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
&sctp_assocs_seq_fops);
if (!p)
return -ENOMEM;
return 0;
}
/* Cleanup the proc fs entry for 'assocs' object. */
void sctp_assocs_proc_exit(struct net *net)
{
remove_proc_entry("assocs", net->sctp.proc_net_sctp);
}
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
struct sctp_association *assoc;
struct sctp_transport *transport, *tsp;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
"REM_ADDR_RTX START STATE\n");
return 0;
}
transport = (struct sctp_transport *)v;
if (!sctp_transport_hold(transport))
return 0;
assoc = transport->asoc;
list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
transports) {
/*
* The remote address (ADDR)
*/
tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr);
seq_printf(seq, " ");
/*
* The association ID (ASSOC_ID)
*/
seq_printf(seq, "%d ", tsp->asoc->assoc_id);
/*
* If the Heartbeat is active (HB_ACT)
* Note: 1 = Active, 0 = Inactive
*/
seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer));
/*
* Retransmit time out (RTO)
*/
seq_printf(seq, "%lu ", tsp->rto);
/*
* Maximum path retransmit count (PATH_MAX_RTX)
*/
seq_printf(seq, "%d ", tsp->pathmaxrxt);
/*
* remote address retransmit count (REM_ADDR_RTX)
* Note: We don't have a way to tally this at the moment
* so lets just leave it as zero for the moment
*/
seq_puts(seq, "0 ");
/*
* remote address start time (START). This is also not
* currently implemented, but we can record it with a
* jiffies marker in a subsequent patch
*/
seq_puts(seq, "0 ");
/*
* The current state of this destination. I.e.
* SCTP_ACTIVE, SCTP_INACTIVE, ...
*/
seq_printf(seq, "%d", tsp->state);
seq_printf(seq, "\n");
}
sctp_transport_put(transport);
return 0;
}
static const struct seq_operations sctp_remaddr_ops = {
.start = sctp_transport_seq_start,
.next = sctp_transport_seq_next,
.stop = sctp_transport_seq_stop,
.show = sctp_remaddr_seq_show,
};
/* Cleanup the proc fs entry for 'remaddr' object. */
void sctp_remaddr_proc_exit(struct net *net)
{
remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
}
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &sctp_remaddr_ops,
sizeof(struct sctp_ht_iter));
}
static const struct file_operations sctp_remaddr_seq_fops = {
.open = sctp_remaddr_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
};
int __net_init sctp_remaddr_proc_init(struct net *net)
{
struct proc_dir_entry *p;
p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
&sctp_remaddr_seq_fops);
if (!p)
return -ENOMEM;
return 0;
}