mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-25 18:40:57 +07:00
4381548237
sk_callback_lock rwlock actually protects the sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed: 1) Add a new structure, called "struct socket_wq", to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect the RCU grace period when freeing a "struct socket_wq". 4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to "struct socket_wq". 5) Change the sk_sleep() function to use the new sk->sk_wq instead of sk->sk_sleep. 6) Change sk_has_sleeper() to wq_has_sleeper(), which must be used inside an rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to: - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wake up tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions: macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They don't need rcu freeing. Some cleanups or followups are probably needed (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
566 lines
13 KiB
C
566 lines
13 KiB
C
/*
|
|
* File: socket.c
|
|
*
|
|
* Phonet sockets
|
|
*
|
|
* Copyright (C) 2008 Nokia Corporation.
|
|
*
|
|
* Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
|
|
* Original author: Sakari Ailus <sakari.ailus@nokia.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* version 2 as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
|
* 02110-1301 USA
|
|
*/
|
|
|
|
#include <linux/gfp.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/net.h>
|
|
#include <linux/poll.h>
|
|
#include <net/sock.h>
|
|
#include <net/tcp_states.h>
|
|
|
|
#include <linux/phonet.h>
|
|
#include <net/phonet/phonet.h>
|
|
#include <net/phonet/pep.h>
|
|
#include <net/phonet/pn_dev.h>
|
|
|
|
static int pn_socket_release(struct socket *sock)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
|
|
if (sk) {
|
|
sock->sk = NULL;
|
|
sk->sk_prot->close(sk, 0);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#define PN_HASHSIZE 16
|
|
#define PN_HASHMASK (PN_HASHSIZE-1)
|
|
|
|
|
|
static struct {
|
|
struct hlist_head hlist[PN_HASHSIZE];
|
|
spinlock_t lock;
|
|
} pnsocks;
|
|
|
|
/* Initialise the socket hash table: empty every bucket, then set up the lock */
void __init pn_sock_init(void)
{
	struct hlist_head *bucket;
	struct hlist_head *const end = pnsocks.hlist + PN_HASHSIZE;

	for (bucket = pnsocks.hlist; bucket < end; bucket++)
		INIT_HLIST_HEAD(bucket);
	spin_lock_init(&pnsocks.lock);
}
|
|
|
|
/* Return the hash bucket for @obj; the low bits of the object pick the slot */
static struct hlist_head *pn_hash_list(u16 obj)
{
	unsigned int slot = obj & PN_HASHMASK;

	return &pnsocks.hlist[slot];
}
|
|
|
|
/*
|
|
* Find address based on socket address, match only certain fields.
|
|
* Also grab sock if it was found. Remember to sock_put it later.
|
|
*/
|
|
struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
|
|
{
|
|
struct hlist_node *node;
|
|
struct sock *sknode;
|
|
struct sock *rval = NULL;
|
|
u16 obj = pn_sockaddr_get_object(spn);
|
|
u8 res = spn->spn_resource;
|
|
struct hlist_head *hlist = pn_hash_list(obj);
|
|
|
|
spin_lock_bh(&pnsocks.lock);
|
|
|
|
sk_for_each(sknode, node, hlist) {
|
|
struct pn_sock *pn = pn_sk(sknode);
|
|
BUG_ON(!pn->sobject); /* unbound socket */
|
|
|
|
if (!net_eq(sock_net(sknode), net))
|
|
continue;
|
|
if (pn_port(obj)) {
|
|
/* Look up socket by port */
|
|
if (pn_port(pn->sobject) != pn_port(obj))
|
|
continue;
|
|
} else {
|
|
/* If port is zero, look up by resource */
|
|
if (pn->resource != res)
|
|
continue;
|
|
}
|
|
if (pn_addr(pn->sobject) &&
|
|
pn_addr(pn->sobject) != pn_addr(obj))
|
|
continue;
|
|
|
|
rval = sknode;
|
|
sock_hold(sknode);
|
|
break;
|
|
}
|
|
|
|
spin_unlock_bh(&pnsocks.lock);
|
|
|
|
return rval;
|
|
}
|
|
|
|
/* Deliver a broadcast packet (only in bottom-half) */
|
|
void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
|
|
{
|
|
struct hlist_head *hlist = pnsocks.hlist;
|
|
unsigned h;
|
|
|
|
spin_lock(&pnsocks.lock);
|
|
for (h = 0; h < PN_HASHSIZE; h++) {
|
|
struct hlist_node *node;
|
|
struct sock *sknode;
|
|
|
|
sk_for_each(sknode, node, hlist) {
|
|
struct sk_buff *clone;
|
|
|
|
if (!net_eq(sock_net(sknode), net))
|
|
continue;
|
|
if (!sock_flag(sknode, SOCK_BROADCAST))
|
|
continue;
|
|
|
|
clone = skb_clone(skb, GFP_ATOMIC);
|
|
if (clone) {
|
|
sock_hold(sknode);
|
|
sk_receive_skb(sknode, clone, 0);
|
|
}
|
|
}
|
|
hlist++;
|
|
}
|
|
spin_unlock(&pnsocks.lock);
|
|
}
|
|
|
|
void pn_sock_hash(struct sock *sk)
|
|
{
|
|
struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
|
|
|
|
spin_lock_bh(&pnsocks.lock);
|
|
sk_add_node(sk, hlist);
|
|
spin_unlock_bh(&pnsocks.lock);
|
|
}
|
|
EXPORT_SYMBOL(pn_sock_hash);
|
|
|
|
void pn_sock_unhash(struct sock *sk)
|
|
{
|
|
spin_lock_bh(&pnsocks.lock);
|
|
sk_del_node_init(sk);
|
|
spin_unlock_bh(&pnsocks.lock);
|
|
}
|
|
EXPORT_SYMBOL(pn_sock_unhash);
|
|
|
|
static DEFINE_MUTEX(port_mutex);
|
|
|
|
static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct pn_sock *pn = pn_sk(sk);
|
|
struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
|
|
int err;
|
|
u16 handle;
|
|
u8 saddr;
|
|
|
|
if (sk->sk_prot->bind)
|
|
return sk->sk_prot->bind(sk, addr, len);
|
|
|
|
if (len < sizeof(struct sockaddr_pn))
|
|
return -EINVAL;
|
|
if (spn->spn_family != AF_PHONET)
|
|
return -EAFNOSUPPORT;
|
|
|
|
handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr);
|
|
saddr = pn_addr(handle);
|
|
if (saddr && phonet_address_lookup(sock_net(sk), saddr))
|
|
return -EADDRNOTAVAIL;
|
|
|
|
lock_sock(sk);
|
|
if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) {
|
|
err = -EINVAL; /* attempt to rebind */
|
|
goto out;
|
|
}
|
|
WARN_ON(sk_hashed(sk));
|
|
mutex_lock(&port_mutex);
|
|
err = sk->sk_prot->get_port(sk, pn_port(handle));
|
|
if (err)
|
|
goto out_port;
|
|
|
|
/* get_port() sets the port, bind() sets the address if applicable */
|
|
pn->sobject = pn_object(saddr, pn_port(pn->sobject));
|
|
pn->resource = spn->spn_resource;
|
|
|
|
/* Enable RX on the socket */
|
|
sk->sk_prot->hash(sk);
|
|
out_port:
|
|
mutex_unlock(&port_mutex);
|
|
out:
|
|
release_sock(sk);
|
|
return err;
|
|
}
|
|
|
|
static int pn_socket_autobind(struct socket *sock)
|
|
{
|
|
struct sockaddr_pn sa;
|
|
int err;
|
|
|
|
memset(&sa, 0, sizeof(sa));
|
|
sa.spn_family = AF_PHONET;
|
|
err = pn_socket_bind(sock, (struct sockaddr *)&sa,
|
|
sizeof(struct sockaddr_pn));
|
|
if (err != -EINVAL)
|
|
return err;
|
|
BUG_ON(!pn_port(pn_sk(sock->sk)->sobject));
|
|
return 0; /* socket was already bound */
|
|
}
|
|
|
|
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
|
|
int flags)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct sock *newsk;
|
|
int err;
|
|
|
|
newsk = sk->sk_prot->accept(sk, flags, &err);
|
|
if (!newsk)
|
|
return err;
|
|
|
|
lock_sock(newsk);
|
|
sock_graft(newsk, newsock);
|
|
newsock->state = SS_CONNECTED;
|
|
release_sock(newsk);
|
|
return 0;
|
|
}
|
|
|
|
static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
|
|
int *sockaddr_len, int peer)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct pn_sock *pn = pn_sk(sk);
|
|
|
|
memset(addr, 0, sizeof(struct sockaddr_pn));
|
|
addr->sa_family = AF_PHONET;
|
|
if (!peer) /* Race with bind() here is userland's problem. */
|
|
pn_sockaddr_set_object((struct sockaddr_pn *)addr,
|
|
pn->sobject);
|
|
|
|
*sockaddr_len = sizeof(struct sockaddr_pn);
|
|
return 0;
|
|
}
|
|
|
|
static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
|
|
poll_table *wait)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct pep_sock *pn = pep_sk(sk);
|
|
unsigned int mask = 0;
|
|
|
|
poll_wait(file, sk_sleep(sk), wait);
|
|
|
|
switch (sk->sk_state) {
|
|
case TCP_LISTEN:
|
|
return hlist_empty(&pn->ackq) ? 0 : POLLIN;
|
|
case TCP_CLOSE:
|
|
return POLLERR;
|
|
}
|
|
|
|
if (!skb_queue_empty(&sk->sk_receive_queue))
|
|
mask |= POLLIN | POLLRDNORM;
|
|
if (!skb_queue_empty(&pn->ctrlreq_queue))
|
|
mask |= POLLPRI;
|
|
if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
|
|
return POLLHUP;
|
|
|
|
if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits))
|
|
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
|
|
|
|
return mask;
|
|
}
|
|
|
|
static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
|
|
unsigned long arg)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct pn_sock *pn = pn_sk(sk);
|
|
|
|
if (cmd == SIOCPNGETOBJECT) {
|
|
struct net_device *dev;
|
|
u16 handle;
|
|
u8 saddr;
|
|
|
|
if (get_user(handle, (__u16 __user *)arg))
|
|
return -EFAULT;
|
|
|
|
lock_sock(sk);
|
|
if (sk->sk_bound_dev_if)
|
|
dev = dev_get_by_index(sock_net(sk),
|
|
sk->sk_bound_dev_if);
|
|
else
|
|
dev = phonet_device_get(sock_net(sk));
|
|
if (dev && (dev->flags & IFF_UP))
|
|
saddr = phonet_address_get(dev, pn_addr(handle));
|
|
else
|
|
saddr = PN_NO_ADDR;
|
|
release_sock(sk);
|
|
|
|
if (dev)
|
|
dev_put(dev);
|
|
if (saddr == PN_NO_ADDR)
|
|
return -EHOSTUNREACH;
|
|
|
|
handle = pn_object(saddr, pn_port(pn->sobject));
|
|
return put_user(handle, (__u16 __user *)arg);
|
|
}
|
|
|
|
return sk->sk_prot->ioctl(sk, cmd, arg);
|
|
}
|
|
|
|
static int pn_socket_listen(struct socket *sock, int backlog)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
int err = 0;
|
|
|
|
if (sock->state != SS_UNCONNECTED)
|
|
return -EINVAL;
|
|
if (pn_socket_autobind(sock))
|
|
return -ENOBUFS;
|
|
|
|
lock_sock(sk);
|
|
if (sk->sk_state != TCP_CLOSE) {
|
|
err = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
sk->sk_state = TCP_LISTEN;
|
|
sk->sk_ack_backlog = 0;
|
|
sk->sk_max_ack_backlog = backlog;
|
|
out:
|
|
release_sock(sk);
|
|
return err;
|
|
}
|
|
|
|
/*
 * Send a message: make sure the socket is bound (auto-binding it if needed,
 * with any failure reported as -EAGAIN), then defer to the protocol's
 * sendmsg().
 */
static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock,
				struct msghdr *m, size_t total_len)
{
	struct sock *sk = sock->sk;
	int bind_err = pn_socket_autobind(sock);

	if (bind_err != 0)
		return -EAGAIN;

	return sk->sk_prot->sendmsg(iocb, sk, m, total_len);
}
|
|
|
|
const struct proto_ops phonet_dgram_ops = {
|
|
.family = AF_PHONET,
|
|
.owner = THIS_MODULE,
|
|
.release = pn_socket_release,
|
|
.bind = pn_socket_bind,
|
|
.connect = sock_no_connect,
|
|
.socketpair = sock_no_socketpair,
|
|
.accept = sock_no_accept,
|
|
.getname = pn_socket_getname,
|
|
.poll = datagram_poll,
|
|
.ioctl = pn_socket_ioctl,
|
|
.listen = sock_no_listen,
|
|
.shutdown = sock_no_shutdown,
|
|
.setsockopt = sock_no_setsockopt,
|
|
.getsockopt = sock_no_getsockopt,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_setsockopt = sock_no_setsockopt,
|
|
.compat_getsockopt = sock_no_getsockopt,
|
|
#endif
|
|
.sendmsg = pn_socket_sendmsg,
|
|
.recvmsg = sock_common_recvmsg,
|
|
.mmap = sock_no_mmap,
|
|
.sendpage = sock_no_sendpage,
|
|
};
|
|
|
|
const struct proto_ops phonet_stream_ops = {
|
|
.family = AF_PHONET,
|
|
.owner = THIS_MODULE,
|
|
.release = pn_socket_release,
|
|
.bind = pn_socket_bind,
|
|
.connect = sock_no_connect,
|
|
.socketpair = sock_no_socketpair,
|
|
.accept = pn_socket_accept,
|
|
.getname = pn_socket_getname,
|
|
.poll = pn_socket_poll,
|
|
.ioctl = pn_socket_ioctl,
|
|
.listen = pn_socket_listen,
|
|
.shutdown = sock_no_shutdown,
|
|
.setsockopt = sock_common_setsockopt,
|
|
.getsockopt = sock_common_getsockopt,
|
|
#ifdef CONFIG_COMPAT
|
|
.compat_setsockopt = compat_sock_common_setsockopt,
|
|
.compat_getsockopt = compat_sock_common_getsockopt,
|
|
#endif
|
|
.sendmsg = pn_socket_sendmsg,
|
|
.recvmsg = sock_common_recvmsg,
|
|
.mmap = sock_no_mmap,
|
|
.sendpage = sock_no_sendpage,
|
|
};
|
|
EXPORT_SYMBOL(phonet_stream_ops);
|
|
|
|
/* allocate port for a socket */
|
|
int pn_sock_get_port(struct sock *sk, unsigned short sport)
|
|
{
|
|
static int port_cur;
|
|
struct net *net = sock_net(sk);
|
|
struct pn_sock *pn = pn_sk(sk);
|
|
struct sockaddr_pn try_sa;
|
|
struct sock *tmpsk;
|
|
|
|
memset(&try_sa, 0, sizeof(struct sockaddr_pn));
|
|
try_sa.spn_family = AF_PHONET;
|
|
WARN_ON(!mutex_is_locked(&port_mutex));
|
|
if (!sport) {
|
|
/* search free port */
|
|
int port, pmin, pmax;
|
|
|
|
phonet_get_local_port_range(&pmin, &pmax);
|
|
for (port = pmin; port <= pmax; port++) {
|
|
port_cur++;
|
|
if (port_cur < pmin || port_cur > pmax)
|
|
port_cur = pmin;
|
|
|
|
pn_sockaddr_set_port(&try_sa, port_cur);
|
|
tmpsk = pn_find_sock_by_sa(net, &try_sa);
|
|
if (tmpsk == NULL) {
|
|
sport = port_cur;
|
|
goto found;
|
|
} else
|
|
sock_put(tmpsk);
|
|
}
|
|
} else {
|
|
/* try to find specific port */
|
|
pn_sockaddr_set_port(&try_sa, sport);
|
|
tmpsk = pn_find_sock_by_sa(net, &try_sa);
|
|
if (tmpsk == NULL)
|
|
/* No sock there! We can use that port... */
|
|
goto found;
|
|
else
|
|
sock_put(tmpsk);
|
|
}
|
|
/* the port must be in use already */
|
|
return -EADDRINUSE;
|
|
|
|
found:
|
|
pn->sobject = pn_object(pn_addr(pn->sobject), sport);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(pn_sock_get_port);
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
/*
 * Return the @pos-th (zero-based) hashed socket belonging to the seq file's
 * network namespace, scanning the buckets in order, or NULL when there are
 * fewer sockets than that. No lock is taken here; pn_sock_seq_start()
 * acquires pnsocks.lock before calling.
 */
static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos)
{
	struct net *net = seq_file_net(seq);
	unsigned int bucket;

	for (bucket = 0; bucket < PN_HASHSIZE; bucket++) {
		struct hlist_node *cursor;
		struct sock *cur;

		sk_for_each(cur, cursor, &pnsocks.hlist[bucket]) {
			/* Sockets of other namespaces do not count */
			if (!net_eq(net, sock_net(cur)))
				continue;
			if (pos == 0)
				return cur;
			pos--;
		}
	}
	return NULL;
}
|
|
|
|
/*
 * Advance from @sk to the next socket, as given by sk_next(), that belongs
 * to the seq file's network namespace. Returns NULL when sk_next() runs out.
 *
 * NOTE(review): sk_next() presumably follows only the chain @sk is linked
 * on, so this does not move on to later hash buckets - confirm.
 */
static struct sock *pn_sock_get_next(struct seq_file *seq, struct sock *sk)
{
	struct net *net = seq_file_net(seq);

	for (sk = sk_next(sk); sk; sk = sk_next(sk)) {
		if (net_eq(net, sock_net(sk)))
			break;
	}

	return sk;
}
|
|
|
|
/*
 * seq_file start: take the hash table lock (released in pn_sock_seq_stop())
 * and return the element at *@pos. Position 0 is the header token; position
 * N > 0 is the (N-1)-th socket.
 */
static void *pn_sock_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(pnsocks.lock)
{
	spin_lock_bh(&pnsocks.lock);

	if (*pos == 0)
		return SEQ_START_TOKEN;
	return pn_sock_get_idx(seq, *pos - 1);
}
|
|
|
|
/*
 * seq_file next: after the header token comes the first socket; after a
 * socket comes whatever pn_sock_get_next() returns. *@pos is always bumped.
 */
static void *pn_sock_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *next_sk = (v == SEQ_START_TOKEN)
				? pn_sock_get_idx(seq, 0)
				: pn_sock_get_next(seq, v);

	++*pos;
	return next_sk;
}
|
|
|
|
static void pn_sock_seq_stop(struct seq_file *seq, void *v)
|
|
__releases(pnsocks.lock)
|
|
{
|
|
spin_unlock_bh(&pnsocks.lock);
|
|
}
|
|
|
|
static int pn_sock_seq_show(struct seq_file *seq, void *v)
|
|
{
|
|
int len;
|
|
|
|
if (v == SEQ_START_TOKEN)
|
|
seq_printf(seq, "%s%n", "pt loc rem rs st tx_queue rx_queue "
|
|
" uid inode ref pointer drops", &len);
|
|
else {
|
|
struct sock *sk = v;
|
|
struct pn_sock *pn = pn_sk(sk);
|
|
|
|
seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
|
|
"%d %p %d%n",
|
|
sk->sk_protocol, pn->sobject, 0, pn->resource,
|
|
sk->sk_state,
|
|
sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
|
|
sock_i_uid(sk), sock_i_ino(sk),
|
|
atomic_read(&sk->sk_refcnt), sk,
|
|
atomic_read(&sk->sk_drops), &len);
|
|
}
|
|
seq_printf(seq, "%*s\n", 127 - len, "");
|
|
return 0;
|
|
}
|
|
|
|
static const struct seq_operations pn_sock_seq_ops = {
|
|
.start = pn_sock_seq_start,
|
|
.next = pn_sock_seq_next,
|
|
.stop = pn_sock_seq_stop,
|
|
.show = pn_sock_seq_show,
|
|
};
|
|
|
|
static int pn_sock_open(struct inode *inode, struct file *file)
|
|
{
|
|
return seq_open_net(inode, file, &pn_sock_seq_ops,
|
|
sizeof(struct seq_net_private));
|
|
}
|
|
|
|
const struct file_operations pn_sock_seq_fops = {
|
|
.owner = THIS_MODULE,
|
|
.open = pn_sock_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
.release = seq_release_net,
|
|
};
|
|
#endif
|