linux_dsm_epyc7002/net/atm/common.c

905 lines
21 KiB
C
Raw Normal View History

/* net/atm/common.c - ATM sockets (common part for PVC and SVC) */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/net.h> /* struct socket, struct proto_ops */
#include <linux/atm.h> /* ATM stuff */
#include <linux/atmdev.h>
#include <linux/socket.h> /* SOL_SOCKET */
#include <linux/errno.h> /* error codes */
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/time64.h> /* 64-bit time for seconds */
#include <linux/skbuff.h>
#include <linux/bitops.h>
#include <linux/init.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <net/sock.h> /* struct sock */
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <linux/atomic.h>
#include "resources.h" /* atm_find_dev */
#include "common.h" /* prototypes */
#include "protocols.h" /* atm_init_<transport> */
#include "addr.h" /* address registry */
#include "signaling.h" /* for WAITING and sigd_attach */
struct hlist_head vcc_hash[VCC_HTABLE_SIZE];
EXPORT_SYMBOL(vcc_hash);
DEFINE_RWLOCK(vcc_sklist_lock);
EXPORT_SYMBOL(vcc_sklist_lock);
static ATOMIC_NOTIFIER_HEAD(atm_dev_notify_chain);
static void __vcc_insert_socket(struct sock *sk)
{
struct atm_vcc *vcc = atm_sk(sk);
struct hlist_head *head = &vcc_hash[vcc->vci & (VCC_HTABLE_SIZE - 1)];
[INET]: speedup inet (tcp/dccp) lookups Arnaldo and I agreed it could be applied now, because I have other pending patches depending on this one (Thank you Arnaldo) (The other important patch moves skc_refcnt in a separate cache line, so that the SMP/NUMA performance doesnt suffer from cache line ping pongs) 1) First some performance data : -------------------------------- tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established() The most time critical code is : sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } The sk_for_each() does use prefetch() hints but only the begining of "struct sock" is prefetched. As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far away from the begining of "struct sock", it has to bring into CPU cache cold cache line. Each iteration has to use at least 2 cache lines. This can be problematic if some chains are very long. 2) The goal ----------- The idea I had is to change things so that INET_MATCH() may return FALSE in 99% of cases only using the data already in the CPU cache, using one cache line per iteration. 3) Description of the patch --------------------------- Adds a new 'unsigned int skc_hash' field in 'struct sock_common', filling a 32 bits hole on 64 bits platform. struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; Store in this 32 bits field the full hash, not masked by (ehash_size - 1) Using this full hash as the first comparison done in INET_MATCH permits us immediatly skip the element without touching a second cache line in case of a miss. Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to sk_hash and tw_hash) already contains the slot number if we mask with (ehash_size - 1) File include/net/inet_hashtables.h 64 bits platforms : #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) ((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 32bits platforms: #define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - Adds a prefetch(head->chain.first) in __inet_lookup_established()/__tcp_v4_check_established() and __inet6_lookup_established()/__tcp_v6_check_established() and __dccp_v4_check_established() to bring into cache the first element of the list, before the {read|write}_lock(&head->lock); Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-10-04 04:13:38 +07:00
sk->sk_hash = vcc->vci & (VCC_HTABLE_SIZE - 1);
sk_add_node(sk, head);
}
void vcc_insert_socket(struct sock *sk)
{
write_lock_irq(&vcc_sklist_lock);
__vcc_insert_socket(sk);
write_unlock_irq(&vcc_sklist_lock);
}
EXPORT_SYMBOL(vcc_insert_socket);
static void vcc_remove_socket(struct sock *sk)
{
write_lock_irq(&vcc_sklist_lock);
sk_del_node_init(sk);
write_unlock_irq(&vcc_sklist_lock);
}
static bool vcc_tx_ready(struct atm_vcc *vcc, unsigned int size)
{
struct sock *sk = sk_atm(vcc);
if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) {
pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n",
sk_wmem_alloc_get(sk), size, sk->sk_sndbuf);
return false;
}
return true;
}
static void vcc_sock_destruct(struct sock *sk)
{
if (atomic_read(&sk->sk_rmem_alloc))
printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n",
__func__, atomic_read(&sk->sk_rmem_alloc));
if (refcount_read(&sk->sk_wmem_alloc))
printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n",
__func__, refcount_read(&sk->sk_wmem_alloc));
}
static void vcc_def_wakeup(struct sock *sk)
{
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
struct socket_wq *wq;
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
wake_up(&wq->wait);
rcu_read_unlock();
}
static inline int vcc_writable(struct sock *sk)
{
struct atm_vcc *vcc = atm_sk(sk);
return (vcc->qos.txtp.max_sdu +
refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
}
static void vcc_write_space(struct sock *sk)
{
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
struct socket_wq *wq;
rcu_read_lock();
if (vcc_writable(sk)) {
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
wake_up_interruptible(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
rcu_read_unlock();
}
static void vcc_release_cb(struct sock *sk)
{
struct atm_vcc *vcc = atm_sk(sk);
if (vcc->release_cb)
vcc->release_cb(vcc);
}
static struct proto vcc_proto = {
.name = "VCC",
.owner = THIS_MODULE,
.obj_size = sizeof(struct atm_vcc),
.release_cb = vcc_release_cb,
};
int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern)
{
struct sock *sk;
struct atm_vcc *vcc;
sock->sk = NULL;
if (sock->type == SOCK_STREAM)
return -EINVAL;
sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern);
if (!sk)
return -ENOMEM;
sock_init_data(sock, sk);
sk->sk_state_change = vcc_def_wakeup;
sk->sk_write_space = vcc_write_space;
vcc = atm_sk(sk);
vcc->dev = NULL;
memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc));
memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc));
vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */
refcount_set(&sk->sk_wmem_alloc, 1);
atomic_set(&sk->sk_rmem_alloc, 0);
vcc->push = NULL;
vcc->pop = NULL;
vcc->owner = NULL;
vcc->push_oam = NULL;
vcc->release_cb = NULL;
vcc->vpi = vcc->vci = 0; /* no VCI/VPI yet */
vcc->atm_options = vcc->aal_options = 0;
sk->sk_destruct = vcc_sock_destruct;
return 0;
}
static void vcc_destroy_socket(struct sock *sk)
{
struct atm_vcc *vcc = atm_sk(sk);
struct sk_buff *skb;
set_bit(ATM_VF_CLOSE, &vcc->flags);
clear_bit(ATM_VF_READY, &vcc->flags);
if (vcc->dev) {
if (vcc->dev->ops->close)
vcc->dev->ops->close(vcc);
if (vcc->push)
vcc->push(vcc, NULL); /* atmarpd has no push */
module_put(vcc->owner);
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
atm_return(vcc, skb->truesize);
kfree_skb(skb);
}
module_put(vcc->dev->ops->owner);
atm_dev_put(vcc->dev);
}
vcc_remove_socket(sk);
}
int vcc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (sk) {
lock_sock(sk);
vcc_destroy_socket(sock->sk);
release_sock(sk);
sock_put(sk);
}
return 0;
}
void vcc_release_async(struct atm_vcc *vcc, int reply)
{
struct sock *sk = sk_atm(vcc);
set_bit(ATM_VF_CLOSE, &vcc->flags);
sk->sk_shutdown |= RCV_SHUTDOWN;
sk->sk_err = -reply;
clear_bit(ATM_VF_WAITING, &vcc->flags);
sk->sk_state_change(sk);
}
EXPORT_SYMBOL(vcc_release_async);
void vcc_process_recv_queue(struct atm_vcc *vcc)
{
struct sk_buff_head queue, *rq;
struct sk_buff *skb, *tmp;
unsigned long flags;
__skb_queue_head_init(&queue);
rq = &sk_atm(vcc)->sk_receive_queue;
spin_lock_irqsave(&rq->lock, flags);
skb_queue_splice_init(rq, &queue);
spin_unlock_irqrestore(&rq->lock, flags);
skb_queue_walk_safe(&queue, skb, tmp) {
__skb_unlink(skb, &queue);
vcc->push(vcc, skb);
}
}
EXPORT_SYMBOL(vcc_process_recv_queue);
void atm_dev_signal_change(struct atm_dev *dev, char signal)
{
pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n",
__func__, signal, dev, dev->number, dev->signal);
/* atm driver sending invalid signal */
WARN_ON(signal < ATM_PHY_SIG_LOST || signal > ATM_PHY_SIG_FOUND);
if (dev->signal == signal)
return; /* no change */
dev->signal = signal;
atomic_notifier_call_chain(&atm_dev_notify_chain, signal, dev);
}
EXPORT_SYMBOL(atm_dev_signal_change);
void atm_dev_release_vccs(struct atm_dev *dev)
{
int i;
write_lock_irq(&vcc_sklist_lock);
for (i = 0; i < VCC_HTABLE_SIZE; i++) {
struct hlist_head *head = &vcc_hash[i];
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 08:06:00 +07:00
struct hlist_node *tmp;
struct sock *s;
struct atm_vcc *vcc;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 08:06:00 +07:00
sk_for_each_safe(s, tmp, head) {
vcc = atm_sk(s);
if (vcc->dev == dev) {
vcc_release_async(vcc, -EPIPE);
sk_del_node_init(s);
}
}
}
write_unlock_irq(&vcc_sklist_lock);
}
EXPORT_SYMBOL(atm_dev_release_vccs);
static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
{
int max_sdu;
if (!tp->traffic_class)
return 0;
switch (aal) {
case ATM_AAL0:
max_sdu = ATM_CELL_SIZE-1;
break;
case ATM_AAL34:
max_sdu = ATM_MAX_AAL34_PDU;
break;
default:
pr_warn("AAL problems ... (%d)\n", aal);
/* fall through */
case ATM_AAL5:
max_sdu = ATM_MAX_AAL5_PDU;
}
if (!tp->max_sdu)
tp->max_sdu = max_sdu;
else if (tp->max_sdu > max_sdu)
return -EINVAL;
if (!tp->max_cdv)
tp->max_cdv = ATM_MAX_CDV;
return 0;
}
static int check_ci(const struct atm_vcc *vcc, short vpi, int vci)
{
struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)];
struct sock *s;
struct atm_vcc *walk;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 08:06:00 +07:00
sk_for_each(s, head) {
walk = atm_sk(s);
if (walk->dev != vcc->dev)
continue;
if (test_bit(ATM_VF_ADDR, &walk->flags) && walk->vpi == vpi &&
walk->vci == vci && ((walk->qos.txtp.traffic_class !=
ATM_NONE && vcc->qos.txtp.traffic_class != ATM_NONE) ||
(walk->qos.rxtp.traffic_class != ATM_NONE &&
vcc->qos.rxtp.traffic_class != ATM_NONE)))
return -EADDRINUSE;
}
/* allow VCCs with same VPI/VCI iff they don't collide on
TX/RX (but we may refuse such sharing for other reasons,
e.g. if protocol requires to have both channels) */
return 0;
}
static int find_ci(const struct atm_vcc *vcc, short *vpi, int *vci)
{
static short p; /* poor man's per-device cache */
static int c;
short old_p;
int old_c;
int err;
if (*vpi != ATM_VPI_ANY && *vci != ATM_VCI_ANY) {
err = check_ci(vcc, *vpi, *vci);
return err;
}
/* last scan may have left values out of bounds for current device */
if (*vpi != ATM_VPI_ANY)
p = *vpi;
else if (p >= 1 << vcc->dev->ci_range.vpi_bits)
p = 0;
if (*vci != ATM_VCI_ANY)
c = *vci;
else if (c < ATM_NOT_RSV_VCI || c >= 1 << vcc->dev->ci_range.vci_bits)
c = ATM_NOT_RSV_VCI;
old_p = p;
old_c = c;
do {
if (!check_ci(vcc, p, c)) {
*vpi = p;
*vci = c;
return 0;
}
if (*vci == ATM_VCI_ANY) {
c++;
if (c >= 1 << vcc->dev->ci_range.vci_bits)
c = ATM_NOT_RSV_VCI;
}
if ((c == ATM_NOT_RSV_VCI || *vci != ATM_VCI_ANY) &&
*vpi == ATM_VPI_ANY) {
p++;
if (p >= 1 << vcc->dev->ci_range.vpi_bits)
p = 0;
}
} while (old_p != p || old_c != c);
return -EADDRINUSE;
}
static int __vcc_connect(struct atm_vcc *vcc, struct atm_dev *dev, short vpi,
int vci)
{
struct sock *sk = sk_atm(vcc);
int error;
if ((vpi != ATM_VPI_UNSPEC && vpi != ATM_VPI_ANY &&
vpi >> dev->ci_range.vpi_bits) || (vci != ATM_VCI_UNSPEC &&
vci != ATM_VCI_ANY && vci >> dev->ci_range.vci_bits))
return -EINVAL;
if (vci > 0 && vci < ATM_NOT_RSV_VCI && !capable(CAP_NET_BIND_SERVICE))
return -EPERM;
error = -ENODEV;
if (!try_module_get(dev->ops->owner))
return error;
vcc->dev = dev;
write_lock_irq(&vcc_sklist_lock);
if (test_bit(ATM_DF_REMOVED, &dev->flags) ||
(error = find_ci(vcc, &vpi, &vci))) {
write_unlock_irq(&vcc_sklist_lock);
goto fail_module_put;
}
vcc->vpi = vpi;
vcc->vci = vci;
__vcc_insert_socket(sk);
write_unlock_irq(&vcc_sklist_lock);
switch (vcc->qos.aal) {
case ATM_AAL0:
error = atm_init_aal0(vcc);
vcc->stats = &dev->stats.aal0;
break;
case ATM_AAL34:
error = atm_init_aal34(vcc);
vcc->stats = &dev->stats.aal34;
break;
case ATM_NO_AAL:
/* ATM_AAL5 is also used in the "0 for default" case */
vcc->qos.aal = ATM_AAL5;
/* fall through */
case ATM_AAL5:
error = atm_init_aal5(vcc);
vcc->stats = &dev->stats.aal5;
break;
default:
error = -EPROTOTYPE;
}
if (!error)
error = adjust_tp(&vcc->qos.txtp, vcc->qos.aal);
if (!error)
error = adjust_tp(&vcc->qos.rxtp, vcc->qos.aal);
if (error)
goto fail;
pr_debug("VCC %d.%d, AAL %d\n", vpi, vci, vcc->qos.aal);
pr_debug(" TX: %d, PCR %d..%d, SDU %d\n",
vcc->qos.txtp.traffic_class,
vcc->qos.txtp.min_pcr,
vcc->qos.txtp.max_pcr,
vcc->qos.txtp.max_sdu);
pr_debug(" RX: %d, PCR %d..%d, SDU %d\n",
vcc->qos.rxtp.traffic_class,
vcc->qos.rxtp.min_pcr,
vcc->qos.rxtp.max_pcr,
vcc->qos.rxtp.max_sdu);
if (dev->ops->open) {
error = dev->ops->open(vcc);
if (error)
goto fail;
}
return 0;
fail:
vcc_remove_socket(sk);
fail_module_put:
module_put(dev->ops->owner);
/* ensure we get dev module ref count correct */
vcc->dev = NULL;
return error;
}
int vcc_connect(struct socket *sock, int itf, short vpi, int vci)
{
struct atm_dev *dev;
struct atm_vcc *vcc = ATM_SD(sock);
int error;
pr_debug("(vpi %d, vci %d)\n", vpi, vci);
if (sock->state == SS_CONNECTED)
return -EISCONN;
if (sock->state != SS_UNCONNECTED)
return -EINVAL;
if (!(vpi || vci))
return -EINVAL;
if (vpi != ATM_VPI_UNSPEC && vci != ATM_VCI_UNSPEC)
clear_bit(ATM_VF_PARTIAL, &vcc->flags);
else
if (test_bit(ATM_VF_PARTIAL, &vcc->flags))
return -EINVAL;
pr_debug("(TX: cl %d,bw %d-%d,sdu %d; "
"RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n",
vcc->qos.txtp.traffic_class, vcc->qos.txtp.min_pcr,
vcc->qos.txtp.max_pcr, vcc->qos.txtp.max_sdu,
vcc->qos.rxtp.traffic_class, vcc->qos.rxtp.min_pcr,
vcc->qos.rxtp.max_pcr, vcc->qos.rxtp.max_sdu,
vcc->qos.aal == ATM_AAL5 ? "" :
vcc->qos.aal == ATM_AAL0 ? "" : " ??? code ",
vcc->qos.aal == ATM_AAL0 ? 0 : vcc->qos.aal);
if (!test_bit(ATM_VF_HASQOS, &vcc->flags))
return -EBADFD;
if (vcc->qos.txtp.traffic_class == ATM_ANYCLASS ||
vcc->qos.rxtp.traffic_class == ATM_ANYCLASS)
return -EINVAL;
if (likely(itf != ATM_ITF_ANY)) {
dev = try_then_request_module(atm_dev_lookup(itf),
"atm-device-%d", itf);
} else {
dev = NULL;
mutex_lock(&atm_dev_mutex);
if (!list_empty(&atm_devs)) {
dev = list_entry(atm_devs.next,
struct atm_dev, dev_list);
atm_dev_hold(dev);
}
mutex_unlock(&atm_dev_mutex);
}
if (!dev)
return -ENODEV;
error = __vcc_connect(vcc, dev, vpi, vci);
if (error) {
atm_dev_put(dev);
return error;
}
if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC)
set_bit(ATM_VF_PARTIAL, &vcc->flags);
if (test_bit(ATM_VF_READY, &ATM_SD(sock)->flags))
sock->state = SS_CONNECTED;
return 0;
}
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
struct atm_vcc *vcc;
struct sk_buff *skb;
int copied, error = -EINVAL;
if (sock->state != SS_CONNECTED)
return -ENOTCONN;
/* only handle MSG_DONTWAIT and MSG_PEEK */
if (flags & ~(MSG_DONTWAIT | MSG_PEEK))
return -EOPNOTSUPP;
vcc = ATM_SD(sock);
if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
test_bit(ATM_VF_CLOSE, &vcc->flags) ||
!test_bit(ATM_VF_READY, &vcc->flags))
return 0;
skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error);
if (!skb)
return error;
copied = skb->len;
if (copied > size) {
copied = size;
msg->msg_flags |= MSG_TRUNC;
}
error = skb_copy_datagram_msg(skb, 0, msg, copied);
if (error)
return error;
net: Generalize socket rx gap / receive queue overflow cmsg Create a new socket level option to report number of queue overflows Recently I augmented the AF_PACKET protocol to report the number of frames lost on the socket receive queue between any two enqueued frames. This value was exported via a SOL_PACKET level cmsg. AFter I completed that work it was requested that this feature be generalized so that any datagram oriented socket could make use of this option. As such I've created this patch, It creates a new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue overflowed between any two given frames. It also augments the AF_PACKET protocol to take advantage of this new feature (as it previously did not touch sk->sk_drops, which this patch uses to record the overflow count). Tested successfully by me. Notes: 1) Unlike my previous patch, this patch simply records the sk_drops value, which is not a number of drops between packets, but rather a total number of drops. Deltas must be computed in user space. 2) While this patch currently works with datagram oriented protocols, it will also be accepted by non-datagram oriented protocols. I'm not sure if thats agreeable to everyone, but my argument in favor of doing so is that, for those protocols which aren't applicable to this option, sk_drops will always be zero, and reporting no drops on a receive queue that isn't used for those non-participating protocols seems reasonable to me. This also saves us having to code in a per-protocol opt in mechanism. 3) This applies cleanly to net-next assuming that commit 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-10-13 03:26:31 +07:00
sock_recv_ts_and_drops(msg, sk, skb);
if (!(flags & MSG_PEEK)) {
pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc),
skb->truesize);
atm_return(vcc, skb->truesize);
}
skb_free_datagram(sk, skb);
return copied;
}
int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
{
struct sock *sk = sock->sk;
DEFINE_WAIT(wait);
struct atm_vcc *vcc;
struct sk_buff *skb;
int eff, error;
lock_sock(sk);
if (sock->state != SS_CONNECTED) {
error = -ENOTCONN;
goto out;
}
if (m->msg_name) {
error = -EISCONN;
goto out;
}
vcc = ATM_SD(sock);
if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
test_bit(ATM_VF_CLOSE, &vcc->flags) ||
!test_bit(ATM_VF_READY, &vcc->flags)) {
error = -EPIPE;
send_sig(SIGPIPE, current, 0);
goto out;
}
if (!size) {
error = 0;
goto out;
}
if (size > vcc->qos.txtp.max_sdu) {
error = -EMSGSIZE;
goto out;
}
eff = (size+3) & ~3; /* align to word boundary */
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
error = 0;
while (!vcc_tx_ready(vcc, eff)) {
if (m->msg_flags & MSG_DONTWAIT) {
error = -EAGAIN;
break;
}
schedule();
if (signal_pending(current)) {
error = -ERESTARTSYS;
break;
}
if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
test_bit(ATM_VF_CLOSE, &vcc->flags) ||
!test_bit(ATM_VF_READY, &vcc->flags)) {
error = -EPIPE;
send_sig(SIGPIPE, current, 0);
break;
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
finish_wait(sk_sleep(sk), &wait);
if (error)
goto out;
skb = alloc_skb(eff, GFP_KERNEL);
if (!skb) {
error = -ENOMEM;
goto out;
}
pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
atm: Preserve value of skb->truesize when accounting to vcc ATM accounts for in-flight TX packets in sk_wmem_alloc of the VCC on which they are to be sent. But it doesn't take ownership of those packets from the sock (if any) which originally owned them. They should remain owned by their actual sender until they've left the box. There's a hack in pskb_expand_head() to avoid adjusting skb->truesize for certain skbs, precisely to avoid messing up sk_wmem_alloc accounting. Ideally that hack would cover the ATM use case too, but it doesn't — skbs which aren't owned by any sock, for example PPP control frames, still get their truesize adjusted when the low-level ATM driver adds headroom. This has always been an issue, it seems. The truesize of a packet increases, and sk_wmem_alloc on the VCC goes negative. But this wasn't for normal traffic, only for control frames. So I think we just got away with it, and we probably needed to send 2GiB of LCP echo frames before the misaccounting would ever have caused a problem and caused atm_may_send() to start refusing packets. Commit 14afee4b609 ("net: convert sock.sk_wmem_alloc from atomic_t to refcount_t") did exactly what it was intended to do, and turned this mostly-theoretical problem into a real one, causing PPPoATM to fail immediately as sk_wmem_alloc underflows and atm_may_send() *immediately* starts refusing to allow new packets. The least intrusive solution to this problem is to stash the value of skb->truesize that was accounted to the VCC, in a new member of the ATM_SKB(skb) structure. Then in atm_pop_raw() subtract precisely that value instead of the then-current value of skb->truesize. Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") Signed-off-by: David Woodhouse <dwmw2@infradead.org> Tested-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-06-16 17:55:44 +07:00
atm_account_tx(vcc, skb);
skb->dev = NULL; /* for paths shared with net_device interfaces */
if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
kfree_skb(skb);
error = -EFAULT;
goto out;
}
if (eff != size)
memset(skb->data + size, 0, eff-size);
error = vcc->dev->ops->send(vcc, skb);
error = error ? error : size;
out:
release_sock(sk);
return error;
}
__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct atm_vcc *vcc;
__poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
vcc = ATM_SD(sock);
/* exceptional events */
if (sk->sk_err)
mask = EPOLLERR;
if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
test_bit(ATM_VF_CLOSE, &vcc->flags))
mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* writable? */
if (sock->state == SS_CONNECTING &&
test_bit(ATM_VF_WAITING, &vcc->flags))
return mask;
if (vcc->qos.txtp.traffic_class != ATM_NONE &&
vcc_writable(sk))
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
static int atm_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
{
int error;
/*
* Don't let the QoS change the already connected AAL type nor the
* traffic class.
*/
if (qos->aal != vcc->qos.aal ||
qos->rxtp.traffic_class != vcc->qos.rxtp.traffic_class ||
qos->txtp.traffic_class != vcc->qos.txtp.traffic_class)
return -EINVAL;
error = adjust_tp(&qos->txtp, qos->aal);
if (!error)
error = adjust_tp(&qos->rxtp, qos->aal);
if (error)
return error;
if (!vcc->dev->ops->change_qos)
return -EOPNOTSUPP;
if (sk_atm(vcc)->sk_family == AF_ATMPVC)
return vcc->dev->ops->change_qos(vcc, qos, ATM_MF_SET);
return svc_change_qos(vcc, qos);
}
static int check_tp(const struct atm_trafprm *tp)
{
/* @@@ Should be merged with adjust_tp */
if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS)
return 0;
if (tp->traffic_class != ATM_UBR && !tp->min_pcr && !tp->pcr &&
!tp->max_pcr)
return -EINVAL;
if (tp->min_pcr == ATM_MAX_PCR)
return -EINVAL;
if (tp->min_pcr && tp->max_pcr && tp->max_pcr != ATM_MAX_PCR &&
tp->min_pcr > tp->max_pcr)
return -EINVAL;
/*
* We allow pcr to be outside [min_pcr,max_pcr], because later
* adjustment may still push it in the valid range.
*/
return 0;
}
static int check_qos(const struct atm_qos *qos)
{
int error;
if (!qos->txtp.traffic_class && !qos->rxtp.traffic_class)
return -EINVAL;
if (qos->txtp.traffic_class != qos->rxtp.traffic_class &&
qos->txtp.traffic_class && qos->rxtp.traffic_class &&
qos->txtp.traffic_class != ATM_ANYCLASS &&
qos->rxtp.traffic_class != ATM_ANYCLASS)
return -EINVAL;
error = check_tp(&qos->txtp);
if (error)
return error;
return check_tp(&qos->rxtp);
}
int vcc_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct atm_vcc *vcc;
unsigned long value;
int error;
if (__SO_LEVEL_MATCH(optname, level) && optlen != __SO_SIZE(optname))
return -EINVAL;
vcc = ATM_SD(sock);
switch (optname) {
case SO_ATMQOS:
{
struct atm_qos qos;
if (copy_from_user(&qos, optval, sizeof(qos)))
return -EFAULT;
error = check_qos(&qos);
if (error)
return error;
if (sock->state == SS_CONNECTED)
return atm_change_qos(vcc, &qos);
if (sock->state != SS_UNCONNECTED)
return -EBADFD;
vcc->qos = qos;
set_bit(ATM_VF_HASQOS, &vcc->flags);
return 0;
}
case SO_SETCLP:
if (get_user(value, (unsigned long __user *)optval))
return -EFAULT;
if (value)
vcc->atm_options |= ATM_ATMOPT_CLP;
else
vcc->atm_options &= ~ATM_ATMOPT_CLP;
return 0;
default:
if (level == SOL_SOCKET)
return -EINVAL;
break;
}
if (!vcc->dev || !vcc->dev->ops->setsockopt)
return -EINVAL;
return vcc->dev->ops->setsockopt(vcc, level, optname, optval, optlen);
}
int vcc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct atm_vcc *vcc;
int len;
if (get_user(len, optlen))
return -EFAULT;
if (__SO_LEVEL_MATCH(optname, level) && len != __SO_SIZE(optname))
return -EINVAL;
vcc = ATM_SD(sock);
switch (optname) {
case SO_ATMQOS:
if (!test_bit(ATM_VF_HASQOS, &vcc->flags))
return -EINVAL;
return copy_to_user(optval, &vcc->qos, sizeof(vcc->qos))
? -EFAULT : 0;
case SO_SETCLP:
return put_user(vcc->atm_options & ATM_ATMOPT_CLP ? 1 : 0,
(unsigned long __user *)optval) ? -EFAULT : 0;
case SO_ATMPVC:
{
struct sockaddr_atmpvc pvc;
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
memset(&pvc, 0, sizeof(pvc));
pvc.sap_family = AF_ATMPVC;
pvc.sap_addr.itf = vcc->dev->number;
pvc.sap_addr.vpi = vcc->vpi;
pvc.sap_addr.vci = vcc->vci;
return copy_to_user(optval, &pvc, sizeof(pvc)) ? -EFAULT : 0;
}
default:
if (level == SOL_SOCKET)
return -EINVAL;
break;
}
if (!vcc->dev || !vcc->dev->ops->getsockopt)
return -EINVAL;
return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len);
}
int register_atmdevice_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&atm_dev_notify_chain, nb);
}
EXPORT_SYMBOL_GPL(register_atmdevice_notifier);
void unregister_atmdevice_notifier(struct notifier_block *nb)
{
atomic_notifier_chain_unregister(&atm_dev_notify_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_atmdevice_notifier);
static int __init atm_init(void)
{
int error;
error = proto_register(&vcc_proto, 0);
if (error < 0)
goto out;
error = atmpvc_init();
if (error < 0) {
pr_err("atmpvc_init() failed with %d\n", error);
goto out_unregister_vcc_proto;
}
error = atmsvc_init();
if (error < 0) {
pr_err("atmsvc_init() failed with %d\n", error);
goto out_atmpvc_exit;
}
error = atm_proc_init();
if (error < 0) {
pr_err("atm_proc_init() failed with %d\n", error);
goto out_atmsvc_exit;
}
error = atm_sysfs_init();
if (error < 0) {
pr_err("atm_sysfs_init() failed with %d\n", error);
goto out_atmproc_exit;
}
out:
return error;
out_atmproc_exit:
atm_proc_exit();
out_atmsvc_exit:
atmsvc_exit();
out_atmpvc_exit:
atmsvc_exit();
out_unregister_vcc_proto:
proto_unregister(&vcc_proto);
goto out;
}
static void __exit atm_exit(void)
{
atm_proc_exit();
atm_sysfs_exit();
atmsvc_exit();
atmpvc_exit();
proto_unregister(&vcc_proto);
}
subsys_initcall(atm_init);
module_exit(atm_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_ATMPVC);
MODULE_ALIAS_NETPROTO(PF_ATMSVC);