From 6a57b2ee4506bb8a3d2f7ba2f62ff65ec56ba150 Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Wed, 29 Mar 2006 13:57:31 -0800 Subject: [PATCH 1/6] [DECNET]: Fix refcount From: Patrick Caulfield This patch fixes a bug in the reference counting for the default DECnet device. If the device is changed, then the new device had its refcount decremented rather than the old one! Signed-off-by: David S. Miller --- net/decnet/dn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index d2ae9893ca17..a26ff9f44576 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -620,7 +620,7 @@ int dn_dev_set_default(struct net_device *dev, int force) } write_unlock(&dndev_lock); if (old) - dev_put(dev); + dev_put(old); return rv; } From 68907dad58cd7ef11536e1db6baeb98b20af91b2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Mar 2006 13:58:25 -0800 Subject: [PATCH 2/6] [DCCP]: Use NULL for pointers, comfort sparse. From: Randy Dunlap Use NULL instead of 0 for pointers. Fix these sparse warnings: net/dccp/feat.c:207:20: warning: Using plain integer as NULL pointer net/dccp/feat.c:325:21: warning: Using plain integer as NULL pointer net/dccp/feat.c:526:20: warning: Using plain integer as NULL pointer Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/dccp/feat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index e3dd30d36c8a..b39e2a597889 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -204,7 +204,7 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, if (rc) { kfree(opt->dccpop_sc->dccpoc_val); kfree(opt->dccpop_sc); - opt->dccpop_sc = 0; + opt->dccpop_sc = NULL; return rc; } @@ -322,7 +322,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R : DCCPO_CONFIRM_L; opt->dccpop_feat = feature; - opt->dccpop_val = 0; + opt->dccpop_val = NULL; opt->dccpop_len = 0; /* change feature */ @@ -523,7 +523,7 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk) * once... */ /* the master socket no longer needs to worry about confirms */ - opt->dccpop_sc = 0; /* it's not a memleak---new socket has it */ + opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */ /* reset state for a new socket */ opt->dccpop_conf = 0; From 56079431b6ba163df8ba26b3eccc82379f0c0ce4 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Wed, 29 Mar 2006 15:57:29 -0800 Subject: [PATCH 3/6] [NET]: Deinline some larger functions from netdevice.h On a allyesconfig'ured kernel: Size Uses Wasted Name and definition ===== ==== ====== ================================================ 95 162 12075 netif_wake_queue include/linux/netdevice.h 129 86 9265 dev_kfree_skb_any include/linux/netdevice.h 127 56 5885 netif_device_attach include/linux/netdevice.h 73 86 4505 dev_kfree_skb_irq include/linux/netdevice.h 46 60 1534 netif_device_detach include/linux/netdevice.h 119 16 1485 __netif_rx_schedule include/linux/netdevice.h 143 5 492 netif_rx_schedule include/linux/netdevice.h 81 7 366 netif_schedule include/linux/netdevice.h netif_wake_queue is big because __netif_schedule is a big inline: static inline void __netif_schedule(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { unsigned long flags; struct softnet_data *sd; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); dev->next_sched = sd->output_queue; sd->output_queue = dev; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } static inline void netif_wake_queue(struct net_device *dev) { #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) __netif_schedule(dev); } By de-inlining __netif_schedule we are saving a lot of text at each callsite of netif_wake_queue and netif_schedule. __netif_rx_schedule is also big, and it makes more sense to keep both of them out of line. Patch also deinlines dev_kfree_skb_any. We can deinline dev_kfree_skb_irq instead... oh well. netif_device_attach/detach are not hot paths, we can deinline them too. Signed-off-by: Denis Vlasenko Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netdevice.h | 55 +++------------------------------ net/core/dev.c | 64 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 950dc55e5192..40ccf8cc4239 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -598,20 +598,7 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -static inline void __netif_schedule(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; - struct softnet_data *sd; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } -} +extern void __netif_schedule(struct net_device *dev); static inline void netif_schedule(struct net_device *dev) { @@ -675,13 +662,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb) /* Use this variant in places where it could be invoked * either from interrupt or non-interrupt context. */ -static inline void dev_kfree_skb_any(struct sk_buff *skb) -{ - if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); - else - dev_kfree_skb(skb); -} +extern void dev_kfree_skb_any(struct sk_buff *skb); #define HAVE_NETIF_RX 1 extern int netif_rx(struct sk_buff *skb); @@ -768,22 +749,9 @@ static inline int netif_device_present(struct net_device *dev) return test_bit(__LINK_STATE_PRESENT, &dev->state); } -static inline void netif_device_detach(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_stop_queue(dev); - } -} +extern void netif_device_detach(struct net_device *dev); -static inline void netif_device_attach(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_wake_queue(dev); - __netdev_watchdog_up(dev); - } -} +extern void netif_device_attach(struct net_device *dev); /* * Network interface message level settings @@ -851,20 +819,7 @@ static inline int netif_rx_schedule_prep(struct net_device *dev) * already been called and returned 1. */ -static inline void __netif_rx_schedule(struct net_device *dev) -{ - unsigned long flags; - - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); -} +extern void __netif_rx_schedule(struct net_device *dev); /* Try to reschedule poll. Called by irq handler. */ diff --git a/net/core/dev.c b/net/core/dev.c index a3ab11f34153..434220d093aa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1080,6 +1080,70 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } + +void __netif_schedule(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { + unsigned long flags; + struct softnet_data *sd; + + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + dev->next_sched = sd->output_queue; + sd->output_queue = dev; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } +} +EXPORT_SYMBOL(__netif_schedule); + +void __netif_rx_schedule(struct net_device *dev) +{ + unsigned long flags; + + local_irq_save(flags); + dev_hold(dev); + list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); + if (dev->quota < 0) + dev->quota += dev->weight; + else + dev->quota = dev->weight; + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__netif_rx_schedule); + +void dev_kfree_skb_any(struct sk_buff *skb) +{ + if (in_irq() || irqs_disabled()) + dev_kfree_skb_irq(skb); + else + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(dev_kfree_skb_any); + + +/* Hot-plugging. */ +void netif_device_detach(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_stop_queue(dev); + } +} +EXPORT_SYMBOL(netif_device_detach); + +void netif_device_attach(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_wake_queue(dev); + __netdev_watchdog_up(dev); + } +} +EXPORT_SYMBOL(netif_device_attach); + + /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. From c08e49611a8b4e38a75bf217e1029a48faf10b82 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 31 Mar 2006 02:09:36 -0800 Subject: [PATCH 4/6] [NET]: add SO_RCVBUF comment Put a comment in there explaining why we double the setsockopt() caller's SO_RCVBUF. People keep wondering. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/sock.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index a96ea7dd0fc1..ed2afdb9ea2d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -385,7 +385,21 @@ int sock_setsockopt(struct socket *sock, int level, int optname, val = sysctl_rmem_max; set_rcvbuf: sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - /* FIXME: is this lower bound the right one? */ + /* + * We double it on the way in to account for + * "struct sk_buff" etc. overhead. Applications + * assume that the SO_RCVBUF setting they make will + * allow that much actual data to be received on that + * socket. + * + * Applications are unaware that "struct sk_buff" and + * other overheads allocate from the receive buffer + * during socket buffer allocation. + * + * And after considering the possible alternatives, + * returning the value we actually used in getsockopt + * is the most desirable behavior. + */ if ((val * 2) < SOCK_MIN_RCVBUF) sk->sk_rcvbuf = SOCK_MIN_RCVBUF; else From 0803dbed7a23721d091639c9e173c0389dcd524a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 31 Mar 2006 02:25:46 -0800 Subject: [PATCH 5/6] [TCP]: Kill unused extern decl for tcp_v4_hash_connecting() Noticed by Alan Menegotto. Signed-off-by: David S. Miller --- include/net/tcp.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9418f4d1afbb..3c989db8a7aa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -405,9 +405,6 @@ extern int tcp_disconnect(struct sock *sk, int flags); extern void tcp_unhash(struct sock *sk); -extern int tcp_v4_hash_connecting(struct sock *sk); - - /* From syncookies.c */ extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); From 025be81e83043f20538dcced1e12c5f8d152fbdb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Mar 2006 02:27:06 -0800 Subject: [PATCH 6/6] [NET]: Allow skb headroom to be overridden Previously we added NET_IP_ALIGN so an architecture can override the padding done to align headers. The next step is to allow the skb headroom to be overridden. We currently always reserve 16 bytes to grow into, meaning all DMAs start 16 bytes into a cacheline. On ppc64 we really want DMA writes to start on a cacheline boundary, so we increase that headroom to one cacheline. Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller --- include/asm-powerpc/system.h | 5 ++++- include/linux/skbuff.h | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index 65f5a7b2646b..d075725bf444 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -365,8 +365,11 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, * powers of 2 writes until it reaches sufficient alignment). * * Based on this we disable the IP header alignment in network drivers. + * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining + * cacheline alignment of buffers. */ -#define NET_IP_ALIGN 0 +#define NET_IP_ALIGN 0 +#define NET_SKB_PAD L1_CACHE_BYTES #endif #define arch_align_stack(x) (x) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 613b9513f8b9..c4619a428d9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -941,6 +941,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len) #define NET_IP_ALIGN 2 #endif +/* + * The networking layer reserves some headroom in skb data (via + * dev_alloc_skb). This is used to avoid having to reallocate skb data when + * the header has to grow. In the default case, if the header has to grow + * 16 bytes or less we avoid the reallocation. + * + * Unfortunately this headroom changes the DMA alignment of the resulting + * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive + * on some architectures. An architecture can override this value, + * perhaps setting it to a cacheline in size (since that will maintain + * cacheline alignment of the DMA). It must be a power of 2. + * + * Various parts of the networking layer expect at least 16 bytes of + * headroom, you should not reduce this. + */ +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif + extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) @@ -1030,9 +1049,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { - struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); + struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); if (likely(skb)) - skb_reserve(skb, 16); + skb_reserve(skb, NET_SKB_PAD); return skb; } #else @@ -1070,13 +1089,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length) */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) - + skb_headroom(skb); if (delta < 0) delta = 0; if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); + return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) & + ~(NET_SKB_PAD-1), 0, GFP_ATOMIC); return 0; }