linux_dsm_epyc7002/net/decnet/dn_route.c

1924 lines
47 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* DECnet Routing Functions (Endnode and Router)
*
* Authors: Steve Whitehouse <SteveW@ACM.org>
* Eduardo Marcelo Serrat <emserrat@geocities.com>
*
* Changes:
* Steve Whitehouse : Fixes to allow "intra-ethernet" and
* "return-to-sender" bits on outgoing
* packets.
* Steve Whitehouse : Timeouts for cached routes.
* Steve Whitehouse : Use dst cache for input routes too.
* Steve Whitehouse : Fixed error values in dn_send_skb.
* Steve Whitehouse : Rework routing functions to better fit
* DECnet routing design
* Alexey Kuznetsov : New SMP locking
* Steve Whitehouse : More SMP locking changes & dn_cache_dump()
* Steve Whitehouse : Prerouting NF hook, now really is prerouting.
* Fixed possible skb leak in rtnetlink funcs.
* Steve Whitehouse : Dave Miller's dynamic hash table sizing and
* Alexey Kuznetsov's finer grained locking
* from ipv4/route.c.
* Steve Whitehouse : Routing is now starting to look like a
* sensible set of code now, mainly due to
* my copying the IPv4 routing code. The
* hooks here are modified and will continue
* to evolve for a while.
* Steve Whitehouse : Real SMP at last :-) Also new netfilter
* stuff. Look out raw sockets your days
* are numbered!
* Steve Whitehouse : Added return-to-sender functions. Added
* backlog congestion level return codes.
* Steve Whitehouse : Fixed bug where routes were set up with
* no ref count on net devices.
* Steve Whitehouse : RCU for the route cache
* Steve Whitehouse : Preparations for the flow cache
* Steve Whitehouse : Prepare for nonlinear skbs
*/
/******************************************************************************
(c) 1995-1998 E.M. Serrat emserrat@geocities.com
*******************************************************************************/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/inet.h>
#include <linux/route.h>
#include <linux/in_route.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <linux/string.h>
#include <linux/netfilter_decnet.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/export.h>
#include <asm/errno.h>
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/fib_rules.h>
#include <net/dn.h>
#include <net/dn_dev.h>
#include <net/dn_nsp.h>
#include <net/dn_route.h>
#include <net/dn_neigh.h>
#include <net/dn_fib.h>
struct dn_rt_hash_bucket
{
struct dn_route __rcu *chain;
spinlock_t lock;
};
extern struct neigh_table dn_neigh_table;
static unsigned char dn_hiord_addr[6] = {0xAA,0x00,0x04,0x00,0x00,0x00};
static const int dn_rt_min_delay = 2 * HZ;
static const int dn_rt_max_delay = 10 * HZ;
static const int dn_rt_mtu_expires = 10 * 60 * HZ;
static unsigned long dn_rt_deadline;
static int dn_dst_gc(struct dst_ops *ops);
static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
static unsigned int dn_dst_mtu(const struct dst_entry *dst);
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
static void dn_dst_destroy(struct dst_entry *);
static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
static void dn_dst_link_failure(struct sk_buff *);
static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
net: add bool confirm_neigh parameter for dst_ops.update_pmtu The MTU update code is supposed to be invoked in response to real networking events that update the PMTU. In IPv6 PMTU update function __ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor confirmed time. But for tunnel code, it will call pmtu before xmit, like: - tnl_update_pmtu() - skb_dst_update_pmtu() - ip6_rt_update_pmtu() - __ip6_rt_update_pmtu() - dst_confirm_neigh() If the tunnel remote dst mac address changed and we still do the neigh confirm, we will not be able to update neigh cache and ping6 remote will failed. So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we should not be invoking dst_confirm_neigh() as we have no evidence of successful two-way communication at this point. On the other hand it is also important to keep the neigh reachability fresh for TCP flows, so we cannot remove this dst_confirm_neigh() call. To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu to choose whether we should do neigh update or not. I will add the parameter in this patch and set all the callers to true to comply with the previous way, and fix the tunnel code one by one on later patches. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Suggested-by: David Miller <davem@davemloft.net> Reviewed-by: Guillaume Nault <gnault@redhat.com> Acked-by: David Ahern <dsahern@gmail.com> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-22 09:51:09 +07:00
struct sk_buff *skb , u32 mtu,
bool confirm_neigh);
static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr);
static int dn_route_input(struct sk_buff *);
static void dn_run_flush(struct timer_list *unused);
static struct dn_rt_hash_bucket *dn_rt_hash_table;
static unsigned int dn_rt_hash_mask;
static struct timer_list dn_route_timer;
timer: Remove expires and data arguments from DEFINE_TIMER Drop the arguments from the macro and adjust all callers with the following script: perl -pi -e 's/DEFINE_TIMER\((.*), 0, 0\);/DEFINE_TIMER($1);/g;' \ $(git grep DEFINE_TIMER | cut -d: -f1 | sort -u | grep -v timer.h) Signed-off-by: Kees Cook <keescook@chromium.org> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> # for m68k parts Acked-by: Guenter Roeck <linux@roeck-us.net> # for watchdog parts Acked-by: David S. Miller <davem@davemloft.net> # for networking parts Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Acked-by: Kalle Valo <kvalo@codeaurora.org> # for wireless parts Acked-by: Arnd Bergmann <arnd@arndb.de> Cc: linux-mips@linux-mips.org Cc: Petr Mladek <pmladek@suse.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Lai Jiangshan <jiangshanlai@gmail.com> Cc: Sebastian Reichel <sre@kernel.org> Cc: Kalle Valo <kvalo@qca.qualcomm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Pavel Machek <pavel@ucw.cz> Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com> Cc: Wim Van Sebroeck <wim@iguana.be> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Ursula Braun <ubraun@linux.vnet.ibm.com> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: Harish Patil <harish.patil@cavium.com> Cc: Stephen Boyd <sboyd@codeaurora.org> Cc: Michael Reed <mdr@sgi.com> Cc: Manish Chopra <manish.chopra@cavium.com> Cc: Len Brown <len.brown@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Tejun Heo <tj@kernel.org> Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com> Cc: John Stultz <john.stultz@linaro.org> Cc: Mark Gross <mark.gross@intel.com> Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Stefan Richter <stefanr@s5r6.in-berlin.de> Cc: Guenter Roeck <linux@roeck-us.net> Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com> Link: https://lkml.kernel.org/r/1507159627-127660-11-git-send-email-keescook@chromium.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2017-10-05 06:27:04 +07:00
static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
int decnet_dst_gc_interval = 2;
static struct dst_ops dn_dst_ops = {
.family = PF_DECnet,
.gc_thresh = 128,
.gc = dn_dst_gc,
.check = dn_dst_check,
.default_advmss = dn_dst_default_advmss,
.mtu = dn_dst_mtu,
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
.cow_metrics = dst_cow_metrics_generic,
.destroy = dn_dst_destroy,
.ifdown = dn_dst_ifdown,
.negative_advice = dn_dst_negative_advice,
.link_failure = dn_dst_link_failure,
.update_pmtu = dn_dst_update_pmtu,
.redirect = dn_dst_redirect,
.neigh_lookup = dn_dst_neigh_lookup,
};
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
static void dn_dst_destroy(struct dst_entry *dst)
{
struct dn_route *rt = (struct dn_route *) dst;
if (rt->n)
neigh_release(rt->n);
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
dst_destroy_metrics_generic(dst);
}
static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how)
{
if (how) {
struct dn_route *rt = (struct dn_route *) dst;
struct neighbour *n = rt->n;
if (n && n->dev == dev) {
n->dev = dev_net(dev)->loopback_dev;
dev_hold(n->dev);
dev_put(dev);
}
}
}
static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
{
__u16 tmp = (__u16 __force)(src ^ dst);
tmp ^= (tmp >> 3);
tmp ^= (tmp >> 5);
tmp ^= (tmp >> 10);
return dn_rt_hash_mask & (unsigned int)tmp;
}
treewide: setup_timer() -> timer_setup() This converts all remaining cases of the old setup_timer() API into using timer_setup(), where the callback argument is the structure already holding the struct timer_list. These should have no behavioral changes, since they just change which pointer is passed into the callback with the same available pointers after conversion. It handles the following examples, in addition to some other variations. Casting from unsigned long: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... setup_timer(&ptr->my_timer, my_callback, ptr); and forced object casts: void my_callback(struct something *ptr) { ... } ... setup_timer(&ptr->my_timer, my_callback, (unsigned long)ptr); become: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... timer_setup(&ptr->my_timer, my_callback, 0); Direct function assignments: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... ptr->my_timer.function = my_callback; have a temporary cast added, along with converting the args: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... ptr->my_timer.function = (TIMER_FUNC_TYPE)my_callback; And finally, callbacks without a data assignment: void my_callback(unsigned long data) { ... } ... setup_timer(&ptr->my_timer, my_callback, 0); have their argument renamed to verify they're unused during conversion: void my_callback(struct timer_list *unused) { ... } ... timer_setup(&ptr->my_timer, my_callback, 0); The conversion is done with the following Coccinelle script: spatch --very-quiet --all-includes --include-headers \ -I ./arch/x86/include -I ./arch/x86/include/generated \ -I ./include -I ./arch/x86/include/uapi \ -I ./arch/x86/include/generated/uapi -I ./include/uapi \ -I ./include/generated/uapi --include ./include/linux/kconfig.h \ --dir . \ --cocci-file ~/src/data/timer_setup.cocci @fix_address_of@ expression e; @@ setup_timer( -&(e) +&e , ...) // Update any raw setup_timer() usages that have a NULL callback, but // would otherwise match change_timer_function_usage, since the latter // will update all function assignments done in the face of a NULL // function initialization in setup_timer(). @change_timer_function_usage_NULL@ expression _E; identifier _timer; type _cast_data; @@ ( -setup_timer(&_E->_timer, NULL, _E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E->_timer, NULL, (_cast_data)_E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E._timer, NULL, &_E); +timer_setup(&_E._timer, NULL, 0); | -setup_timer(&_E._timer, NULL, (_cast_data)&_E); +timer_setup(&_E._timer, NULL, 0); ) @change_timer_function_usage@ expression _E; identifier _timer; struct timer_list _stl; identifier _callback; type _cast_func, _cast_data; @@ ( -setup_timer(&_E->_timer, _callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | _E->_timer@_stl.function = _callback; | _E->_timer@_stl.function = &_callback; | _E->_timer@_stl.function = (_cast_func)_callback; | _E->_timer@_stl.function = (_cast_func)&_callback; | _E._timer@_stl.function = _callback; | _E._timer@_stl.function = &_callback; | _E._timer@_stl.function = (_cast_func)_callback; | _E._timer@_stl.function = (_cast_func)&_callback; ) // callback(unsigned long arg) @change_callback_handle_cast depends on change_timer_function_usage@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; identifier _handle; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { ( ... when != _origarg _handletype *_handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg ) } // callback(unsigned long arg) without existing variable @change_callback_handle_cast_no_arg depends on change_timer_function_usage && !change_callback_handle_cast@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { + _handletype *_origarg = from_timer(_origarg, t, _timer); + ... when != _origarg - (_handletype *)_origarg + _origarg ... when != _origarg } // Avoid already converted callbacks. @match_callback_converted depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier t; @@ void _callback(struct timer_list *t) { ... } // callback(struct something *handle) @change_callback_handle_arg depends on change_timer_function_usage && !match_callback_converted && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; @@ void _callback( -_handletype *_handle +struct timer_list *t ) { + _handletype *_handle = from_timer(_handle, t, _timer); ... } // If change_callback_handle_arg ran on an empty function, remove // the added handler. @unchange_callback_handle_arg depends on change_timer_function_usage && change_callback_handle_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; identifier t; @@ void _callback(struct timer_list *t) { - _handletype *_handle = from_timer(_handle, t, _timer); } // We only want to refactor the setup_timer() data argument if we've found // the matching callback. This undoes changes in change_timer_function_usage. @unchange_timer_function_usage depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg && !change_callback_handle_arg@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type change_timer_function_usage._cast_data; @@ ( -timer_setup(&_E->_timer, _callback, 0); +setup_timer(&_E->_timer, _callback, (_cast_data)_E); | -timer_setup(&_E._timer, _callback, 0); +setup_timer(&_E._timer, _callback, (_cast_data)&_E); ) // If we fixed a callback from a .function assignment, fix the // assignment cast now. @change_timer_function_assignment depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_func; typedef TIMER_FUNC_TYPE; @@ ( _E->_timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -&_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)_callback; +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -&_callback; +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; ) // Sometimes timer functions are called directly. Replace matched args. @change_timer_function_calls depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression _E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_data; @@ _callback( ( -(_cast_data)_E +&_E->_timer | -(_cast_data)&_E +&_E._timer | -_E +&_E->_timer ) ) // If a timer has been configured without a data argument, it can be // converted without regard to the callback argument, since it is unused. @match_timer_function_unused_data@ expression _E; identifier _timer; identifier _callback; @@ ( -setup_timer(&_E->_timer, _callback, 0); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0L); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0UL); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0L); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0UL); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_timer, _callback, 0); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0L); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0UL); +timer_setup(&_timer, _callback, 0); | -setup_timer(_timer, _callback, 0); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0L); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0UL); +timer_setup(_timer, _callback, 0); ) @change_callback_unused_data depends on match_timer_function_unused_data@ identifier match_timer_function_unused_data._callback; type _origtype; identifier _origarg; @@ void _callback( -_origtype _origarg +struct timer_list *unused ) { ... when != _origarg } Signed-off-by: Kees Cook <keescook@chromium.org>
2017-10-17 04:43:17 +07:00
static void dn_dst_check_expire(struct timer_list *unused)
{
int i;
struct dn_route *rt;
struct dn_route __rcu **rtp;
unsigned long now = jiffies;
unsigned long expire = 120 * HZ;
for (i = 0; i <= dn_rt_hash_mask; i++) {
rtp = &dn_rt_hash_table[i].chain;
spin_lock(&dn_rt_hash_table[i].lock);
while ((rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) > 1 ||
(now - rt->dst.lastuse) < expire) {
rtp = &rt->dn_next;
continue;
}
*rtp = rt->dn_next;
rt->dn_next = NULL;
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
spin_unlock(&dn_rt_hash_table[i].lock);
if ((jiffies - now) > 0)
break;
}
mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
}
static int dn_dst_gc(struct dst_ops *ops)
{
struct dn_route *rt;
struct dn_route __rcu **rtp;
int i;
unsigned long now = jiffies;
unsigned long expire = 10 * HZ;
for (i = 0; i <= dn_rt_hash_mask; i++) {
spin_lock_bh(&dn_rt_hash_table[i].lock);
rtp = &dn_rt_hash_table[i].chain;
while ((rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
if (atomic_read(&rt->dst.__refcnt) > 1 ||
(now - rt->dst.lastuse) < expire) {
rtp = &rt->dn_next;
continue;
}
*rtp = rt->dn_next;
rt->dn_next = NULL;
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
break;
}
spin_unlock_bh(&dn_rt_hash_table[i].lock);
}
return 0;
}
/*
* The decnet standards don't impose a particular minimum mtu, what they
* do insist on is that the routing layer accepts a datagram of at least
* 230 bytes long. Here we have to subtract the routing header length from
* 230 to get the minimum acceptable mtu. If there is no neighbour, then we
* assume the worst and use a long header size.
*
* We update both the mtu and the advertised mss (i.e. the segment size we
* advertise to the other end).
*/
static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
net: add bool confirm_neigh parameter for dst_ops.update_pmtu The MTU update code is supposed to be invoked in response to real networking events that update the PMTU. In IPv6 PMTU update function __ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor confirmed time. But for tunnel code, it will call pmtu before xmit, like: - tnl_update_pmtu() - skb_dst_update_pmtu() - ip6_rt_update_pmtu() - __ip6_rt_update_pmtu() - dst_confirm_neigh() If the tunnel remote dst mac address changed and we still do the neigh confirm, we will not be able to update neigh cache and ping6 remote will failed. So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we should not be invoking dst_confirm_neigh() as we have no evidence of successful two-way communication at this point. On the other hand it is also important to keep the neigh reachability fresh for TCP flows, so we cannot remove this dst_confirm_neigh() call. To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu to choose whether we should do neigh update or not. I will add the parameter in this patch and set all the callers to true to comply with the previous way, and fix the tunnel code one by one on later patches. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Suggested-by: David Miller <davem@davemloft.net> Reviewed-by: Guillaume Nault <gnault@redhat.com> Acked-by: David Ahern <dsahern@gmail.com> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-12-22 09:51:09 +07:00
struct sk_buff *skb, u32 mtu,
bool confirm_neigh)
{
struct dn_route *rt = (struct dn_route *) dst;
struct neighbour *n = rt->n;
u32 min_mtu = 230;
struct dn_dev *dn;
dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL;
if (dn && dn->use_long == 0)
min_mtu -= 6;
else
min_mtu -= 21;
if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
if (!(dst_metric_locked(dst, RTAX_MTU))) {
dst_metric_set(dst, RTAX_MTU, mtu);
dst_set_expires(dst, dn_rt_mtu_expires);
}
if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS);
if (!existing_mss || existing_mss > mss)
dst_metric_set(dst, RTAX_ADVMSS, mss);
}
}
}
static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb)
{
}
/*
* When a route has been marked obsolete. (e.g. routing cache flush)
*/
static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie)
{
return NULL;
}
static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
{
dst_release(dst);
return NULL;
}
static void dn_dst_link_failure(struct sk_buff *skb)
{
}
static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2)
{
return ((fl1->daddr ^ fl2->daddr) |
(fl1->saddr ^ fl2->saddr) |
(fl1->flowidn_mark ^ fl2->flowidn_mark) |
(fl1->flowidn_scope ^ fl2->flowidn_scope) |
(fl1->flowidn_oif ^ fl2->flowidn_oif) |
(fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0;
}
static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_route **rp)
{
struct dn_route *rth;
struct dn_route __rcu **rthp;
unsigned long now = jiffies;
rthp = &dn_rt_hash_table[hash].chain;
spin_lock_bh(&dn_rt_hash_table[hash].lock);
while ((rth = rcu_dereference_protected(*rthp,
lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
if (compare_keys(&rth->fld, &rt->fld)) {
/* Put it first */
*rthp = rth->dn_next;
rcu_assign_pointer(rth->dn_next,
dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
dst_hold_and_use(&rth->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
dst_release_immediate(&rt->dst);
*rp = rth;
return 0;
}
rthp = &rth->dn_next;
}
rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
dst_hold_and_use(&rt->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
*rp = rt;
return 0;
}
static void dn_run_flush(struct timer_list *unused)
{
int i;
struct dn_route *rt, *next;
for (i = 0; i < dn_rt_hash_mask; i++) {
spin_lock_bh(&dn_rt_hash_table[i].lock);
if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
goto nothing_to_declare;
for(; rt; rt = next) {
next = rcu_dereference_raw(rt->dn_next);
RCU_INIT_POINTER(rt->dn_next, NULL);
dst_dev_put(&rt->dst);
dst_release(&rt->dst);
}
nothing_to_declare:
spin_unlock_bh(&dn_rt_hash_table[i].lock);
}
}
static DEFINE_SPINLOCK(dn_rt_flush_lock);
void dn_rt_cache_flush(int delay)
{
unsigned long now = jiffies;
int user_mode = !in_interrupt();
if (delay < 0)
delay = dn_rt_min_delay;
spin_lock_bh(&dn_rt_flush_lock);
if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) {
long tmo = (long)(dn_rt_deadline - now);
if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay)
tmo = 0;
if (delay > tmo)
delay = tmo;
}
if (delay <= 0) {
spin_unlock_bh(&dn_rt_flush_lock);
dn_run_flush(NULL);
return;
}
if (dn_rt_deadline == 0)
dn_rt_deadline = now + dn_rt_max_delay;
dn_rt_flush_timer.expires = now + delay;
add_timer(&dn_rt_flush_timer);
spin_unlock_bh(&dn_rt_flush_lock);
}
/**
* dn_return_short - Return a short packet to its sender
* @skb: The packet to return
*
*/
static int dn_return_short(struct sk_buff *skb)
{
struct dn_skb_cb *cb;
unsigned char *ptr;
__le16 *src;
__le16 *dst;
/* Add back headers */
skb_push(skb, skb->data - skb_network_header(skb));
if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
return NET_RX_DROP;
cb = DN_SKB_CB(skb);
/* Skip packet length and point to flags */
ptr = skb->data + 2;
*ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
dst = (__le16 *)ptr;
ptr += 2;
src = (__le16 *)ptr;
ptr += 2;
*ptr = 0; /* Zero hop count */
swap(*src, *dst);
skb->pkt_type = PACKET_OUTGOING;
dn_rt_finish_output(skb, NULL, NULL);
return NET_RX_SUCCESS;
}
/**
* dn_return_long - Return a long packet to its sender
* @skb: The long format packet to return
*
*/
static int dn_return_long(struct sk_buff *skb)
{
struct dn_skb_cb *cb;
unsigned char *ptr;
unsigned char *src_addr, *dst_addr;
unsigned char tmp[ETH_ALEN];
/* Add back all headers */
skb_push(skb, skb->data - skb_network_header(skb));
if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
return NET_RX_DROP;
cb = DN_SKB_CB(skb);
/* Ignore packet length and point to flags */
ptr = skb->data + 2;
/* Skip padding */
if (*ptr & DN_RT_F_PF) {
char padlen = (*ptr & ~DN_RT_F_PF);
ptr += padlen;
}
*ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
ptr += 2;
dst_addr = ptr;
ptr += 8;
src_addr = ptr;
ptr += 6;
*ptr = 0; /* Zero hop count */
/* Swap source and destination */
memcpy(tmp, src_addr, ETH_ALEN);
memcpy(src_addr, dst_addr, ETH_ALEN);
memcpy(dst_addr, tmp, ETH_ALEN);
skb->pkt_type = PACKET_OUTGOING;
dn_rt_finish_output(skb, dst_addr, src_addr);
return NET_RX_SUCCESS;
}
/**
* dn_route_rx_packet - Try and find a route for an incoming packet
* @net: The applicable net namespace
* @sk: Socket packet transmitted on
* @skb: The packet to find a route for
*
* Returns: result of input function if route is found, error code otherwise
*/
static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dn_skb_cb *cb;
int err;
if ((err = dn_route_input(skb)) == 0)
return dst_input(skb);
cb = DN_SKB_CB(skb);
if (decnet_debug_level & 4) {
char *devname = skb->dev ? skb->dev->name : "???";
printk(KERN_DEBUG
"DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
(int)cb->rt_flags, devname, skb->len,
le16_to_cpu(cb->src), le16_to_cpu(cb->dst),
err, skb->pkt_type);
}
if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) {
switch (cb->rt_flags & DN_RT_PKT_MSK) {
case DN_RT_PKT_SHORT:
return dn_return_short(skb);
case DN_RT_PKT_LONG:
return dn_return_long(skb);
}
}
kfree_skb(skb);
return NET_RX_DROP;
}
static int dn_route_rx_long(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
unsigned char *ptr = skb->data;
if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */
goto drop_it;
skb_pull(skb, 20);
skb_reset_transport_header(skb);
/* Destination info */
ptr += 2;
cb->dst = dn_eth2dn(ptr);
if (memcmp(ptr, dn_hiord_addr, 4) != 0)
goto drop_it;
ptr += 6;
/* Source info */
ptr += 2;
cb->src = dn_eth2dn(ptr);
if (memcmp(ptr, dn_hiord_addr, 4) != 0)
goto drop_it;
ptr += 6;
/* Other junk */
ptr++;
cb->hops = *ptr++; /* Visit Count */
2015-09-16 08:04:16 +07:00
return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
&init_net, NULL, skb, skb->dev, NULL,
dn_route_rx_packet);
drop_it:
kfree_skb(skb);
return NET_RX_DROP;
}
static int dn_route_rx_short(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
unsigned char *ptr = skb->data;
if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */
goto drop_it;
skb_pull(skb, 5);
skb_reset_transport_header(skb);
cb->dst = *(__le16 *)ptr;
ptr += 2;
cb->src = *(__le16 *)ptr;
ptr += 2;
cb->hops = *ptr & 0x3f;
2015-09-16 08:04:16 +07:00
return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
&init_net, NULL, skb, skb->dev, NULL,
dn_route_rx_packet);
drop_it:
kfree_skb(skb);
return NET_RX_DROP;
}
static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
{
/*
* I know we drop the packet here, but thats considered success in
* this case
*/
kfree_skb(skb);
return NET_RX_SUCCESS;
}
static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
{
dn_dev_hello(skb);
dn_neigh_pointopoint_hello(skb);
return NET_RX_SUCCESS;
}
int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct dn_skb_cb *cb;
unsigned char flags = 0;
__u16 len = le16_to_cpu(*(__le16 *)skb->data);
struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
unsigned char padlen = 0;
if (!net_eq(dev_net(dev), &init_net))
goto dump_it;
if (dn == NULL)
goto dump_it;
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
goto out;
if (!pskb_may_pull(skb, 3))
goto dump_it;
skb_pull(skb, 2);
if (len > skb->len)
goto dump_it;
skb_trim(skb, len);
flags = *skb->data;
cb = DN_SKB_CB(skb);
cb->stamp = jiffies;
cb->iif = dev->ifindex;
/*
* If we have padding, remove it.
*/
if (flags & DN_RT_F_PF) {
padlen = flags & ~DN_RT_F_PF;
if (!pskb_may_pull(skb, padlen + 1))
goto dump_it;
skb_pull(skb, padlen);
flags = *skb->data;
}
skb_reset_network_header(skb);
/*
* Weed out future version DECnet
*/
if (flags & DN_RT_F_VER)
goto dump_it;
cb->rt_flags = flags;
if (decnet_debug_level & 1)
printk(KERN_DEBUG
"dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
(int)flags, dev->name, len, skb->len,
padlen);
if (flags & DN_RT_PKT_CNTL) {
if (unlikely(skb_linearize(skb)))
goto dump_it;
switch (flags & DN_RT_CNTL_MSK) {
case DN_RT_PKT_INIT:
dn_dev_init_pkt(skb);
break;
case DN_RT_PKT_VERI:
dn_dev_veri_pkt(skb);
break;
}
if (dn->parms.state != DN_DEV_S_RU)
goto dump_it;
switch (flags & DN_RT_CNTL_MSK) {
case DN_RT_PKT_HELO:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
2015-09-16 08:04:16 +07:00
&init_net, NULL, skb, skb->dev, NULL,
dn_route_ptp_hello);
case DN_RT_PKT_L1RT:
case DN_RT_PKT_L2RT:
return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
2015-09-16 08:04:16 +07:00
&init_net, NULL, skb, skb->dev, NULL,
dn_route_discard);
case DN_RT_PKT_ERTH:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
2015-09-16 08:04:16 +07:00
&init_net, NULL, skb, skb->dev, NULL,
dn_neigh_router_hello);
case DN_RT_PKT_EEDH:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
2015-09-16 08:04:16 +07:00
&init_net, NULL, skb, skb->dev, NULL,
dn_neigh_endnode_hello);
}
} else {
if (dn->parms.state != DN_DEV_S_RU)
goto dump_it;
skb_pull(skb, 1); /* Pull flags */
switch (flags & DN_RT_PKT_MSK) {
case DN_RT_PKT_LONG:
return dn_route_rx_long(skb);
case DN_RT_PKT_SHORT:
return dn_route_rx_short(skb);
}
}
dump_it:
kfree_skb(skb);
out:
return NET_RX_DROP;
}
static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
struct net_device *dev = dst->dev;
struct dn_skb_cb *cb = DN_SKB_CB(skb);
int err = -EINVAL;
if (rt->n == NULL)
goto error;
skb->dev = dev;
cb->src = rt->rt_saddr;
cb->dst = rt->rt_daddr;
/*
* Always set the Intra-Ethernet bit on all outgoing packets
* originated on this node. Only valid flag from upper layers
* is return-to-sender-requested. Set hop count to 0 too.
*/
cb->rt_flags &= ~DN_RT_F_RQR;
cb->rt_flags |= DN_RT_F_IE;
cb->hops = 0;
2015-09-16 08:04:16 +07:00
return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
&init_net, sk, skb, NULL, dev,
dn_to_neigh_output);
error:
net_dbg_ratelimited("dn_output: This should not happen\n");
kfree_skb(skb);
return err;
}
static int dn_forward(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
struct dst_entry *dst = skb_dst(skb);
struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
struct dn_route *rt;
int header_len;
struct net_device *dev = skb->dev;
if (skb->pkt_type != PACKET_HOST)
goto drop;
/* Ensure that we have enough space for headers */
rt = (struct dn_route *)skb_dst(skb);
header_len = dn_db->use_long ? 21 : 6;
if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
goto drop;
/*
* Hop count exceeded.
*/
if (++cb->hops > 30)
goto drop;
skb->dev = rt->dst.dev;
/*
* If packet goes out same interface it came in on, then set
* the Intra-Ethernet bit. This has no effect for short
* packets, so we don't need to test for them here.
*/
cb->rt_flags &= ~DN_RT_F_IE;
if (rt->rt_flags & RTCF_DOREDIRECT)
cb->rt_flags |= DN_RT_F_IE;
2015-09-16 08:04:16 +07:00
return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD,
&init_net, NULL, skb, dev, skb->dev,
dn_to_neigh_output);
drop:
kfree_skb(skb);
return NET_RX_DROP;
}
/*
* Used to catch bugs. This should never normally get
* called.
*/
static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
kfree_skb(skb);
return NET_RX_DROP;
}
static int dn_rt_bug(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
kfree_skb(skb);
return NET_RX_DROP;
}
static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
{
return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
}
static unsigned int dn_dst_mtu(const struct dst_entry *dst)
{
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
return mtu ? : dst->dev->mtu;
}
static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev);
}
static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
{
struct dn_fib_info *fi = res->fi;
struct net_device *dev = rt->dst.dev;
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
unsigned int mss_metric;
struct neighbour *n;
if (fi) {
if (DN_FIB_RES_GW(*res) &&
DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = DN_FIB_RES_GW(*res);
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
}
rt->rt_type = res->type;
if (dev != NULL && rt->n == NULL) {
n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
if (IS_ERR(n))
return PTR_ERR(n);
rt->n = n;
}
if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
if (mss_metric) {
unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-01-27 11:51:05 +07:00
if (mss_metric > mss)
dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
}
return 0;
}
static inline int dn_match_addr(__le16 addr1, __le16 addr2)
{
__u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
int match = 16;
while(tmp) {
tmp >>= 1;
match--;
}
return match;
}
static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
{
__le16 saddr = 0;
struct dn_dev *dn_db;
struct dn_ifaddr *ifa;
int best_match = 0;
int ret;
rcu_read_lock();
dn_db = rcu_dereference(dev->dn_ptr);
for (ifa = rcu_dereference(dn_db->ifa_list);
ifa != NULL;
ifa = rcu_dereference(ifa->ifa_next)) {
if (ifa->ifa_scope > scope)
continue;
if (!daddr) {
saddr = ifa->ifa_local;
break;
}
ret = dn_match_addr(daddr, ifa->ifa_local);
if (ret > best_match)
saddr = ifa->ifa_local;
if (best_match == 0)
saddr = ifa->ifa_local;
}
rcu_read_unlock();
return saddr;
}
static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res)
{
return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope);
}
static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res)
{
__le16 mask = dnet_make_mask(res->prefixlen);
return (daddr&~mask)|res->fi->fib_nh->nh_gw;
}
static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard)
{
struct flowidn fld = {
.daddr = oldflp->daddr,
.saddr = oldflp->saddr,
.flowidn_scope = RT_SCOPE_UNIVERSE,
.flowidn_mark = oldflp->flowidn_mark,
.flowidn_iif = LOOPBACK_IFINDEX,
.flowidn_oif = oldflp->flowidn_oif,
};
struct dn_route *rt = NULL;
struct net_device *dev_out = NULL, *dev;
struct neighbour *neigh = NULL;
unsigned int hash;
unsigned int flags = 0;
struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST };
int err;
int free_res = 0;
__le16 gateway = 0;
if (decnet_debug_level & 16)
printk(KERN_DEBUG
"dn_route_output_slow: dst=%04x src=%04x mark=%d"
" iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
le16_to_cpu(oldflp->saddr),
oldflp->flowidn_mark, LOOPBACK_IFINDEX,
oldflp->flowidn_oif);
/* If we have an output interface, verify its a DECnet device */
if (oldflp->flowidn_oif) {
dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif);
err = -ENODEV;
if (dev_out && dev_out->dn_ptr == NULL) {
dev_put(dev_out);
dev_out = NULL;
}
if (dev_out == NULL)
goto out;
}
/* If we have a source address, verify that its a local address */
if (oldflp->saddr) {
err = -EADDRNOTAVAIL;
if (dev_out) {
if (dn_dev_islocal(dev_out, oldflp->saddr))
goto source_ok;
dev_put(dev_out);
goto out;
}
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev) {
if (!dev->dn_ptr)
continue;
if (!dn_dev_islocal(dev, oldflp->saddr))
continue;
if ((dev->flags & IFF_LOOPBACK) &&
oldflp->daddr &&
!dn_dev_islocal(dev, oldflp->daddr))
continue;
dev_out = dev;
break;
}
rcu_read_unlock();
if (dev_out == NULL)
goto out;
dev_hold(dev_out);
source_ok:
;
}
/* No destination? Assume its local */
if (!fld.daddr) {
fld.daddr = fld.saddr;
if (dev_out)
dev_put(dev_out);
err = -EINVAL;
dev_out = init_net.loopback_dev;
if (!dev_out->dn_ptr)
goto out;
err = -EADDRNOTAVAIL;
dev_hold(dev_out);
if (!fld.daddr) {
fld.daddr =
fld.saddr = dnet_select_source(dev_out, 0,
RT_SCOPE_HOST);
if (!fld.daddr)
goto out;
}
fld.flowidn_oif = LOOPBACK_IFINDEX;
res.type = RTN_LOCAL;
goto make_route;
}
if (decnet_debug_level & 16)
printk(KERN_DEBUG
"dn_route_output_slow: initial checks complete."
" dst=%04x src=%04x oif=%d try_hard=%d\n",
le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
fld.flowidn_oif, try_hard);
/*
* N.B. If the kernel is compiled without router support then
* dn_fib_lookup() will evaluate to non-zero so this if () block
* will always be executed.
*/
err = -ESRCH;
if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) {
struct dn_dev *dn_db;
if (err != -ESRCH)
goto out;
/*
* Here the fallback is basically the standard algorithm for
* routing in endnodes which is described in the DECnet routing
* docs
*
* If we are not trying hard, look in neighbour cache.
* The result is tested to ensure that if a specific output
* device/source address was requested, then we honour that
* here
*/
if (!try_hard) {
neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr);
if (neigh) {
if ((oldflp->flowidn_oif &&
(neigh->dev->ifindex != oldflp->flowidn_oif)) ||
(oldflp->saddr &&
(!dn_dev_islocal(neigh->dev,
oldflp->saddr)))) {
neigh_release(neigh);
neigh = NULL;
} else {
if (dev_out)
dev_put(dev_out);
if (dn_dev_islocal(neigh->dev, fld.daddr)) {
dev_out = init_net.loopback_dev;
res.type = RTN_LOCAL;
} else {
dev_out = neigh->dev;
}
dev_hold(dev_out);
goto select_source;
}
}
}
/* Not there? Perhaps its a local address */
if (dev_out == NULL)
dev_out = dn_dev_get_default();
err = -ENODEV;
if (dev_out == NULL)
goto out;
dn_db = rcu_dereference_raw(dev_out->dn_ptr);
if (!dn_db)
goto e_inval;
/* Possible improvement - check all devices for local addr */
if (dn_dev_islocal(dev_out, fld.daddr)) {
dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
res.type = RTN_LOCAL;
goto select_source;
}
/* Not local either.... try sending it to the default router */
neigh = neigh_clone(dn_db->router);
BUG_ON(neigh && neigh->dev != dev_out);
/* Ok then, we assume its directly connected and move on */
select_source:
if (neigh)
gateway = ((struct dn_neigh *)neigh)->addr;
if (gateway == 0)
gateway = fld.daddr;
if (fld.saddr == 0) {
fld.saddr = dnet_select_source(dev_out, gateway,
res.type == RTN_LOCAL ?
RT_SCOPE_HOST :
RT_SCOPE_LINK);
if (fld.saddr == 0 && res.type != RTN_LOCAL)
goto e_addr;
}
fld.flowidn_oif = dev_out->ifindex;
goto make_route;
}
free_res = 1;
if (res.type == RTN_NAT)
goto e_inval;
if (res.type == RTN_LOCAL) {
if (!fld.saddr)
fld.saddr = fld.daddr;
if (dev_out)
dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
if (!dev_out->dn_ptr)
goto e_inval;
fld.flowidn_oif = dev_out->ifindex;
if (res.fi)
dn_fib_info_put(res.fi);
res.fi = NULL;
goto make_route;
}
if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
dn_fib_select_multipath(&fld, &res);
/*
* We could add some logic to deal with default routes here and
* get rid of some of the special casing above.
*/
if (!fld.saddr)
fld.saddr = DN_FIB_RES_PREFSRC(res);
if (dev_out)
dev_put(dev_out);
dev_out = DN_FIB_RES_DEV(res);
dev_hold(dev_out);
fld.flowidn_oif = dev_out->ifindex;
gateway = DN_FIB_RES_GW(res);
make_route:
if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL;
rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, 0);
if (rt == NULL)
goto e_nobufs;
rt->dn_next = NULL;
memset(&rt->fld, 0, sizeof(rt->fld));
rt->fld.saddr = oldflp->saddr;
rt->fld.daddr = oldflp->daddr;
rt->fld.flowidn_oif = oldflp->flowidn_oif;
rt->fld.flowidn_iif = 0;
rt->fld.flowidn_mark = oldflp->flowidn_mark;
rt->rt_saddr = fld.saddr;
rt->rt_daddr = fld.daddr;
rt->rt_gateway = gateway ? gateway : fld.daddr;
rt->rt_local_src = fld.saddr;
rt->rt_dst_map = fld.daddr;
rt->rt_src_map = fld.saddr;
rt->n = neigh;
neigh = NULL;
rt->dst.lastuse = jiffies;
rt->dst.output = dn_output;
rt->dst.input = dn_rt_bug;
rt->rt_flags = flags;
if (flags & RTCF_LOCAL)
rt->dst.input = dn_nsp_rx;
err = dn_rt_set_next_hop(rt, &res);
if (err)
goto e_neighbour;
hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
/* dn_insert_route() increments dst->__refcnt */
dn_insert_route(rt, hash, (struct dn_route **)pprt);
done:
if (neigh)
neigh_release(neigh);
if (free_res)
dn_fib_res_put(&res);
if (dev_out)
dev_put(dev_out);
out:
return err;
e_addr:
err = -EADDRNOTAVAIL;
goto done;
e_inval:
err = -EINVAL;
goto done;
e_nobufs:
err = -ENOBUFS;
goto done;
e_neighbour:
dst_release_immediate(&rt->dst);
goto e_nobufs;
}
/*
* N.B. The flags may be moved into the flowi at some future stage.
*/
static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags)
{
unsigned int hash = dn_hash(flp->saddr, flp->daddr);
struct dn_route *rt = NULL;
if (!(flags & MSG_TRYHARD)) {
rcu_read_lock_bh();
for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
rt = rcu_dereference_bh(rt->dn_next)) {
if ((flp->daddr == rt->fld.daddr) &&
(flp->saddr == rt->fld.saddr) &&
(flp->flowidn_mark == rt->fld.flowidn_mark) &&
dn_is_output_route(rt) &&
(rt->fld.flowidn_oif == flp->flowidn_oif)) {
dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock_bh();
*pprt = &rt->dst;
return 0;
}
}
rcu_read_unlock_bh();
}
return dn_route_output_slow(pprt, flp, flags);
}
static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags)
{
int err;
err = __dn_route_output_key(pprt, flp, flags);
if (err == 0 && flp->flowidn_proto) {
*pprt = xfrm_lookup(&init_net, *pprt,
flowidn_to_flowi(flp), NULL, 0);
if (IS_ERR(*pprt)) {
err = PTR_ERR(*pprt);
*pprt = NULL;
}
}
return err;
}
int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags)
{
int err;
err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
if (err == 0 && fl->flowidn_proto) {
*pprt = xfrm_lookup(&init_net, *pprt,
flowidn_to_flowi(fl), sk, 0);
if (IS_ERR(*pprt)) {
err = PTR_ERR(*pprt);
*pprt = NULL;
}
}
return err;
}
static int dn_route_input_slow(struct sk_buff *skb)
{
struct dn_route *rt = NULL;
struct dn_skb_cb *cb = DN_SKB_CB(skb);
struct net_device *in_dev = skb->dev;
struct net_device *out_dev = NULL;
struct dn_dev *dn_db;
struct neighbour *neigh = NULL;
unsigned int hash;
int flags = 0;
__le16 gateway = 0;
__le16 local_src = 0;
struct flowidn fld = {
.daddr = cb->dst,
.saddr = cb->src,
.flowidn_scope = RT_SCOPE_UNIVERSE,
.flowidn_mark = skb->mark,
.flowidn_iif = skb->dev->ifindex,
};
struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
int err = -EINVAL;
int free_res = 0;
dev_hold(in_dev);
if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
goto out;
/* Zero source addresses are not allowed */
if (fld.saddr == 0)
goto out;
/*
* In this case we've just received a packet from a source
* outside ourselves pretending to come from us. We don't
* allow it any further to prevent routing loops, spoofing and
* other nasties. Loopback packets already have the dst attached
* so this only affects packets which have originated elsewhere.
*/
err = -ENOTUNIQ;
if (dn_dev_islocal(in_dev, cb->src))
goto out;
err = dn_fib_lookup(&fld, &res);
if (err) {
if (err != -ESRCH)
goto out;
/*
* Is the destination us ?
*/
if (!dn_dev_islocal(in_dev, cb->dst))
goto e_inval;
res.type = RTN_LOCAL;
} else {
__le16 src_map = fld.saddr;
free_res = 1;
out_dev = DN_FIB_RES_DEV(res);
if (out_dev == NULL) {
net_crit_ratelimited("Bug in dn_route_input_slow() No output device\n");
goto e_inval;
}
dev_hold(out_dev);
if (res.r)
src_map = fld.saddr; /* no NAT support for now */
gateway = DN_FIB_RES_GW(res);
if (res.type == RTN_NAT) {
fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res);
dn_fib_res_put(&res);
free_res = 0;
if (dn_fib_lookup(&fld, &res))
goto e_inval;
free_res = 1;
if (res.type != RTN_UNICAST)
goto e_inval;
flags |= RTCF_DNAT;
gateway = fld.daddr;
}
fld.saddr = src_map;
}
switch(res.type) {
case RTN_UNICAST:
/*
* Forwarding check here, we only check for forwarding
* being turned off, if you want to only forward intra
* area, its up to you to set the routing tables up
* correctly.
*/
if (dn_db->parms.forwarding == 0)
goto e_inval;
if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
dn_fib_select_multipath(&fld, &res);
/*
* Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
* flag as a hint to set the intra-ethernet bit when
* forwarding. If we've got NAT in operation, we don't do
* this optimisation.
*/
if (out_dev == in_dev && !(flags & RTCF_NAT))
flags |= RTCF_DOREDIRECT;
local_src = DN_FIB_RES_PREFSRC(res);
case RTN_BLACKHOLE:
case RTN_UNREACHABLE:
break;
case RTN_LOCAL:
flags |= RTCF_LOCAL;
fld.saddr = cb->dst;
fld.daddr = cb->src;
/* Routing tables gave us a gateway */
if (gateway)
goto make_route;
/* Packet was intra-ethernet, so we know its on-link */
if (cb->rt_flags & DN_RT_F_IE) {
gateway = cb->src;
goto make_route;
}
/* Use the default router if there is one */
neigh = neigh_clone(dn_db->router);
if (neigh) {
gateway = ((struct dn_neigh *)neigh)->addr;
goto make_route;
}
/* Close eyes and pray */
gateway = cb->src;
goto make_route;
default:
goto e_inval;
}
make_route:
rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, 0);
if (rt == NULL)
goto e_nobufs;
rt->dn_next = NULL;
memset(&rt->fld, 0, sizeof(rt->fld));
rt->rt_saddr = fld.saddr;
rt->rt_daddr = fld.daddr;
rt->rt_gateway = fld.daddr;
if (gateway)
rt->rt_gateway = gateway;
rt->rt_local_src = local_src ? local_src : rt->rt_saddr;
rt->rt_dst_map = fld.daddr;
rt->rt_src_map = fld.saddr;
rt->fld.saddr = cb->src;
rt->fld.daddr = cb->dst;
rt->fld.flowidn_oif = 0;
rt->fld.flowidn_iif = in_dev->ifindex;
rt->fld.flowidn_mark = fld.flowidn_mark;
rt->n = neigh;
rt->dst.lastuse = jiffies;
rt->dst.output = dn_rt_bug_out;
switch (res.type) {
case RTN_UNICAST:
rt->dst.input = dn_forward;
break;
case RTN_LOCAL:
rt->dst.output = dn_output;
rt->dst.input = dn_nsp_rx;
rt->dst.dev = in_dev;
flags |= RTCF_LOCAL;
break;
default:
case RTN_UNREACHABLE:
case RTN_BLACKHOLE:
rt->dst.input = dst_discard;
}
rt->rt_flags = flags;
err = dn_rt_set_next_hop(rt, &res);
if (err)
goto e_neighbour;
hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
/* dn_insert_route() increments dst->__refcnt */
dn_insert_route(rt, hash, &rt);
skb_dst_set(skb, &rt->dst);
done:
if (neigh)
neigh_release(neigh);
if (free_res)
dn_fib_res_put(&res);
dev_put(in_dev);
if (out_dev)
dev_put(out_dev);
out:
return err;
e_inval:
err = -EINVAL;
goto done;
e_nobufs:
err = -ENOBUFS;
goto done;
e_neighbour:
dst_release_immediate(&rt->dst);
goto done;
}
static int dn_route_input(struct sk_buff *skb)
{
struct dn_route *rt;
struct dn_skb_cb *cb = DN_SKB_CB(skb);
unsigned int hash = dn_hash(cb->src, cb->dst);
if (skb_dst(skb))
return 0;
rcu_read_lock();
for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
rt = rcu_dereference(rt->dn_next)) {
if ((rt->fld.saddr == cb->src) &&
(rt->fld.daddr == cb->dst) &&
(rt->fld.flowidn_oif == 0) &&
(rt->fld.flowidn_mark == skb->mark) &&
(rt->fld.flowidn_iif == cb->iif)) {
dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock();
skb_dst_set(skb, (struct dst_entry *)rt);
return 0;
}
}
rcu_read_unlock();
return dn_route_input_slow(skb);
}
static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
int event, int nowait, unsigned int flags)
{
struct dn_route *rt = (struct dn_route *)skb_dst(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
long expires;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
if (!nlh)
return -EMSGSIZE;
r = nlmsg_data(nlh);
r->rtm_family = AF_DECnet;
r->rtm_dst_len = 16;
r->rtm_src_len = 0;
r->rtm_tos = 0;
r->rtm_table = RT_TABLE_MAIN;
r->rtm_type = rt->rt_type;
r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
r->rtm_scope = RT_SCOPE_UNIVERSE;
r->rtm_protocol = RTPROT_UNSPEC;
if (rt->rt_flags & RTCF_NOTIFY)
r->rtm_flags |= RTM_F_NOTIFY;
if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN) < 0 ||
nla_put_le16(skb, RTA_DST, rt->rt_daddr) < 0)
goto errout;
if (rt->fld.saddr) {
r->rtm_src_len = 16;
if (nla_put_le16(skb, RTA_SRC, rt->fld.saddr) < 0)
goto errout;
}
if (rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex) < 0)
goto errout;
/*
* Note to self - change this if input routes reverse direction when
* they deal only with inputs and not with replies like they do
* currently.
*/
if (nla_put_le16(skb, RTA_PREFSRC, rt->rt_local_src) < 0)
goto errout;
if (rt->rt_daddr != rt->rt_gateway &&
nla_put_le16(skb, RTA_GATEWAY, rt->rt_gateway) < 0)
goto errout;
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto errout;
expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
rt->dst.error) < 0)
goto errout;
if (dn_is_input_route(rt) &&
nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0)
goto errout;
netlink: make nlmsg_end() and genlmsg_end() void Contrary to common expectations for an "int" return, these functions return only a positive value -- if used correctly they cannot even return 0 because the message header will necessarily be in the skb. This makes the very common pattern of if (genlmsg_end(...) < 0) { ... } be a whole bunch of dead code. Many places also simply do return nlmsg_end(...); and the caller is expected to deal with it. This also commonly (at least for me) causes errors, because it is very common to write if (my_function(...)) /* error condition */ and if my_function() does "return nlmsg_end()" this is of course wrong. Additionally, there's not a single place in the kernel that actually needs the message length returned, and if anyone needs it later then it'll be very easy to just use skb->len there. Remove this, and make the functions void. This removes a bunch of dead code as described above. The patch adds lines because I did - return nlmsg_end(...); + nlmsg_end(...); + return 0; I could have preserved all the function's return values by returning skb->len, but instead I've audited all the places calling the affected functions and found that none cared. A few places actually compared the return value with <= 0 in dump functionality, but that could just be changed to < 0 with no change in behaviour, so I opted for the more efficient version. One instance of the error I've made numerous times now is also present in net/phonet/pn_netlink.c in the route_dumpit() function - it didn't check for <0 or <=0 and thus broke out of the loop every single time. I've preserved this since it will (I think) have caused the messages to userspace to be formatted differently with just a single message for every SKB returned to userspace. It's possible that this isn't needed for the tools that actually use this, but I don't even know what they are so couldn't test that changing this behaviour would be acceptable. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-01-17 04:09:00 +07:00
nlmsg_end(skb, nlh);
return 0;
errout:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
[RTA_DST] = { .type = NLA_U16 },
[RTA_SRC] = { .type = NLA_U16 },
[RTA_IIF] = { .type = NLA_U32 },
[RTA_OIF] = { .type = NLA_U32 },
[RTA_GATEWAY] = { .type = NLA_U16 },
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_PREFSRC] = { .type = NLA_U16 },
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .type = NLA_NESTED },
[RTA_TABLE] = { .type = NLA_U32 },
[RTA_MARK] = { .type = NLA_U32 },
};
/*
* This is called by both endnodes and routers now.
*/
static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm = nlmsg_data(nlh);
struct dn_route *rt = NULL;
struct dn_skb_cb *cb;
int err;
struct sk_buff *skb;
struct flowidn fld;
struct nlattr *tb[RTA_MAX+1];
if (!net_eq(net, &init_net))
return -EINVAL;
netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 19:07:28 +07:00
err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_dn_policy, extack);
if (err < 0)
return err;
memset(&fld, 0, sizeof(fld));
fld.flowidn_proto = DNPROTO_NSP;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (skb == NULL)
return -ENOBUFS;
skb_reset_mac_header(skb);
cb = DN_SKB_CB(skb);
if (tb[RTA_SRC])
fld.saddr = nla_get_le16(tb[RTA_SRC]);
if (tb[RTA_DST])
fld.daddr = nla_get_le16(tb[RTA_DST]);
if (tb[RTA_IIF])
fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
if (fld.flowidn_iif) {
struct net_device *dev;
dev = __dev_get_by_index(&init_net, fld.flowidn_iif);
if (!dev || !dev->dn_ptr) {
kfree_skb(skb);
return -ENODEV;
}
skb->protocol = htons(ETH_P_DNA_RT);
skb->dev = dev;
cb->src = fld.saddr;
cb->dst = fld.daddr;
local_bh_disable();
err = dn_route_input(skb);
local_bh_enable();
memset(cb, 0, sizeof(struct dn_skb_cb));
rt = (struct dn_route *)skb_dst(skb);
if (!err && -rt->dst.error)
err = rt->dst.error;
} else {
if (tb[RTA_OIF])
fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
}
skb->dev = NULL;
if (err)
goto out_free;
skb_dst_set(skb, &rt->dst);
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
if (err < 0) {
err = -EMSGSIZE;
goto out_free;
}
return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid);
out_free:
kfree_skb(skb);
return err;
}
/*
* For routers, this is called from dn_fib_dump, but for endnodes its
* called directly from the rtnetlink dispatch table.
*/
int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct dn_route *rt;
int h, s_h;
int idx, s_idx;
struct rtmsg *rtm;
if (!net_eq(net, &init_net))
return 0;
if (nlmsg_len(cb->nlh) < sizeof(struct rtmsg))
return -EINVAL;
rtm = nlmsg_data(cb->nlh);
if (!(rtm->rtm_flags & RTM_F_CLONED))
return 0;
s_h = cb->args[0];
s_idx = idx = cb->args[1];
for(h = 0; h <= dn_rt_hash_mask; h++) {
if (h < s_h)
continue;
if (h > s_h)
s_idx = 0;
rcu_read_lock_bh();
for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
rt;
rt = rcu_dereference_bh(rt->dn_next), idx++) {
if (idx < s_idx)
continue;
skb_dst_set(skb, dst_clone(&rt->dst));
if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
1, NLM_F_MULTI) < 0) {
skb_dst_drop(skb);
rcu_read_unlock_bh();
goto done;
}
skb_dst_drop(skb);
}
rcu_read_unlock_bh();
}
done:
cb->args[0] = h;
cb->args[1] = idx;
return skb->len;
}
#ifdef CONFIG_PROC_FS
struct dn_rt_cache_iter_state {
int bucket;
};
static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
{
struct dn_route *rt = NULL;
struct dn_rt_cache_iter_state *s = seq->private;
for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
rcu_read_lock_bh();
rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
if (rt)
break;
rcu_read_unlock_bh();
}
return rt;
}
static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
{
struct dn_rt_cache_iter_state *s = seq->private;
rt = rcu_dereference_bh(rt->dn_next);
while (!rt) {
rcu_read_unlock_bh();
if (--s->bucket < 0)
break;
rcu_read_lock_bh();
rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
}
return rt;
}
static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
struct dn_route *rt = dn_rt_cache_get_first(seq);
if (rt) {
while(*pos && (rt = dn_rt_cache_get_next(seq, rt)))
--*pos;
}
return *pos ? NULL : rt;
}
static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct dn_route *rt = dn_rt_cache_get_next(seq, v);
++*pos;
return rt;
}
static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v)
{
if (v)
rcu_read_unlock_bh();
}
static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
{
struct dn_route *rt = v;
char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
rt->dst.dev ? rt->dst.dev->name : "*",
dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
atomic_read(&rt->dst.__refcnt),
rt->dst.__use, 0);
return 0;
}
static const struct seq_operations dn_rt_cache_seq_ops = {
.start = dn_rt_cache_seq_start,
.next = dn_rt_cache_seq_next,
.stop = dn_rt_cache_seq_stop,
.show = dn_rt_cache_seq_show,
};
#endif /* CONFIG_PROC_FS */
void __init dn_route_init(void)
{
int i, goal, order;
dn_dst_ops.kmem_cachep =
kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
dst_entries_init(&dn_dst_ops);
treewide: setup_timer() -> timer_setup() This converts all remaining cases of the old setup_timer() API into using timer_setup(), where the callback argument is the structure already holding the struct timer_list. These should have no behavioral changes, since they just change which pointer is passed into the callback with the same available pointers after conversion. It handles the following examples, in addition to some other variations. Casting from unsigned long: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... setup_timer(&ptr->my_timer, my_callback, ptr); and forced object casts: void my_callback(struct something *ptr) { ... } ... setup_timer(&ptr->my_timer, my_callback, (unsigned long)ptr); become: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... timer_setup(&ptr->my_timer, my_callback, 0); Direct function assignments: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... ptr->my_timer.function = my_callback; have a temporary cast added, along with converting the args: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... ptr->my_timer.function = (TIMER_FUNC_TYPE)my_callback; And finally, callbacks without a data assignment: void my_callback(unsigned long data) { ... } ... setup_timer(&ptr->my_timer, my_callback, 0); have their argument renamed to verify they're unused during conversion: void my_callback(struct timer_list *unused) { ... } ... timer_setup(&ptr->my_timer, my_callback, 0); The conversion is done with the following Coccinelle script: spatch --very-quiet --all-includes --include-headers \ -I ./arch/x86/include -I ./arch/x86/include/generated \ -I ./include -I ./arch/x86/include/uapi \ -I ./arch/x86/include/generated/uapi -I ./include/uapi \ -I ./include/generated/uapi --include ./include/linux/kconfig.h \ --dir . \ --cocci-file ~/src/data/timer_setup.cocci @fix_address_of@ expression e; @@ setup_timer( -&(e) +&e , ...) // Update any raw setup_timer() usages that have a NULL callback, but // would otherwise match change_timer_function_usage, since the latter // will update all function assignments done in the face of a NULL // function initialization in setup_timer(). @change_timer_function_usage_NULL@ expression _E; identifier _timer; type _cast_data; @@ ( -setup_timer(&_E->_timer, NULL, _E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E->_timer, NULL, (_cast_data)_E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E._timer, NULL, &_E); +timer_setup(&_E._timer, NULL, 0); | -setup_timer(&_E._timer, NULL, (_cast_data)&_E); +timer_setup(&_E._timer, NULL, 0); ) @change_timer_function_usage@ expression _E; identifier _timer; struct timer_list _stl; identifier _callback; type _cast_func, _cast_data; @@ ( -setup_timer(&_E->_timer, _callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | _E->_timer@_stl.function = _callback; | _E->_timer@_stl.function = &_callback; | _E->_timer@_stl.function = (_cast_func)_callback; | _E->_timer@_stl.function = (_cast_func)&_callback; | _E._timer@_stl.function = _callback; | _E._timer@_stl.function = &_callback; | _E._timer@_stl.function = (_cast_func)_callback; | _E._timer@_stl.function = (_cast_func)&_callback; ) // callback(unsigned long arg) @change_callback_handle_cast depends on change_timer_function_usage@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; identifier _handle; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { ( ... when != _origarg _handletype *_handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg ) } // callback(unsigned long arg) without existing variable @change_callback_handle_cast_no_arg depends on change_timer_function_usage && !change_callback_handle_cast@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { + _handletype *_origarg = from_timer(_origarg, t, _timer); + ... when != _origarg - (_handletype *)_origarg + _origarg ... when != _origarg } // Avoid already converted callbacks. @match_callback_converted depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier t; @@ void _callback(struct timer_list *t) { ... } // callback(struct something *handle) @change_callback_handle_arg depends on change_timer_function_usage && !match_callback_converted && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; @@ void _callback( -_handletype *_handle +struct timer_list *t ) { + _handletype *_handle = from_timer(_handle, t, _timer); ... } // If change_callback_handle_arg ran on an empty function, remove // the added handler. @unchange_callback_handle_arg depends on change_timer_function_usage && change_callback_handle_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; identifier t; @@ void _callback(struct timer_list *t) { - _handletype *_handle = from_timer(_handle, t, _timer); } // We only want to refactor the setup_timer() data argument if we've found // the matching callback. This undoes changes in change_timer_function_usage. @unchange_timer_function_usage depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg && !change_callback_handle_arg@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type change_timer_function_usage._cast_data; @@ ( -timer_setup(&_E->_timer, _callback, 0); +setup_timer(&_E->_timer, _callback, (_cast_data)_E); | -timer_setup(&_E._timer, _callback, 0); +setup_timer(&_E._timer, _callback, (_cast_data)&_E); ) // If we fixed a callback from a .function assignment, fix the // assignment cast now. @change_timer_function_assignment depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_func; typedef TIMER_FUNC_TYPE; @@ ( _E->_timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -&_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)_callback; +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -&_callback; +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; ) // Sometimes timer functions are called directly. Replace matched args. @change_timer_function_calls depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression _E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_data; @@ _callback( ( -(_cast_data)_E +&_E->_timer | -(_cast_data)&_E +&_E._timer | -_E +&_E->_timer ) ) // If a timer has been configured without a data argument, it can be // converted without regard to the callback argument, since it is unused. @match_timer_function_unused_data@ expression _E; identifier _timer; identifier _callback; @@ ( -setup_timer(&_E->_timer, _callback, 0); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0L); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0UL); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0L); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0UL); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_timer, _callback, 0); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0L); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0UL); +timer_setup(&_timer, _callback, 0); | -setup_timer(_timer, _callback, 0); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0L); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0UL); +timer_setup(_timer, _callback, 0); ) @change_callback_unused_data depends on match_timer_function_unused_data@ identifier match_timer_function_unused_data._callback; type _origtype; identifier _origarg; @@ void _callback( -_origtype _origarg +struct timer_list *unused ) { ... when != _origarg } Signed-off-by: Kees Cook <keescook@chromium.org>
2017-10-17 04:43:17 +07:00
timer_setup(&dn_route_timer, dn_dst_check_expire, 0);
dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
add_timer(&dn_route_timer);
goal = totalram_pages() >> (26 - PAGE_SHIFT);
for(order = 0; (1UL << order) < goal; order++)
/* NOTHING */;
/*
* Only want 1024 entries max, since the table is very, very unlikely
* to be larger than that.
*/
while(order && ((((1UL << order) * PAGE_SIZE) /
sizeof(struct dn_rt_hash_bucket)) >= 2048))
order--;
do {
dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
sizeof(struct dn_rt_hash_bucket);
while(dn_rt_hash_mask & (dn_rt_hash_mask - 1))
dn_rt_hash_mask--;
dn_rt_hash_table = (struct dn_rt_hash_bucket *)
__get_free_pages(GFP_ATOMIC, order);
} while (dn_rt_hash_table == NULL && --order > 0);
if (!dn_rt_hash_table)
panic("Failed to allocate DECnet route cache hash table\n");
printk(KERN_INFO
"DECnet: Routing cache hash table of %u buckets, %ldKbytes\n",
dn_rt_hash_mask,
(long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
dn_rt_hash_mask--;
for(i = 0; i <= dn_rt_hash_mask; i++) {
spin_lock_init(&dn_rt_hash_table[i].lock);
dn_rt_hash_table[i].chain = NULL;
}
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
proc_create_seq_private("decnet_cache", 0444, init_net.proc_net,
&dn_rt_cache_seq_ops,
sizeof(struct dn_rt_cache_iter_state), NULL);
#ifdef CONFIG_DECNET_ROUTER
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
dn_cache_getroute, dn_fib_dump, 0);
#else
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
dn_cache_getroute, dn_cache_dump, 0);
#endif
}
void __exit dn_route_cleanup(void)
{
del_timer(&dn_route_timer);
dn_run_flush(NULL);
remove_proc_entry("decnet_cache", init_net.proc_net);
dst_entries_destroy(&dn_dst_ops);
}