linux_dsm_epyc7002/net/8021q/vlan.c

809 lines
19 KiB
C
Raw Normal View History

/*
* INET 802.1Q VLAN
* Ethernet-type device handling.
*
* Authors: Ben Greear <greearb@candelatech.com>
* Please send support related email to: netdev@vger.kernel.org
* VLAN Home Page: http://www.candelatech.com/~greear/vlan.html
*
* Fixes:
* Fix for packet capture - Nick Eggleston <nick@dccinc.com>;
* Add HW acceleration hooks - David S. Miller <davem@redhat.com>;
* Correct all the locking - David S. Miller <davem@redhat.com>;
* Use hash table for VLAN groups - David S. Miller <davem@redhat.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <net/p8022.h>
#include <net/arp.h>
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/uaccess.h>
#include <linux/if_vlan.h>
#include "vlan.h"
#include "vlanproc.h"
#define DRV_VERSION "1.8"
/* Global VLAN variables */
netns: make struct pernet_operations::id unsigned int Make struct pernet_operations::id unsigned. There are 2 reasons to do so: 1) This field is really an index into an zero based array and thus is unsigned entity. Using negative value is out-of-bound access by definition. 2) On x86_64 unsigned 32-bit data which are mixed with pointers via array indexing or offsets added or subtracted to pointers are preffered to signed 32-bit data. "int" being used as an array index needs to be sign-extended to 64-bit before being used. void f(long *p, int i) { g(p[i]); } roughly translates to movsx rsi, esi mov rdi, [rsi+...] call g MOVSX is 3 byte instruction which isn't necessary if the variable is unsigned because x86_64 is zero extending by default. Now, there is net_generic() function which, you guessed it right, uses "int" as an array index: static inline void *net_generic(const struct net *net, int id) { ... ptr = ng->ptr[id - 1]; ... } And this function is used a lot, so those sign extensions add up. Patch snipes ~1730 bytes on allyesconfig kernel (without all junk messing with code generation): add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) Unfortunately some functions actually grow bigger. This is a semmingly random artefact of code generation with register allocator being used differently. gcc decides that some variable needs to live in new r8+ registers and every access now requires REX prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be used which is longer than [r8] However, overall balance is in negative direction: add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) function old new delta nfsd4_lock 3886 3959 +73 tipc_link_build_proto_msg 1096 1140 +44 mac80211_hwsim_new_radio 2776 2808 +32 tipc_mon_rcv 1032 1058 +26 svcauth_gss_legacy_init 1413 1429 +16 tipc_bcbase_select_primary 379 392 +13 nfsd4_exchange_id 1247 1260 +13 nfsd4_setclientid_confirm 782 793 +11 ... put_client_renew_locked 494 480 -14 ip_set_sockfn_get 730 716 -14 geneve_sock_add 829 813 -16 nfsd4_sequence_done 721 703 -18 nlmclnt_lookup_host 708 686 -22 nfsd4_lockt 1085 1063 -22 nfs_get_client 1077 1050 -27 tcf_bpf_init 1106 1076 -30 nfsd4_encode_fattr 5997 5930 -67 Total: Before=154856051, After=154854321, chg -0.00% Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-17 08:58:21 +07:00
unsigned int vlan_net_id __read_mostly;
const char vlan_fullname[] = "802.1Q VLAN Support";
const char vlan_version[] = DRV_VERSION;
/* End of global variables definitions. */
static int vlan_group_prealloc_vid(struct vlan_group *vg,
__be16 vlan_proto, u16 vlan_id)
{
struct net_device **array;
unsigned int pidx, vidx;
unsigned int size;
ASSERT_RTNL();
pidx = vlan_proto_idx(vlan_proto);
vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN;
array = vg->vlan_devices_arrays[pidx][vidx];
if (array != NULL)
return 0;
size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
array = kzalloc(size, GFP_KERNEL);
if (array == NULL)
return -ENOBUFS;
vg->vlan_devices_arrays[pidx][vidx] = array;
return 0;
}
void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
struct vlan_info *vlan_info;
struct vlan_group *grp;
u16 vlan_id = vlan->vlan_id;
ASSERT_RTNL();
vlan_info = rtnl_dereference(real_dev->vlan_info);
BUG_ON(!vlan_info);
grp = &vlan_info->grp;
grp->nr_vlan_devs--;
if (vlan->flags & VLAN_FLAG_MVRP)
vlan_mvrp_request_leave(dev);
vlan: fix GVRP at dismantle time ip link add link eth2 eth2.103 type vlan id 103 gvrp on loose_binding on ip link set eth2.103 up rmmod tg3 # driver providing eth2 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<ffffffffa0030c9e>] garp_request_leave+0x3e/0xc0 [garp] PGD 11d251067 PUD 11b9e0067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/virtual/net/eth2.104/ifindex CPU 0 Modules linked in: tg3(-) 8021q garp nfsd lockd auth_rpcgss sunrpc libphy sg [last unloaded: x_tables] Pid: 11494, comm: rmmod Tainted: G W 2.6.39-rc6-00261-gfd71257-dirty #580 HP ProLiant BL460c G6 RIP: 0010:[<ffffffffa0030c9e>] [<ffffffffa0030c9e>] garp_request_leave+0x3e/0xc0 [garp] RSP: 0018:ffff88007a19bae8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88011b5e2000 RCX: 0000000000000002 RDX: 0000000000000000 RSI: 0000000000000175 RDI: ffffffffa0030d5b RBP: ffff88007a19bb18 R08: 0000000000000001 R09: ffff88011bd64a00 R10: ffff88011d34ec00 R11: 0000000000000000 R12: 0000000000000002 R13: ffff88007a19bc48 R14: ffff88007a19bb88 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88011fc00000(0063) knlGS:00000000f77d76c0 CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b CR2: 0000000000000000 CR3: 000000011a675000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rmmod (pid: 11494, threadinfo ffff88007a19a000, task ffff8800798595c0) Stack: ffff88007a19bb36 ffff88011c84b800 ffff88011b5e2000 ffff88007a19bc48 ffff88007a19bb88 0000000000000006 ffff88007a19bb38 ffffffffa003a5f6 ffff88007a19bb38 670088007a19bba8 ffff88007a19bb58 ffffffffa00397e7 Call Trace: [<ffffffffa003a5f6>] vlan_gvrp_request_leave+0x46/0x50 [8021q] [<ffffffffa00397e7>] vlan_dev_stop+0xb7/0xc0 [8021q] [<ffffffff8137e427>] __dev_close_many+0x87/0xe0 [<ffffffff8137e507>] dev_close_many+0x87/0x110 [<ffffffff8137e630>] rollback_registered_many+0xa0/0x240 [<ffffffff8137e7e9>] unregister_netdevice_many+0x19/0x60 [<ffffffffa00389eb>] vlan_device_event+0x53b/0x550 [8021q] [<ffffffff8143f448>] ? ip6mr_device_event+0xa8/0xd0 [<ffffffff81479d03>] notifier_call_chain+0x53/0x80 [<ffffffff81062539>] __raw_notifier_call_chain+0x9/0x10 [<ffffffff81062551>] raw_notifier_call_chain+0x11/0x20 [<ffffffff8137df82>] call_netdevice_notifiers+0x32/0x60 [<ffffffff8137e69f>] rollback_registered_many+0x10f/0x240 [<ffffffff8137e85f>] rollback_registered+0x2f/0x40 [<ffffffff8137e8c8>] unregister_netdevice_queue+0x58/0x90 [<ffffffff8137e9eb>] unregister_netdev+0x1b/0x30 [<ffffffffa005d73f>] tg3_remove_one+0x6f/0x10b [tg3] We should call vlan_gvrp_request_leave() from unregister_vlan_dev(), not from vlan_dev_stop(), because vlan_gvrp_uninit_applicant() is called right after unregister_netdevice_queue(). In batch mode, unregister_netdevice_queue() doesn’t immediately call vlan_dev_stop(). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-05-11 02:22:54 +07:00
if (vlan->flags & VLAN_FLAG_GVRP)
vlan_gvrp_request_leave(dev);
vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
netdev_upper_dev_unlink(real_dev, dev);
/* Because unregister_netdevice_queue() makes sure at least one rcu
* grace period is respected before device freeing,
* we dont need to call synchronize_net() here.
*/
unregister_netdevice_queue(dev, head);
if (grp->nr_vlan_devs == 0) {
vlan_mvrp_uninit_applicant(real_dev);
vlan_gvrp_uninit_applicant(real_dev);
}
/* Take it out of our own structures, but be sure to interlock with
* HW accelerating devices or SW vlan input packet processing if
* VLAN is not 0 (leave it there for 802.1p).
*/
if (vlan_id)
vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
}
int vlan_check_real_dev(struct net_device *real_dev,
__be16 protocol, u16 vlan_id)
{
const char *name = real_dev->name;
if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
pr_info("VLANs not supported on %s\n", name);
return -EOPNOTSUPP;
}
if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
return -EEXIST;
return 0;
}
int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
u16 vlan_id = vlan->vlan_id;
struct vlan_info *vlan_info;
struct vlan_group *grp;
int err;
err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id);
if (err)
return err;
vlan_info = rtnl_dereference(real_dev->vlan_info);
/* vlan_info should be there now. vlan_vid_add took care of it */
BUG_ON(!vlan_info);
grp = &vlan_info->grp;
if (grp->nr_vlan_devs == 0) {
err = vlan_gvrp_init_applicant(real_dev);
if (err < 0)
goto out_vid_del;
err = vlan_mvrp_init_applicant(real_dev);
if (err < 0)
goto out_uninit_gvrp;
}
err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id);
if (err < 0)
goto out_uninit_mvrp;
vlan->nest_level = dev_get_nest_level(real_dev) + 1;
err = register_netdevice(dev);
if (err < 0)
goto out_uninit_mvrp;
err = netdev_upper_dev_link(real_dev, dev, extack);
if (err)
goto out_unregister_netdev;
/* Account for reference in struct vlan_dev_priv */
dev_hold(real_dev);
netif_stacked_transfer_operstate(real_dev, dev);
linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
/* So, got the sucker initialized, now lets place
* it into our local structure.
*/
vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev);
grp->nr_vlan_devs++;
return 0;
out_unregister_netdev:
unregister_netdevice(dev);
out_uninit_mvrp:
if (grp->nr_vlan_devs == 0)
vlan_mvrp_uninit_applicant(real_dev);
out_uninit_gvrp:
if (grp->nr_vlan_devs == 0)
vlan_gvrp_uninit_applicant(real_dev);
out_vid_del:
vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
return err;
}
/* Attach a VLAN device to a mac address (ie Ethernet Card).
* Returns 0 if the device was created or a negative error code otherwise.
*/
static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
{
struct net_device *new_dev;
struct vlan_dev_priv *vlan;
struct net *net = dev_net(real_dev);
struct vlan_net *vn = net_generic(net, vlan_net_id);
char name[IFNAMSIZ];
int err;
if (vlan_id >= VLAN_VID_MASK)
return -ERANGE;
err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id);
if (err < 0)
return err;
/* Gotta set up the fields for the device. */
switch (vn->name_type) {
case VLAN_NAME_TYPE_RAW_PLUS_VID:
/* name will look like: eth1.0005 */
snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id);
break;
case VLAN_NAME_TYPE_PLUS_VID_NO_PAD:
/* Put our vlan.VID in the name.
* Name will look like: vlan5
*/
snprintf(name, IFNAMSIZ, "vlan%i", vlan_id);
break;
case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD:
/* Put our vlan.VID in the name.
* Name will look like: eth0.5
*/
snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id);
break;
case VLAN_NAME_TYPE_PLUS_VID:
/* Put our vlan.VID in the name.
* Name will look like: vlan0005
*/
default:
snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
}
new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name,
NET_NAME_UNKNOWN, vlan_setup);
if (new_dev == NULL)
return -ENOBUFS;
dev_net_set(new_dev, net);
/* need 4 bytes for extra VLAN header info,
* hope the underlying device can handle it.
*/
new_dev->mtu = real_dev->mtu;
vlan = vlan_dev_priv(new_dev);
vlan->vlan_proto = htons(ETH_P_8021Q);
vlan->vlan_id = vlan_id;
vlan->real_dev = real_dev;
vlan->dent = NULL;
vlan->flags = VLAN_FLAG_REORDER_HDR;
new_dev->rtnl_link_ops = &vlan_link_ops;
err = register_vlan_dev(new_dev, NULL);
if (err < 0)
goto out_free_newdev;
return 0;
out_free_newdev:
net: 8021q: Fix one possible panic caused by BUG_ON in free_netdev The register_vlan_device would invoke free_netdev directly, when register_vlan_dev failed. It would trigger the BUG_ON in free_netdev if the dev was already registered. In this case, the netdev would be freed in netdev_run_todo later. So add one condition check now. Only when dev is not registered, then free it directly. The following is the part coredump when netdev_upper_dev_link failed in register_vlan_dev. I removed the lines which are too long. [ 411.237457] ------------[ cut here ]------------ [ 411.237458] kernel BUG at net/core/dev.c:7998! [ 411.237484] invalid opcode: 0000 [#1] SMP [ 411.237705] [last unloaded: 8021q] [ 411.237718] CPU: 1 PID: 12845 Comm: vconfig Tainted: G E 4.12.0-rc5+ #6 [ 411.237737] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 411.237764] task: ffff9cbeb6685580 task.stack: ffffa7d2807d8000 [ 411.237782] RIP: 0010:free_netdev+0x116/0x120 [ 411.237794] RSP: 0018:ffffa7d2807dbdb0 EFLAGS: 00010297 [ 411.237808] RAX: 0000000000000002 RBX: ffff9cbeb6ba8fd8 RCX: 0000000000001878 [ 411.237826] RDX: 0000000000000001 RSI: 0000000000000282 RDI: 0000000000000000 [ 411.237844] RBP: ffffa7d2807dbdc8 R08: 0002986100029841 R09: 0002982100029801 [ 411.237861] R10: 0004000100029980 R11: 0004000100029980 R12: ffff9cbeb6ba9000 [ 411.238761] R13: ffff9cbeb6ba9060 R14: ffff9cbe60f1a000 R15: ffff9cbeb6ba9000 [ 411.239518] FS: 00007fb690d81700(0000) GS:ffff9cbebb640000(0000) knlGS:0000000000000000 [ 411.239949] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 411.240454] CR2: 00007f7115624000 CR3: 0000000077cdf000 CR4: 00000000003406e0 [ 411.240936] Call Trace: [ 411.241462] vlan_ioctl_handler+0x3f1/0x400 [8021q] [ 411.241910] sock_ioctl+0x18b/0x2c0 [ 411.242394] do_vfs_ioctl+0xa1/0x5d0 [ 411.242853] ? sock_alloc_file+0xa6/0x130 [ 411.243465] SyS_ioctl+0x79/0x90 [ 411.243900] entry_SYSCALL_64_fastpath+0x1e/0xa9 [ 411.244425] RIP: 0033:0x7fb69089a357 [ 411.244863] RSP: 002b:00007ffcd04e0fc8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 411.245445] RAX: ffffffffffffffda RBX: 00007ffcd04e2884 RCX: 00007fb69089a357 [ 411.245903] RDX: 00007ffcd04e0fd0 RSI: 0000000000008983 RDI: 0000000000000003 [ 411.246527] RBP: 00007ffcd04e0fd0 R08: 0000000000000000 R09: 1999999999999999 [ 411.246976] R10: 000000000000053f R11: 0000000000000202 R12: 0000000000000004 [ 411.247414] R13: 00007ffcd04e1128 R14: 00007ffcd04e2888 R15: 0000000000000001 [ 411.249129] RIP: free_netdev+0x116/0x120 RSP: ffffa7d2807dbdb0 Signed-off-by: Gao Feng <gfree.wind@vip.163.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-16 14:00:02 +07:00
if (new_dev->reg_state == NETREG_UNINITIALIZED)
free_netdev(new_dev);
return err;
}
static void vlan_sync_address(struct net_device *dev,
struct net_device *vlandev)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
/* May be called without an actual change */
if (ether_addr_equal(vlan->real_dev_addr, dev->dev_addr))
return;
/* vlan continues to inherit address of lower device */
if (vlan_dev_inherit_address(vlandev, dev))
goto out;
/* vlan address was different from the old address and is equal to
* the new address */
if (!ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) &&
ether_addr_equal(vlandev->dev_addr, dev->dev_addr))
dev_uc_del(dev, vlandev->dev_addr);
/* vlan address was equal to the old address and is different from
* the new address */
if (ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) &&
!ether_addr_equal(vlandev->dev_addr, dev->dev_addr))
dev_uc_add(dev, vlandev->dev_addr);
out:
ether_addr_copy(vlan->real_dev_addr, dev->dev_addr);
}
static void vlan_transfer_features(struct net_device *dev,
struct net_device *vlandev)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
vlandev->gso_max_size = dev->gso_max_size;
vlandev->gso_max_segs = dev->gso_max_segs;
if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
vlandev->hard_header_len = dev->hard_header_len;
else
vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
#if IS_ENABLED(CONFIG_FCOE)
vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
#endif
vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
netdev_update_features(vlandev);
}
static int __vlan_device_event(struct net_device *dev, unsigned long event)
{
int err = 0;
switch (event) {
case NETDEV_CHANGENAME:
vlan_proc_rem_dev(dev);
err = vlan_proc_add_dev(dev);
break;
case NETDEV_REGISTER:
err = vlan_proc_add_dev(dev);
break;
case NETDEV_UNREGISTER:
vlan_proc_rem_dev(dev);
break;
}
return err;
}
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct vlan_group *grp;
struct vlan_info *vlan_info;
int i, flgs;
struct net_device *vlandev;
struct vlan_dev_priv *vlan;
bool last = false;
LIST_HEAD(list);
if (is_vlan_dev(dev)) {
int err = __vlan_device_event(dev, event);
if (err)
return notifier_from_errno(err);
}
if ((event == NETDEV_UP) &&
(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
pr_info("adding VLAN 0 to HW filter on device %s\n",
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
if (event == NETDEV_DOWN &&
(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
goto out;
grp = &vlan_info->grp;
/* It is OK that we do not hold the group lock right now,
* as we run under the RTNL lock.
*/
switch (event) {
case NETDEV_CHANGE:
/* Propagate real device state to vlan devices */
vlan_group_for_each_dev(grp, i, vlandev)
netif_stacked_transfer_operstate(dev, vlandev);
break;
case NETDEV_CHANGEADDR:
/* Adjust unicast filters on underlying device */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
if (!(flgs & IFF_UP))
continue;
vlan_sync_address(dev, vlandev);
}
break;
case NETDEV_CHANGEMTU:
vlan_group_for_each_dev(grp, i, vlandev) {
if (vlandev->mtu <= dev->mtu)
continue;
dev_set_mtu(vlandev, dev->mtu);
}
break;
case NETDEV_FEAT_CHANGE:
/* Propagate device features to underlying device */
vlan_group_for_each_dev(grp, i, vlandev)
vlan_transfer_features(dev, vlandev);
break;
case NETDEV_DOWN: {
struct net_device *tmp;
LIST_HEAD(close_list);
/* Put all VLANs for this dev in the down state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
if (!(flgs & IFF_UP))
continue;
vlan = vlan_dev_priv(vlandev);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
list_add(&vlandev->close_list, &close_list);
}
dev_close_many(&close_list, false);
list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) {
netif_stacked_transfer_operstate(dev, vlandev);
list_del_init(&vlandev->close_list);
}
list_del(&close_list);
break;
}
case NETDEV_UP:
/* Put all VLANs for this dev in the up state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
vlan: Correctly propagate promisc|allmulti flags in notifier. Currently vlan notifier handler will try to update all vlans for a device when that device comes up. A problem occurs, however, when the vlan device was set to promiscuous, but not by the user (ex: a bridge). In that case, dev->gflags are not updated. What results is that the lower device ends up with an extra promiscuity count. Here are the backtraces that prove this: [62852.052179] [<ffffffff814fe248>] __dev_set_promiscuity+0x38/0x1e0 [62852.052186] [<ffffffff8160bcbb>] ? _raw_spin_unlock_bh+0x1b/0x40 [62852.052188] [<ffffffff814fe4be>] ? dev_set_rx_mode+0x2e/0x40 [62852.052190] [<ffffffff814fe694>] dev_set_promiscuity+0x24/0x50 [62852.052194] [<ffffffffa0324795>] vlan_dev_open+0xd5/0x1f0 [8021q] [62852.052196] [<ffffffff814fe58f>] __dev_open+0xbf/0x140 [62852.052198] [<ffffffff814fe88d>] __dev_change_flags+0x9d/0x170 [62852.052200] [<ffffffff814fe989>] dev_change_flags+0x29/0x60 The above comes from the setting the vlan device to IFF_UP state. [62852.053569] [<ffffffff814fe248>] __dev_set_promiscuity+0x38/0x1e0 [62852.053571] [<ffffffffa032459b>] ? vlan_dev_set_rx_mode+0x2b/0x30 [8021q] [62852.053573] [<ffffffff814fe8d5>] __dev_change_flags+0xe5/0x170 [62852.053645] [<ffffffff814fe989>] dev_change_flags+0x29/0x60 [62852.053647] [<ffffffffa032334a>] vlan_device_event+0x18a/0x690 [8021q] [62852.053649] [<ffffffff8161036c>] notifier_call_chain+0x4c/0x70 [62852.053651] [<ffffffff8109d456>] raw_notifier_call_chain+0x16/0x20 [62852.053653] [<ffffffff814f744d>] call_netdevice_notifiers+0x2d/0x60 [62852.053654] [<ffffffff814fe1a3>] __dev_notify_flags+0x33/0xa0 [62852.053656] [<ffffffff814fe9b2>] dev_change_flags+0x52/0x60 [62852.053657] [<ffffffff8150cd57>] do_setlink+0x397/0xa40 And this one comes from the notification code. What we end up with is a vlan with promiscuity count of 1 and and a physical device with a promiscuity count of 2. They should both have a count 1. To resolve this issue, vlan code can use dev_get_flags() api which correctly masks promiscuity and allmulti flags. Signed-off-by: Vlad Yasevich <vyasevic@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-13 07:53:14 +07:00
flgs = dev_get_flags(vlandev);
if (flgs & IFF_UP)
continue;
vlan = vlan_dev_priv(vlandev);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
dev_change_flags(vlandev, flgs | IFF_UP);
netif_stacked_transfer_operstate(dev, vlandev);
}
break;
case NETDEV_UNREGISTER:
/* twiddle thumbs on netns device moves */
if (dev->reg_state != NETREG_UNREGISTERING)
break;
vlan_group_for_each_dev(grp, i, vlandev) {
/* removal of last vid destroys vlan_info, abort
* afterwards */
if (vlan_info->nr_vids == 1)
last = true;
unregister_vlan_dev(vlandev, &list);
if (last)
break;
}
unregister_netdevice_many(&list);
break;
case NETDEV_PRE_TYPE_CHANGE:
/* Forbid underlaying device to change its type. */
if (vlan_uses_dev(dev))
return NOTIFY_BAD;
break;
case NETDEV_NOTIFY_PEERS:
case NETDEV_BONDING_FAILOVER:
case NETDEV_RESEND_IGMP:
/* Propagate to vlan devices */
vlan_group_for_each_dev(grp, i, vlandev)
call_netdevice_notifiers(event, vlandev);
break;
}
out:
return NOTIFY_DONE;
}
static struct notifier_block vlan_notifier_block __read_mostly = {
.notifier_call = vlan_device_event,
};
/*
* VLAN IOCTL handler.
* o execute requested action or pass command to the device driver
* arg is really a struct vlan_ioctl_args __user *.
*/
[NET]: Make the device list and device lookups per namespace. This patch makes most of the generic device layer network namespace safe. This patch makes dev_base_head a network namespace variable, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anthing in the core of the network stack that was affected to by the change of dev_base was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use &init_net the initial network namespace. This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundametally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-09-18 01:56:21 +07:00
static int vlan_ioctl_handler(struct net *net, void __user *arg)
{
int err;
struct vlan_ioctl_args args;
struct net_device *dev = NULL;
if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args)))
return -EFAULT;
/* Null terminate this sucker, just in case. */
args.device1[sizeof(args.device1) - 1] = 0;
args.u.device2[sizeof(args.u.device2) - 1] = 0;
rtnl_lock();
switch (args.cmd) {
case SET_VLAN_INGRESS_PRIORITY_CMD:
case SET_VLAN_EGRESS_PRIORITY_CMD:
case SET_VLAN_FLAG_CMD:
case ADD_VLAN_CMD:
case DEL_VLAN_CMD:
case GET_VLAN_REALDEV_NAME_CMD:
case GET_VLAN_VID_CMD:
err = -ENODEV;
dev = __dev_get_by_name(net, args.device1);
if (!dev)
goto out;
err = -EINVAL;
if (args.cmd != ADD_VLAN_CMD && !is_vlan_dev(dev))
goto out;
}
switch (args.cmd) {
case SET_VLAN_INGRESS_PRIORITY_CMD:
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
vlan_dev_set_ingress_priority(dev,
args.u.skb_priority,
args.vlan_qos);
err = 0;
break;
case SET_VLAN_EGRESS_PRIORITY_CMD:
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = vlan_dev_set_egress_priority(dev,
args.u.skb_priority,
args.vlan_qos);
break;
case SET_VLAN_FLAG_CMD:
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = vlan_dev_change_flags(dev,
args.vlan_qos ? args.u.flag : 0,
args.u.flag);
break;
case SET_VLAN_NAME_TYPE_CMD:
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) {
struct vlan_net *vn;
vn = net_generic(net, vlan_net_id);
vn->name_type = args.u.name_type;
err = 0;
} else {
err = -EINVAL;
}
break;
case ADD_VLAN_CMD:
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = register_vlan_device(dev, args.u.VID);
break;
case DEL_VLAN_CMD:
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
unregister_vlan_dev(dev, NULL);
err = 0;
break;
case GET_VLAN_REALDEV_NAME_CMD:
err = 0;
vlan_dev_get_realdev_name(dev, args.u.device2);
if (copy_to_user(arg, &args,
sizeof(struct vlan_ioctl_args)))
err = -EFAULT;
break;
case GET_VLAN_VID_CMD:
err = 0;
args.u.VID = vlan_dev_vlan_id(dev);
if (copy_to_user(arg, &args,
sizeof(struct vlan_ioctl_args)))
err = -EFAULT;
break;
default:
err = -EOPNOTSUPP;
break;
}
out:
rtnl_unlock();
return err;
}
static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
struct sk_buff *p, **pp = NULL;
struct vlan_hdr *vhdr;
unsigned int hlen, off_vlan;
const struct packet_offload *ptype;
__be16 type;
int flush = 1;
off_vlan = skb_gro_offset(skb);
hlen = off_vlan + sizeof(*vhdr);
vhdr = skb_gro_header_fast(skb, off_vlan);
if (skb_gro_header_hard(skb, hlen)) {
vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
if (unlikely(!vhdr))
goto out;
}
type = vhdr->h_vlan_encapsulated_proto;
rcu_read_lock();
ptype = gro_find_receive_by_type(type);
if (!ptype)
goto out_unlock;
flush = 0;
for (p = *head; p; p = p->next) {
struct vlan_hdr *vhdr2;
if (!NAPI_GRO_CB(p)->same_flow)
continue;
vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
if (compare_vlan_header(vhdr, vhdr2))
NAPI_GRO_CB(p)->same_flow = 0;
}
skb_gro_pull(skb, sizeof(*vhdr));
skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
out:
NAPI_GRO_CB(skb)->flush |= flush;
return pp;
}
static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
{
struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
__be16 type = vhdr->h_vlan_encapsulated_proto;
struct packet_offload *ptype;
int err = -ENOENT;
rcu_read_lock();
ptype = gro_find_complete_by_type(type);
if (ptype)
err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
rcu_read_unlock();
return err;
}
static struct packet_offload vlan_packet_offloads[] __read_mostly = {
{
.type = cpu_to_be16(ETH_P_8021Q),
.priority = 10,
.callbacks = {
.gro_receive = vlan_gro_receive,
.gro_complete = vlan_gro_complete,
},
},
{
.type = cpu_to_be16(ETH_P_8021AD),
.priority = 10,
.callbacks = {
.gro_receive = vlan_gro_receive,
.gro_complete = vlan_gro_complete,
},
},
};
static int __net_init vlan_init_net(struct net *net)
{
struct vlan_net *vn = net_generic(net, vlan_net_id);
int err;
vn->name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD;
err = vlan_proc_init(net);
return err;
}
static void __net_exit vlan_exit_net(struct net *net)
{
vlan_proc_cleanup(net);
}
static struct pernet_operations vlan_net_ops = {
.init = vlan_init_net,
.exit = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct vlan_net),
};
static int __init vlan_proto_init(void)
{
int err;
unsigned int i;
pr_info("%s v%s\n", vlan_fullname, vlan_version);
err = register_pernet_subsys(&vlan_net_ops);
if (err < 0)
goto err0;
err = register_netdevice_notifier(&vlan_notifier_block);
if (err < 0)
goto err2;
err = vlan_gvrp_init();
if (err < 0)
goto err3;
err = vlan_mvrp_init();
if (err < 0)
goto err4;
err = vlan_netlink_init();
if (err < 0)
goto err5;
for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
dev_add_offload(&vlan_packet_offloads[i]);
vlan_ioctl_set(vlan_ioctl_handler);
return 0;
err5:
vlan_mvrp_uninit();
err4:
vlan_gvrp_uninit();
err3:
unregister_netdevice_notifier(&vlan_notifier_block);
err2:
unregister_pernet_subsys(&vlan_net_ops);
err0:
return err;
}
static void __exit vlan_cleanup_module(void)
{
unsigned int i;
vlan_ioctl_set(NULL);
for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
dev_remove_offload(&vlan_packet_offloads[i]);
vlan_netlink_fini();
unregister_netdevice_notifier(&vlan_notifier_block);
unregister_pernet_subsys(&vlan_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
vlan_mvrp_uninit();
vlan_gvrp_uninit();
}
module_init(vlan_proto_init);
module_exit(vlan_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);