linux_dsm_epyc7002/drivers/net/bonding/bond_sysfs.c

817 lines
22 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright(c) 2004-2005 Intel Corporation. All rights reserved.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/in.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/rtnetlink.h>
#include <linux/etherdevice.h>
[NET]: Make the device list and device lookups per namespace. This patch makes most of the generic device layer network namespace safe. This patch makes dev_base_head a network namespace variable, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anthing in the core of the network stack that was affected to by the change of dev_base was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use &init_net the initial network namespace. This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundametally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-09-18 01:56:21 +07:00
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/nsproxy.h>
#include <net/bonding.h>
#define to_bond(cd) ((struct bonding *)(netdev_priv(to_net_dev(cd))))
/* "show" function for the bond_masters attribute.
* The class parameter is ignored.
*/
static ssize_t bonding_show_bonds(struct class *cls,
struct class_attribute *attr,
char *buf)
{
struct bond_net *bn =
container_of(attr, struct bond_net, class_attr_bonding_masters);
int res = 0;
struct bonding *bond;
rtnl_lock();
list_for_each_entry(bond, &bn->dev_list, bond_list) {
if (res > (PAGE_SIZE - IFNAMSIZ)) {
/* not enough space for another interface name */
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s ", bond->dev->name);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
rtnl_unlock();
return res;
}
static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifname)
{
struct bonding *bond;
list_for_each_entry(bond, &bn->dev_list, bond_list) {
if (strncmp(bond->dev->name, ifname, IFNAMSIZ) == 0)
return bond->dev;
}
return NULL;
}
/* "store" function for the bond_masters attribute. This is what
* creates and deletes entire bonds.
*
* The class parameter is ignored.
*/
static ssize_t bonding_store_bonds(struct class *cls,
struct class_attribute *attr,
const char *buffer, size_t count)
{
struct bond_net *bn =
container_of(attr, struct bond_net, class_attr_bonding_masters);
char command[IFNAMSIZ + 1] = {0, };
char *ifname;
int rv, res = count;
sscanf(buffer, "%16s", command); /* IFNAMSIZ*/
ifname = command + 1;
if ((strlen(command) <= 1) ||
!dev_valid_name(ifname))
goto err_no_cmd;
if (command[0] == '+') {
pr_info("%s is being created...\n", ifname);
rv = bond_create(bn->net, ifname);
if (rv) {
if (rv == -EEXIST)
pr_info("%s already exists\n", ifname);
else
pr_info("%s creation failed\n", ifname);
res = rv;
}
} else if (command[0] == '-') {
struct net_device *bond_dev;
rtnl_lock();
bond_dev = bond_get_by_name(bn, ifname);
if (bond_dev) {
pr_info("%s is being deleted...\n", ifname);
unregister_netdevice(bond_dev);
} else {
pr_err("unable to delete non-existent %s\n", ifname);
res = -ENODEV;
}
rtnl_unlock();
} else
goto err_no_cmd;
/* Always return either count or an error. If you return 0, you'll
* get called forever, which is bad.
*/
return res;
err_no_cmd:
pr_err("no command found in bonding_masters - use +ifname or -ifname\n");
return -EPERM;
}
/* class attribute for bond_masters file. This ends up in /sys/class/net */
static const struct class_attribute class_attr_bonding_masters = {
.attr = {
.name = "bonding_masters",
.mode = 0644,
},
.show = bonding_show_bonds,
.store = bonding_store_bonds,
};
/* Generic "store" method for bonding sysfs option setting */
static ssize_t bonding_sysfs_store_option(struct device *d,
struct device_attribute *attr,
const char *buffer, size_t count)
{
struct bonding *bond = to_bond(d);
const struct bond_option *opt;
char *buffer_clone;
int ret;
opt = bond_opt_get_by_name(attr->attr.name);
if (WARN_ON(!opt))
return -ENOENT;
buffer_clone = kstrndup(buffer, count, GFP_KERNEL);
if (!buffer_clone)
return -ENOMEM;
ret = bond_opt_tryset_rtnl(bond, opt->id, buffer_clone);
if (!ret)
ret = count;
kfree(buffer_clone);
return ret;
}
/* Show the slaves in the current bond. */
static ssize_t bonding_show_slaves(struct device *d,
struct device_attribute *attr, char *buf)
{
struct bonding *bond = to_bond(d);
struct list_head *iter;
struct slave *slave;
int res = 0;
if (!rtnl_trylock())
return restart_syscall();
bond_for_each_slave(bond, slave, iter) {
if (res > (PAGE_SIZE - IFNAMSIZ)) {
/* not enough space for another interface name */
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s ", slave->dev->name);
}
rtnl_unlock();
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
return res;
}
static DEVICE_ATTR(slaves, 0644, bonding_show_slaves,
bonding_sysfs_store_option);
/* Show the bonding mode. */
static ssize_t bonding_show_mode(struct device *d,
struct device_attribute *attr, char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
val = bond_opt_get_val(BOND_OPT_MODE, BOND_MODE(bond));
return sprintf(buf, "%s %d\n", val->string, BOND_MODE(bond));
}
static DEVICE_ATTR(mode, 0644, bonding_show_mode, bonding_sysfs_store_option);
/* Show the bonding transmit hash method. */
static ssize_t bonding_show_xmit_hash(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
val = bond_opt_get_val(BOND_OPT_XMIT_HASH, bond->params.xmit_policy);
return sprintf(buf, "%s %d\n", val->string, bond->params.xmit_policy);
}
static DEVICE_ATTR(xmit_hash_policy, 0644,
bonding_show_xmit_hash, bonding_sysfs_store_option);
/* Show arp_validate. */
static ssize_t bonding_show_arp_validate(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
val = bond_opt_get_val(BOND_OPT_ARP_VALIDATE,
bond->params.arp_validate);
return sprintf(buf, "%s %d\n", val->string, bond->params.arp_validate);
}
static DEVICE_ATTR(arp_validate, 0644, bonding_show_arp_validate,
bonding_sysfs_store_option);
/* Show arp_all_targets. */
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 16:49:34 +07:00
static ssize_t bonding_show_arp_all_targets(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 16:49:34 +07:00
val = bond_opt_get_val(BOND_OPT_ARP_ALL_TARGETS,
bond->params.arp_all_targets);
return sprintf(buf, "%s %d\n",
val->string, bond->params.arp_all_targets);
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 16:49:34 +07:00
}
static DEVICE_ATTR(arp_all_targets, 0644,
bonding_show_arp_all_targets, bonding_sysfs_store_option);
/* Show fail_over_mac. */
static ssize_t bonding_show_fail_over_mac(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
val = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC,
bond->params.fail_over_mac);
return sprintf(buf, "%s %d\n", val->string, bond->params.fail_over_mac);
}
static DEVICE_ATTR(fail_over_mac, 0644,
bonding_show_fail_over_mac, bonding_sysfs_store_option);
/* Show the arp timer interval. */
static ssize_t bonding_show_arp_interval(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.arp_interval);
}
static DEVICE_ATTR(arp_interval, 0644,
bonding_show_arp_interval, bonding_sysfs_store_option);
/* Show the arp targets. */
static ssize_t bonding_show_arp_targets(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
int i, res = 0;
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) {
if (bond->params.arp_targets[i])
res += sprintf(buf + res, "%pI4 ",
&bond->params.arp_targets[i]);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
return res;
}
static DEVICE_ATTR(arp_ip_target, 0644,
bonding_show_arp_targets, bonding_sysfs_store_option);
/* Show the up and down delays. */
static ssize_t bonding_show_downdelay(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);
}
static DEVICE_ATTR(downdelay, 0644,
bonding_show_downdelay, bonding_sysfs_store_option);
static ssize_t bonding_show_updelay(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);
}
static DEVICE_ATTR(updelay, 0644,
bonding_show_updelay, bonding_sysfs_store_option);
static ssize_t bonding_show_peer_notif_delay(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n",
bond->params.peer_notif_delay * bond->params.miimon);
}
static DEVICE_ATTR(peer_notif_delay, 0644,
bonding_show_peer_notif_delay, bonding_sysfs_store_option);
/* Show the LACP interval. */
static ssize_t bonding_show_lacp(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
val = bond_opt_get_val(BOND_OPT_LACP_RATE, bond->params.lacp_fast);
return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
}
static DEVICE_ATTR(lacp_rate, 0644,
bonding_show_lacp, bonding_sysfs_store_option);
static ssize_t bonding_show_min_links(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%u\n", bond->params.min_links);
}
static DEVICE_ATTR(min_links, 0644,
bonding_show_min_links, bonding_sysfs_store_option);
static ssize_t bonding_show_ad_select(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
val = bond_opt_get_val(BOND_OPT_AD_SELECT, bond->params.ad_select);
return sprintf(buf, "%s %d\n", val->string, bond->params.ad_select);
}
static DEVICE_ATTR(ad_select, 0644,
bonding_show_ad_select, bonding_sysfs_store_option);
/* Show the number of peer notifications to send after a failover event. */
static ssize_t bonding_show_num_peer_notif(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.num_peer_notif);
}
static DEVICE_ATTR(num_grat_arp, 0644,
bonding_show_num_peer_notif, bonding_sysfs_store_option);
static DEVICE_ATTR(num_unsol_na, 0644,
bonding_show_num_peer_notif, bonding_sysfs_store_option);
/* Show the MII monitor interval. */
static ssize_t bonding_show_miimon(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.miimon);
}
static DEVICE_ATTR(miimon, 0644,
bonding_show_miimon, bonding_sysfs_store_option);
/* Show the primary slave. */
static ssize_t bonding_show_primary(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
struct slave *primary;
int count = 0;
rcu_read_lock();
primary = rcu_dereference(bond->primary_slave);
if (primary)
count = sprintf(buf, "%s\n", primary->dev->name);
rcu_read_unlock();
return count;
}
static DEVICE_ATTR(primary, 0644,
bonding_show_primary, bonding_sysfs_store_option);
/* Show the primary_reselect flag. */
static ssize_t bonding_show_primary_reselect(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
val = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT,
bond->params.primary_reselect);
return sprintf(buf, "%s %d\n",
val->string, bond->params.primary_reselect);
}
static DEVICE_ATTR(primary_reselect, 0644,
bonding_show_primary_reselect, bonding_sysfs_store_option);
/* Show the use_carrier flag. */
static ssize_t bonding_show_carrier(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.use_carrier);
}
static DEVICE_ATTR(use_carrier, 0644,
bonding_show_carrier, bonding_sysfs_store_option);
/* Show currently active_slave. */
static ssize_t bonding_show_active_slave(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
struct net_device *slave_dev;
int count = 0;
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 21:54:51 +07:00
rcu_read_lock();
slave_dev = bond_option_active_slave_get_rcu(bond);
if (slave_dev)
count = sprintf(buf, "%s\n", slave_dev->name);
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 21:54:51 +07:00
rcu_read_unlock();
return count;
}
static DEVICE_ATTR(active_slave, 0644,
bonding_show_active_slave, bonding_sysfs_store_option);
/* Show link status of the bond interface. */
static ssize_t bonding_show_mii_status(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
bool active = netif_carrier_ok(bond->dev);
return sprintf(buf, "%s\n", active ? "up" : "down");
}
static DEVICE_ATTR(mii_status, 0444, bonding_show_mii_status, NULL);
/* Show current 802.3ad aggregator ID. */
static ssize_t bonding_show_ad_aggregator(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.aggregator_id);
}
return count;
}
static DEVICE_ATTR(ad_aggregator, 0444, bonding_show_ad_aggregator, NULL);
/* Show number of active 802.3ad ports. */
static ssize_t bonding_show_ad_num_ports(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.ports);
}
return count;
}
static DEVICE_ATTR(ad_num_ports, 0444, bonding_show_ad_num_ports, NULL);
/* Show current 802.3ad actor key. */
static ssize_t bonding_show_ad_actor_key(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.actor_key);
}
return count;
}
static DEVICE_ATTR(ad_actor_key, 0444, bonding_show_ad_actor_key, NULL);
/* Show current 802.3ad partner key. */
static ssize_t bonding_show_ad_partner_key(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.partner_key);
}
return count;
}
static DEVICE_ATTR(ad_partner_key, 0444, bonding_show_ad_partner_key, NULL);
/* Show current 802.3ad partner mac. */
static ssize_t bonding_show_ad_partner_mac(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) {
struct ad_info ad_info;
if (!bond_3ad_get_active_agg_info(bond, &ad_info))
count = sprintf(buf, "%pM\n", ad_info.partner_system);
}
return count;
}
static DEVICE_ATTR(ad_partner_mac, 0444, bonding_show_ad_partner_mac, NULL);
/* Show the queue_ids of the slaves in the current bond. */
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 15:40:18 +07:00
static ssize_t bonding_show_queue_id(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
struct list_head *iter;
struct slave *slave;
int res = 0;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 15:40:18 +07:00
if (!rtnl_trylock())
return restart_syscall();
bond_for_each_slave(bond, slave, iter) {
if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {
/* not enough space for another interface_name:queue_id pair */
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 15:40:18 +07:00
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s:%d ",
slave->dev->name, slave->queue_id);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 15:40:18 +07:00
rtnl_unlock();
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 15:40:18 +07:00
return res;
}
static DEVICE_ATTR(queue_id, 0644, bonding_show_queue_id,
bonding_sysfs_store_option);
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 15:40:18 +07:00
/* Show the all_slaves_active flag. */
static ssize_t bonding_show_slaves_active(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.all_slaves_active);
}
static DEVICE_ATTR(all_slaves_active, 0644,
bonding_show_slaves_active, bonding_sysfs_store_option);
/* Show the number of IGMP membership reports to send on link failure */
static ssize_t bonding_show_resend_igmp(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.resend_igmp);
}
static DEVICE_ATTR(resend_igmp, 0644,
bonding_show_resend_igmp, bonding_sysfs_store_option);
static ssize_t bonding_show_lp_interval(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.lp_interval);
}
static DEVICE_ATTR(lp_interval, 0644,
bonding_show_lp_interval, bonding_sysfs_store_option);
static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb);
}
static DEVICE_ATTR(tlb_dynamic_lb, 0644,
bonding_show_tlb_dynamic_lb, bonding_sysfs_store_option);
static ssize_t bonding_show_packets_per_slave(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
unsigned int packets_per_slave = bond->params.packets_per_slave;
return sprintf(buf, "%u\n", packets_per_slave);
}
static DEVICE_ATTR(packets_per_slave, 0644,
bonding_show_packets_per_slave, bonding_sysfs_store_option);
static ssize_t bonding_show_ad_actor_sys_prio(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN))
return sprintf(buf, "%hu\n", bond->params.ad_actor_sys_prio);
return 0;
}
static DEVICE_ATTR(ad_actor_sys_prio, 0644,
bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option);
bonding: Allow userspace to set actors' macaddr in an AD-system. In an AD system, the communication between actor and partner is the business between these two entities. In the current setup anyone on the same L2 can "guess" the LACPDU contents and then possibly send the spoofed LACPDUs and trick the partner causing connectivity issues for the AD system. This patch allows to use a random mac-address obscuring it's identity making it harder for someone in the L2 is do the same thing. This patch allows user-space to choose the mac-address for the AD-system. This mac-address can not be NULL or a Multicast. If the mac-address is set from user-space; kernel will honor it and will not overwrite it. In the absence (value from user space); the logic will default to using the masters' mac as the mac-address for the AD-system. It can be set using example code below - # modprobe bonding mode=4 # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ $(( (RANDOM & 0xFE) | 0x02 )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF ))) # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar <maheshb@google.com> Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com> [jt: fixed up style issues reported by checkpatch] Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-09 14:01:56 +07:00
static ssize_t bonding_show_ad_actor_system(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN))
bonding: Allow userspace to set actors' macaddr in an AD-system. In an AD system, the communication between actor and partner is the business between these two entities. In the current setup anyone on the same L2 can "guess" the LACPDU contents and then possibly send the spoofed LACPDUs and trick the partner causing connectivity issues for the AD system. This patch allows to use a random mac-address obscuring it's identity making it harder for someone in the L2 is do the same thing. This patch allows user-space to choose the mac-address for the AD-system. This mac-address can not be NULL or a Multicast. If the mac-address is set from user-space; kernel will honor it and will not overwrite it. In the absence (value from user space); the logic will default to using the masters' mac as the mac-address for the AD-system. It can be set using example code below - # modprobe bonding mode=4 # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ $(( (RANDOM & 0xFE) | 0x02 )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF ))) # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar <maheshb@google.com> Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com> [jt: fixed up style issues reported by checkpatch] Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-09 14:01:56 +07:00
return sprintf(buf, "%pM\n", bond->params.ad_actor_system);
return 0;
}
static DEVICE_ATTR(ad_actor_system, 0644,
bonding: Allow userspace to set actors' macaddr in an AD-system. In an AD system, the communication between actor and partner is the business between these two entities. In the current setup anyone on the same L2 can "guess" the LACPDU contents and then possibly send the spoofed LACPDUs and trick the partner causing connectivity issues for the AD system. This patch allows to use a random mac-address obscuring it's identity making it harder for someone in the L2 is do the same thing. This patch allows user-space to choose the mac-address for the AD-system. This mac-address can not be NULL or a Multicast. If the mac-address is set from user-space; kernel will honor it and will not overwrite it. In the absence (value from user space); the logic will default to using the masters' mac as the mac-address for the AD-system. It can be set using example code below - # modprobe bonding mode=4 # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ $(( (RANDOM & 0xFE) | 0x02 )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF ))) # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar <maheshb@google.com> Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com> [jt: fixed up style issues reported by checkpatch] Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-09 14:01:56 +07:00
bonding_show_ad_actor_system, bonding_sysfs_store_option);
static ssize_t bonding_show_ad_user_port_key(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN))
return sprintf(buf, "%hu\n", bond->params.ad_user_port_key);
return 0;
}
static DEVICE_ATTR(ad_user_port_key, 0644,
bonding_show_ad_user_port_key, bonding_sysfs_store_option);
static struct attribute *per_bond_attrs[] = {
&dev_attr_slaves.attr,
&dev_attr_mode.attr,
&dev_attr_fail_over_mac.attr,
&dev_attr_arp_validate.attr,
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 16:49:34 +07:00
&dev_attr_arp_all_targets.attr,
&dev_attr_arp_interval.attr,
&dev_attr_arp_ip_target.attr,
&dev_attr_downdelay.attr,
&dev_attr_updelay.attr,
&dev_attr_peer_notif_delay.attr,
&dev_attr_lacp_rate.attr,
&dev_attr_ad_select.attr,
&dev_attr_xmit_hash_policy.attr,
&dev_attr_num_grat_arp.attr,
&dev_attr_num_unsol_na.attr,
&dev_attr_miimon.attr,
&dev_attr_primary.attr,
&dev_attr_primary_reselect.attr,
&dev_attr_use_carrier.attr,
&dev_attr_active_slave.attr,
&dev_attr_mii_status.attr,
&dev_attr_ad_aggregator.attr,
&dev_attr_ad_num_ports.attr,
&dev_attr_ad_actor_key.attr,
&dev_attr_ad_partner_key.attr,
&dev_attr_ad_partner_mac.attr,
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 15:40:18 +07:00
&dev_attr_queue_id.attr,
&dev_attr_all_slaves_active.attr,
&dev_attr_resend_igmp.attr,
&dev_attr_min_links.attr,
&dev_attr_lp_interval.attr,
&dev_attr_packets_per_slave.attr,
&dev_attr_tlb_dynamic_lb.attr,
&dev_attr_ad_actor_sys_prio.attr,
bonding: Allow userspace to set actors' macaddr in an AD-system. In an AD system, the communication between actor and partner is the business between these two entities. In the current setup anyone on the same L2 can "guess" the LACPDU contents and then possibly send the spoofed LACPDUs and trick the partner causing connectivity issues for the AD system. This patch allows to use a random mac-address obscuring it's identity making it harder for someone in the L2 is do the same thing. This patch allows user-space to choose the mac-address for the AD-system. This mac-address can not be NULL or a Multicast. If the mac-address is set from user-space; kernel will honor it and will not overwrite it. In the absence (value from user space); the logic will default to using the masters' mac as the mac-address for the AD-system. It can be set using example code below - # modprobe bonding mode=4 # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ $(( (RANDOM & 0xFE) | 0x02 )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF )) \ $(( RANDOM & 0xFF ))) # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system # echo +eth1 > /sys/class/net/bond0/bonding/slaves ... # ip link set bond0 up Signed-off-by: Mahesh Bandewar <maheshb@google.com> Reviewed-by: Nikolay Aleksandrov <nikolay@redhat.com> [jt: fixed up style issues reported by checkpatch] Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-09 14:01:56 +07:00
&dev_attr_ad_actor_system.attr,
&dev_attr_ad_user_port_key.attr,
NULL,
};
static const struct attribute_group bonding_group = {
.name = "bonding",
.attrs = per_bond_attrs,
};
/* Initialize sysfs. This sets up the bonding_masters file in
* /sys/class/net.
*/
int bond_create_sysfs(struct bond_net *bn)
{
int ret;
bn->class_attr_bonding_masters = class_attr_bonding_masters;
bonding: Add a forgetten sysfs_attr_init on class_attr_bonding_masters When I made class_attr_bonding_matters per network namespace and dynamically allocated I overlooked the need for calling sysfs_attr_init. Oops. This fixes the following lockdep splat: [ 5.749651] bonding: Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011) [ 5.749655] bonding: MII link monitoring set to 100 ms [ 5.749676] BUG: key f49a831c not in .data! [ 5.749677] ------------[ cut here ]------------ [ 5.749752] WARNING: at kernel/lockdep.c:2897 lockdep_init_map+0x1c3/0x460() [ 5.749809] Hardware name: ProLiant BL460c G1 [ 5.749862] Modules linked in: bonding(+) [ 5.749978] Pid: 3177, comm: modprobe Not tainted 3.1.0-rc9-02177-gf2d1a4e-dirty #1157 [ 5.750066] Call Trace: [ 5.750120] [<c1352c2f>] ? printk+0x18/0x21 [ 5.750176] [<c103112d>] warn_slowpath_common+0x6d/0xa0 [ 5.750231] [<c1060133>] ? lockdep_init_map+0x1c3/0x460 [ 5.750287] [<c1060133>] ? lockdep_init_map+0x1c3/0x460 [ 5.750342] [<c103117d>] warn_slowpath_null+0x1d/0x20 [ 5.750398] [<c1060133>] lockdep_init_map+0x1c3/0x460 [ 5.750453] [<c1355ddd>] ? _raw_spin_unlock+0x1d/0x20 [ 5.750510] [<c11255c8>] ? sysfs_new_dirent+0x68/0x110 [ 5.750565] [<c1124d4b>] sysfs_add_file_mode+0x8b/0xe0 [ 5.750621] [<c1124db3>] sysfs_add_file+0x13/0x20 [ 5.750675] [<c1124e7c>] sysfs_create_file+0x1c/0x20 [ 5.750737] [<c1208f09>] class_create_file+0x19/0x20 [ 5.750794] [<c12c186f>] netdev_class_create_file+0xf/0x20 [ 5.750853] [<f85deaf4>] bond_create_sysfs+0x44/0x90 [bonding] [ 5.750911] [<f8410947>] ? bond_create_proc_dir+0x1e/0x3e [bonding] [ 5.750970] [<f841007e>] bond_net_init+0x7e/0x87 [bonding] [ 5.751026] [<f8410000>] ? 0xf840ffff [ 5.751080] [<c12abc7a>] ops_init.clone.4+0xba/0x100 [ 5.751135] [<c12abdb2>] ? register_pernet_subsys+0x12/0x30 [ 5.751191] [<c12abd03>] register_pernet_operations.clone.3+0x43/0x80 [ 5.751249] [<c12abdb9>] register_pernet_subsys+0x19/0x30 [ 5.751306] [<f84108b9>] bonding_init+0x832/0x8a2 [bonding] [ 5.751363] [<c10011f0>] do_one_initcall+0x30/0x160 [ 5.751420] [<f8410087>] ? bond_net_init+0x87/0x87 [bonding] [ 5.751477] [<c106d5cf>] sys_init_module+0xef/0x1890 [ 5.751533] [<c1356490>] sysenter_do_call+0x12/0x36 [ 5.751588] ---[ end trace 89f492d83a7f5006 ]--- Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Reported-by: Eric Dumazet <eric.dumazet@gmail.com> Tested-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-10-22 05:43:07 +07:00
sysfs_attr_init(&bn->class_attr_bonding_masters.attr);
sysfs: make attr namespace interface less convoluted sysfs ns (namespace) implementation became more convoluted than necessary while trying to hide ns information from visible interface. The relatively recent attr ns support is a good example. * attr ns tag is determined by sysfs_ops->namespace() callback while dir tag is determined by kobj_type->namespace(). The placement is arbitrary. * Instead of performing operations with explicit ns tag, the namespace callback is routed through sysfs_attr_ns(), sysfs_ops->namespace(), class_attr_namespace(), class_attr->namespace(). It's not simpler in any sense. The only thing this convolution does is traversing the whole stack backwards. The namespace callbacks are unncessary because the operations involved are inherently synchronous. The information can be provided in in straight-forward top-down direction and reversing that direction is unnecessary and against basic design principles. This backward interface is unnecessarily convoluted and hinders properly separating out sysfs from driver model / kobject for proper layering. This patch updates attr ns support such that * sysfs_ops->namespace() and class_attr->namespace() are dropped. * sysfs_{create|remove}_file_ns(), which take explicit @ns param, are added and sysfs_{create|remove}_file() are now simple wrappers around the ns aware functions. * ns handling is dropped from sysfs_chmod_file(). Nobody uses it at this point. sysfs_chmod_file_ns() can be added later if necessary. * Explicit @ns is propagated through class_{create|remove}_file_ns() and netdev_class_{create|remove}_file_ns(). * driver/net/bonding which is currently the only user of attr namespace is updated to use netdev_class_{create|remove}_file_ns() with @bh->net as the ns tag instead of using the namespace callback. This patch should be an equivalent conversion without any functional difference. It makes the code easier to follow, reduces lines of code a bit and helps proper separation and layering. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Kay Sievers <kay@vrfy.org> Acked-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-09-12 09:29:04 +07:00
ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters,
bn->net);
/* Permit multiple loads of the module by ignoring failures to
2007-01-20 09:15:47 +07:00
* create the bonding_masters sysfs file. Bonding devices
* created by second or subsequent loads of the module will
* not be listed in, or controllable by, bonding_masters, but
* will have the usual "bonding" sysfs directory.
*
* This is done to preserve backwards compatibility for
* initscripts/sysconfig, which load bonding multiple times to
* configure multiple bonding devices.
*/
if (ret == -EEXIST) {
/* Is someone being kinky and naming a device bonding_master? */
if (__dev_get_by_name(bn->net,
class_attr_bonding_masters.attr.name))
pr_err("network device named %s already exists in sysfs\n",
class_attr_bonding_masters.attr.name);
ret = 0;
2007-01-20 09:15:47 +07:00
}
return ret;
}
/* Remove /sys/class/net/bonding_masters. */
void bond_destroy_sysfs(struct bond_net *bn)
{
sysfs: make attr namespace interface less convoluted sysfs ns (namespace) implementation became more convoluted than necessary while trying to hide ns information from visible interface. The relatively recent attr ns support is a good example. * attr ns tag is determined by sysfs_ops->namespace() callback while dir tag is determined by kobj_type->namespace(). The placement is arbitrary. * Instead of performing operations with explicit ns tag, the namespace callback is routed through sysfs_attr_ns(), sysfs_ops->namespace(), class_attr_namespace(), class_attr->namespace(). It's not simpler in any sense. The only thing this convolution does is traversing the whole stack backwards. The namespace callbacks are unncessary because the operations involved are inherently synchronous. The information can be provided in in straight-forward top-down direction and reversing that direction is unnecessary and against basic design principles. This backward interface is unnecessarily convoluted and hinders properly separating out sysfs from driver model / kobject for proper layering. This patch updates attr ns support such that * sysfs_ops->namespace() and class_attr->namespace() are dropped. * sysfs_{create|remove}_file_ns(), which take explicit @ns param, are added and sysfs_{create|remove}_file() are now simple wrappers around the ns aware functions. * ns handling is dropped from sysfs_chmod_file(). Nobody uses it at this point. sysfs_chmod_file_ns() can be added later if necessary. * Explicit @ns is propagated through class_{create|remove}_file_ns() and netdev_class_{create|remove}_file_ns(). * driver/net/bonding which is currently the only user of attr namespace is updated to use netdev_class_{create|remove}_file_ns() with @bh->net as the ns tag instead of using the namespace callback. This patch should be an equivalent conversion without any functional difference. It makes the code easier to follow, reduces lines of code a bit and helps proper separation and layering. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Kay Sievers <kay@vrfy.org> Acked-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-09-12 09:29:04 +07:00
netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net);
}
/* Initialize sysfs for each bond. This sets up and registers
* the 'bondctl' directory for each individual bond under /sys/class/net.
*/
void bond_prepare_sysfs_group(struct bonding *bond)
{
bond->dev->sysfs_groups[0] = &bonding_group;
}