linux_dsm_epyc7002/net/tipc/bearer.c

1072 lines
25 KiB
C
Raw Normal View History

/*
* net/tipc/bearer.c: TIPC bearer code
*
* Copyright (c) 1996-2006, 2013-2014, Ericsson AB
* Copyright (c) 2004-2006, 2010-2013, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <net/sock.h>
#include "core.h"
#include "bearer.h"
#include "link.h"
#include "discover.h"
#include "bcast.h"
tipc: add InfiniBand media type Add InfiniBand media type based on the ethernet media type. The only real difference is that in case of InfiniBand, we need the entire 20 bytes of space reserved for media addresses, so the TIPC media type ID is not explicitly stored in the packet payload. Sample output of tipc-config: # tipc-config -v -addr -netid -nt=all -p -m -b -n -ls node address: <10.1.4> current network id: 4711 Type Lower Upper Port Identity Publication Scope 0 167776257 167776257 <10.1.1:1855512577> 1855512578 cluster 167776260 167776260 <10.1.4:1216454657> 1216454658 zone 1 1 1 <10.1.4:1216479235> 1216479236 node Ports: 1216479235: bound to {1,1} 1216454657: bound to {0,167776260} Media: eth ib Bearers: ib:ib0 Nodes known: <10.1.1>: up Link <broadcast-link> Window:20 packets RX packets:0 fragments:0/0 bundles:0/0 TX packets:0 fragments:0/0 bundles:0/0 RX naks:0 defs:0 dups:0 TX naks:0 acks:0 dups:0 Congestion bearer:0 link:0 Send queue max:0 avg:0 Link <10.1.4:ib0-10.1.1:ib0> ACTIVE MTU:2044 Priority:10 Tolerance:1500 ms Window:50 packets RX packets:80 fragments:0/0 bundles:0/0 TX packets:40 fragments:0/0 bundles:0/0 TX profile sample:22 packets average:54 octets 0-64:100% -256:0% -1024:0% -4096:0% -16384:0% -32768:0% -66000:0% RX states:410 probes:213 naks:0 defs:0 dups:0 TX states:410 probes:197 naks:0 acks:0 dups:0 Congestion bearer:0 link:0 Send queue max:1 avg:0 Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-04-17 13:18:28 +07:00
#define MAX_ADDR_STR 60
static struct tipc_media * const media_info_array[] = {
&eth_media_info,
#ifdef CONFIG_TIPC_MEDIA_IB
&ib_media_info,
#endif
#ifdef CONFIG_TIPC_MEDIA_UDP
&udp_media_info,
#endif
NULL
};
static const struct nla_policy
tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
[TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
[TIPC_NLA_BEARER_NAME] = {
.type = NLA_STRING,
.len = TIPC_MAX_BEARER_NAME
},
[TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
[TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
};
static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
[TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
[TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
[TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
};
static void bearer_disable(struct net *net, struct tipc_bearer *b);
/**
* tipc_media_find - locates specified media object by name
*/
struct tipc_media *tipc_media_find(const char *name)
{
u32 i;
for (i = 0; media_info_array[i] != NULL; i++) {
if (!strcmp(media_info_array[i]->name, name))
break;
}
return media_info_array[i];
}
/**
* media_find_id - locates specified media object by type identifier
*/
static struct tipc_media *media_find_id(u8 type)
{
u32 i;
for (i = 0; media_info_array[i] != NULL; i++) {
if (media_info_array[i]->type_id == type)
break;
}
return media_info_array[i];
}
/**
* tipc_media_addr_printf - record media address in print buffer
*/
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 11:50:23 +07:00
void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a)
{
char addr_str[MAX_ADDR_STR];
struct tipc_media *m;
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 11:50:23 +07:00
int ret;
m = media_find_id(a->media_id);
if (m && !m->addr2str(a, addr_str, sizeof(addr_str)))
ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str);
else {
u32 i;
ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id);
for (i = 0; i < sizeof(a->value); i++)
ret += scnprintf(buf - ret, len + ret,
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 11:50:23 +07:00
"-%02x", a->value[i]);
}
}
/**
* bearer_name_validate - validate & (optionally) deconstruct bearer name
* @name: ptr to bearer name string
* @name_parts: ptr to area for bearer name components (or NULL if not needed)
*
* Returns 1 if bearer name is valid, otherwise 0.
*/
static int bearer_name_validate(const char *name,
struct tipc_bearer_names *name_parts)
{
char name_copy[TIPC_MAX_BEARER_NAME];
char *media_name;
char *if_name;
u32 media_len;
u32 if_len;
/* copy bearer name & ensure length is OK */
name_copy[TIPC_MAX_BEARER_NAME - 1] = 0;
/* need above in case non-Posix strncpy() doesn't pad with nulls */
strncpy(name_copy, name, TIPC_MAX_BEARER_NAME);
if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0)
return 0;
/* ensure all component parts of bearer name are present */
media_name = name_copy;
if_name = strchr(media_name, ':');
if (if_name == NULL)
return 0;
*(if_name++) = 0;
media_len = if_name - media_name;
if_len = strlen(if_name) + 1;
/* validate component parts of bearer name */
if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) ||
(if_len <= 1) || (if_len > TIPC_MAX_IF_NAME))
return 0;
/* return bearer name components, if necessary */
if (name_parts) {
strcpy(name_parts->media_name, media_name);
strcpy(name_parts->if_name, if_name);
}
return 1;
}
/**
* tipc_bearer_find - locates bearer object with matching bearer name
*/
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
b = rtnl_dereference(tn->bearer_list[i]);
if (b && (!strcmp(b->name, name)))
return b;
}
return NULL;
}
void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (b)
tipc_disc_add_dest(b->link_req);
rcu_read_unlock();
}
void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (b)
tipc_disc_remove_dest(b->link_req);
rcu_read_unlock();
}
/**
* tipc_enable_bearer - enable bearer with the given name
*/
static int tipc_enable_bearer(struct net *net, const char *name,
u32 disc_domain, u32 priority,
struct nlattr *attr[])
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
struct tipc_media *m;
struct tipc_bearer_names b_names;
char addr_string[16];
u32 bearer_id;
u32 with_this_prio;
u32 i;
int res = -EINVAL;
if (!tn->own_addr) {
pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
name);
return -ENOPROTOOPT;
}
if (!bearer_name_validate(name, &b_names)) {
pr_warn("Bearer <%s> rejected, illegal name\n", name);
return -EINVAL;
}
if (tipc_addr_domain_valid(disc_domain) &&
(disc_domain != tn->own_addr)) {
if (tipc_in_scope(disc_domain, tn->own_addr)) {
disc_domain = tn->own_addr & TIPC_CLUSTER_MASK;
res = 0; /* accept any node in own cluster */
} else if (in_own_cluster_exact(net, disc_domain))
res = 0; /* accept specified node in own cluster */
}
if (res) {
pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
name);
return -EINVAL;
}
if ((priority > TIPC_MAX_LINK_PRI) &&
(priority != TIPC_MEDIA_LINK_PRI)) {
pr_warn("Bearer <%s> rejected, illegal priority\n", name);
return -EINVAL;
}
m = tipc_media_find(b_names.media_name);
if (!m) {
pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
name, b_names.media_name);
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
return -EINVAL;
}
if (priority == TIPC_MEDIA_LINK_PRI)
priority = m->priority;
restart:
bearer_id = MAX_BEARERS;
with_this_prio = 1;
for (i = MAX_BEARERS; i-- != 0; ) {
b = rtnl_dereference(tn->bearer_list[i]);
if (!b) {
bearer_id = i;
continue;
}
if (!strcmp(name, b->name)) {
pr_warn("Bearer <%s> rejected, already enabled\n",
name);
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
return -EINVAL;
}
if ((b->priority == priority) &&
(++with_this_prio > 2)) {
if (priority-- == 0) {
pr_warn("Bearer <%s> rejected, duplicate priority\n",
name);
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
return -EINVAL;
}
pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
name, priority + 1, priority);
goto restart;
}
}
if (bearer_id >= MAX_BEARERS) {
pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
name, MAX_BEARERS);
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
return -EINVAL;
}
b = kzalloc(sizeof(*b), GFP_ATOMIC);
if (!b)
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
return -ENOMEM;
strcpy(b->name, name);
b->media = m;
res = m->enable_media(net, b, attr);
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
return -EINVAL;
}
b->identity = bearer_id;
b->tolerance = m->tolerance;
b->window = m->window;
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = priority;
res = tipc_disc_create(net, b, &b->bcast_addr);
if (res) {
bearer_disable(net, b);
pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
name);
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
return -EINVAL;
}
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
tipc_addr_string_fill(addr_string, disc_domain), priority);
return res;
}
/**
tipc: remove interface state mirroring in bearer struct 'tipc_bearer' is a generic representation of the underlying media type, and exists in a one-to-one relationship to each interface TIPC is using. The struct contains a 'blocked' flag that mirrors the operational and execution state of the represented interface, and is updated through notification calls from the latter. The users of tipc_bearer are checking this flag before each attempt to send a packet via the interface. This state mirroring serves no purpose in the current code base. TIPC links will not discover a media failure any faster through this mechanism, and in reality the flag only adds overhead at packet sending and reception. Furthermore, the fact that the flag needs to be protected by a spinlock aggregated into tipc_bearer has turned out to cause a serious and completely unnecessary deadlock problem. CPU0 CPU1 ---- ---- Time 0: bearer_disable() link_timeout() Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue() Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr) Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock) Time 4: del_timer_sync(&req->timer) I.e., del_timer_sync() on CPU0 never returns, because the timer handler on CPU1 is waiting for the bearer lock. We eliminate the 'blocked' flag from struct tipc_bearer, along with all tests on this flag. This not only resolves the deadlock, but also simplifies and speeds up the data path execution of TIPC. It also fits well into our ongoing effort to make the locking policy simpler and more manageable. An effect of this change is that we can get rid of functions such as tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer(). We replace the latter with a new function, tipc_reset_bearer(), which resets all links associated to the bearer immediately after an interface goes down. A user might notice one slight change in link behaviour after this change. When an interface goes down, (e.g. through a NETDEV_DOWN event) all attached links will be reset immediately, instead of leaving it to each link to detect the failure through a timer-driven mechanism. We consider this an improvement, and see no obvious risks with the new behavior. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 22:08:00 +07:00
* tipc_reset_bearer - Reset all links established over this bearer
*/
static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
{
pr_info("Resetting bearer <%s>\n", b->name);
tipc_node_delete_links(net, b->identity);
tipc_disc_reset(net, b);
return 0;
}
/**
* bearer_disable
*
tipc: purge tipc_net_lock lock Now tipc routing hierarchy comprises the structures 'node', 'link'and 'bearer'. The whole hierarchy is protected by a big read/write lock, tipc_net_lock, to ensure that nothing is added or removed while code is accessing any of these structures. Obviously the locking policy makes node, link and bearer components closely bound together so that their relationship becomes unnecessarily complex. In the worst case, such locking policy not only has a negative influence on performance, but also it's prone to lead to deadlock occasionally. In order o decouple the complex relationship between bearer and node as well as link, the locking policy is adjusted as follows: - Bearer level RTNL lock is used on update side, and RCU is used on read side. Meanwhile, all bearer instances including broadcast bearer are saved into bearer_list array. - Node and link level All node instances are saved into two tipc_node_list and node_htable lists. The two lists are protected by node_list_lock on write side, and they are guarded with RCU lock on read side. All members in node structure including link instances are protected by node spin lock. - The relationship between bearer and node When link accesses bearer, it first needs to find the bearer with its bearer identity from the bearer_list array. When bearer accesses node, it can iterate the node_htable hash list with the node address to find the corresponding node. In the new locking policy, every component has its private locking solution and the relationship between bearer and node is very simple, that is, they can find each other with node address or bearer identity from node_htable hash list or bearer_list array. Until now above all changes have been done, so tipc_net_lock can be removed safely. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Erik Hugne <erik.hugne@ericsson.com> Tested-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-21 09:55:48 +07:00
* Note: This routine assumes caller holds RTNL lock.
*/
static void bearer_disable(struct net *net, struct tipc_bearer *b)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 i;
pr_info("Disabling bearer <%s>\n", b->name);
b->media->disable_media(b);
tipc: avoid possible deadlock while enable and disable bearer We met lockdep warning when enable and disable the bearer for commands such as: tipc-config -netid=1234 -addr=1.1.3 -be=eth:eth0 tipc-config -netid=1234 -addr=1.1.3 -bd=eth:eth0 --------------------------------------------------- [ 327.693595] ====================================================== [ 327.693994] [ INFO: possible circular locking dependency detected ] [ 327.694519] 3.11.0-rc3-wwd-default #4 Tainted: G O [ 327.694882] ------------------------------------------------------- [ 327.695385] tipc-config/5825 is trying to acquire lock: [ 327.695754] (((timer))#2){+.-...}, at: [<ffffffff8105be80>] del_timer_sync+0x0/0xd0 [ 327.696018] [ 327.696018] but task is already holding lock: [ 327.696018] (&(&b_ptr->lock)->rlock){+.-...}, at: [<ffffffffa02be58d>] bearer_disable+ 0xdd/0x120 [tipc] [ 327.696018] [ 327.696018] which lock already depends on the new lock. [ 327.696018] [ 327.696018] [ 327.696018] the existing dependency chain (in reverse order) is: [ 327.696018] [ 327.696018] -> #1 (&(&b_ptr->lock)->rlock){+.-...}: [ 327.696018] [<ffffffff810b3b4d>] validate_chain+0x6dd/0x870 [ 327.696018] [<ffffffff810b40bb>] __lock_acquire+0x3db/0x670 [ 327.696018] [<ffffffff810b4453>] lock_acquire+0x103/0x130 [ 327.696018] [<ffffffff814d65b1>] _raw_spin_lock_bh+0x41/0x80 [ 327.696018] [<ffffffffa02c5d48>] disc_timeout+0x18/0xd0 [tipc] [ 327.696018] [<ffffffff8105b92a>] call_timer_fn+0xda/0x1e0 [ 327.696018] [<ffffffff8105bcd7>] run_timer_softirq+0x2a7/0x2d0 [ 327.696018] [<ffffffff8105379a>] __do_softirq+0x16a/0x2e0 [ 327.696018] [<ffffffff81053a35>] irq_exit+0xd5/0xe0 [ 327.696018] [<ffffffff81033005>] smp_apic_timer_interrupt+0x45/0x60 [ 327.696018] [<ffffffff814df4af>] apic_timer_interrupt+0x6f/0x80 [ 327.696018] [<ffffffff8100b70e>] arch_cpu_idle+0x1e/0x30 [ 327.696018] [<ffffffff810a039d>] cpu_idle_loop+0x1fd/0x280 [ 327.696018] [<ffffffff810a043e>] cpu_startup_entry+0x1e/0x20 [ 327.696018] [<ffffffff81031589>] start_secondary+0x89/0x90 [ 327.696018] [ 327.696018] -> #0 (((timer))#2){+.-...}: [ 327.696018] [<ffffffff810b33fe>] check_prev_add+0x43e/0x4b0 [ 327.696018] [<ffffffff810b3b4d>] validate_chain+0x6dd/0x870 [ 327.696018] [<ffffffff810b40bb>] __lock_acquire+0x3db/0x670 [ 327.696018] [<ffffffff810b4453>] lock_acquire+0x103/0x130 [ 327.696018] [<ffffffff8105bebd>] del_timer_sync+0x3d/0xd0 [ 327.696018] [<ffffffffa02c5855>] tipc_disc_delete+0x15/0x30 [tipc] [ 327.696018] [<ffffffffa02be59f>] bearer_disable+0xef/0x120 [tipc] [ 327.696018] [<ffffffffa02be74f>] tipc_disable_bearer+0x2f/0x60 [tipc] [ 327.696018] [<ffffffffa02bfb32>] tipc_cfg_do_cmd+0x2e2/0x550 [tipc] [ 327.696018] [<ffffffffa02c8c79>] handle_cmd+0x49/0xe0 [tipc] [ 327.696018] [<ffffffff8143e898>] genl_family_rcv_msg+0x268/0x340 [ 327.696018] [<ffffffff8143ed30>] genl_rcv_msg+0x70/0xd0 [ 327.696018] [<ffffffff8143d4c9>] netlink_rcv_skb+0x89/0xb0 [ 327.696018] [<ffffffff8143e617>] genl_rcv+0x27/0x40 [ 327.696018] [<ffffffff8143d21e>] netlink_unicast+0x15e/0x1b0 [ 327.696018] [<ffffffff8143ddcf>] netlink_sendmsg+0x22f/0x400 [ 327.696018] [<ffffffff813f7836>] __sock_sendmsg+0x66/0x80 [ 327.696018] [<ffffffff813f7957>] sock_aio_write+0x107/0x120 [ 327.696018] [<ffffffff8117f76d>] do_sync_write+0x7d/0xc0 [ 327.696018] [<ffffffff8117fc56>] vfs_write+0x186/0x190 [ 327.696018] [<ffffffff811803e0>] SyS_write+0x60/0xb0 [ 327.696018] [<ffffffff814de852>] system_call_fastpath+0x16/0x1b [ 327.696018] [ 327.696018] other info that might help us debug this: [ 327.696018] [ 327.696018] Possible unsafe locking scenario: [ 327.696018] [ 327.696018] CPU0 CPU1 [ 327.696018] ---- ---- [ 327.696018] lock(&(&b_ptr->lock)->rlock); [ 327.696018] lock(((timer))#2); [ 327.696018] lock(&(&b_ptr->lock)->rlock); [ 327.696018] lock(((timer))#2); [ 327.696018] [ 327.696018] *** DEADLOCK *** [ 327.696018] [ 327.696018] 5 locks held by tipc-config/5825: [ 327.696018] #0: (cb_lock){++++++}, at: [<ffffffff8143e608>] genl_rcv+0x18/0x40 [ 327.696018] #1: (genl_mutex){+.+.+.}, at: [<ffffffff8143ed66>] genl_rcv_msg+0xa6/0xd0 [ 327.696018] #2: (config_mutex){+.+.+.}, at: [<ffffffffa02bf889>] tipc_cfg_do_cmd+0x39/ 0x550 [tipc] [ 327.696018] #3: (tipc_net_lock){++.-..}, at: [<ffffffffa02be738>] tipc_disable_bearer+ 0x18/0x60 [tipc] [ 327.696018] #4: (&(&b_ptr->lock)->rlock){+.-...}, at: [<ffffffffa02be58d>] bearer_disable+0xdd/0x120 [tipc] [ 327.696018] [ 327.696018] stack backtrace: [ 327.696018] CPU: 2 PID: 5825 Comm: tipc-config Tainted: G O 3.11.0-rc3-wwd- default #4 [ 327.696018] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 327.696018] 00000000ffffffff ffff880037fa77a8 ffffffff814d03dd 0000000000000000 [ 327.696018] ffff880037fa7808 ffff880037fa77e8 ffffffff810b1c4f 0000000037fa77e8 [ 327.696018] ffff880037fa7808 ffff880037e4db40 0000000000000000 ffff880037e4e318 [ 327.696018] Call Trace: [ 327.696018] [<ffffffff814d03dd>] dump_stack+0x4d/0xa0 [ 327.696018] [<ffffffff810b1c4f>] print_circular_bug+0x10f/0x120 [ 327.696018] [<ffffffff810b33fe>] check_prev_add+0x43e/0x4b0 [ 327.696018] [<ffffffff810b3b4d>] validate_chain+0x6dd/0x870 [ 327.696018] [<ffffffff81087a28>] ? sched_clock_cpu+0xd8/0x110 [ 327.696018] [<ffffffff810b40bb>] __lock_acquire+0x3db/0x670 [ 327.696018] [<ffffffff810b4453>] lock_acquire+0x103/0x130 [ 327.696018] [<ffffffff8105be80>] ? try_to_del_timer_sync+0x70/0x70 [ 327.696018] [<ffffffff8105bebd>] del_timer_sync+0x3d/0xd0 [ 327.696018] [<ffffffff8105be80>] ? try_to_del_timer_sync+0x70/0x70 [ 327.696018] [<ffffffffa02c5855>] tipc_disc_delete+0x15/0x30 [tipc] [ 327.696018] [<ffffffffa02be59f>] bearer_disable+0xef/0x120 [tipc] [ 327.696018] [<ffffffffa02be74f>] tipc_disable_bearer+0x2f/0x60 [tipc] [ 327.696018] [<ffffffffa02bfb32>] tipc_cfg_do_cmd+0x2e2/0x550 [tipc] [ 327.696018] [<ffffffff81218783>] ? security_capable+0x13/0x20 [ 327.696018] [<ffffffffa02c8c79>] handle_cmd+0x49/0xe0 [tipc] [ 327.696018] [<ffffffff8143e898>] genl_family_rcv_msg+0x268/0x340 [ 327.696018] [<ffffffff8143ed30>] genl_rcv_msg+0x70/0xd0 [ 327.696018] [<ffffffff8143ecc0>] ? genl_lock+0x20/0x20 [ 327.696018] [<ffffffff8143d4c9>] netlink_rcv_skb+0x89/0xb0 [ 327.696018] [<ffffffff8143e608>] ? genl_rcv+0x18/0x40 [ 327.696018] [<ffffffff8143e617>] genl_rcv+0x27/0x40 [ 327.696018] [<ffffffff8143d21e>] netlink_unicast+0x15e/0x1b0 [ 327.696018] [<ffffffff81289d7c>] ? memcpy_fromiovec+0x6c/0x90 [ 327.696018] [<ffffffff8143ddcf>] netlink_sendmsg+0x22f/0x400 [ 327.696018] [<ffffffff813f7836>] __sock_sendmsg+0x66/0x80 [ 327.696018] [<ffffffff813f7957>] sock_aio_write+0x107/0x120 [ 327.696018] [<ffffffff813fe29c>] ? release_sock+0x8c/0xa0 [ 327.696018] [<ffffffff8117f76d>] do_sync_write+0x7d/0xc0 [ 327.696018] [<ffffffff8117fa24>] ? rw_verify_area+0x54/0x100 [ 327.696018] [<ffffffff8117fc56>] vfs_write+0x186/0x190 [ 327.696018] [<ffffffff811803e0>] SyS_write+0x60/0xb0 [ 327.696018] [<ffffffff814de852>] system_call_fastpath+0x16/0x1b ----------------------------------------------------------------------- The problem is that the tipc_link_delete() will cancel the timer disc_timeout() when the b_ptr->lock is hold, but the disc_timeout() still call b_ptr->lock to finish the work, so the dead lock occurs. We should unlock the b_ptr->lock when del the disc_timeout(). Remove link_timeout() still met the same problem, the patch: http://article.gmane.org/gmane.network.tipc.general/4380 fix the problem, so no need to send patch for fix link_timeout() deadlock warming. Signed-off-by: Wang Weidong <wangweidong1@huawei.com> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com> Acked-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-09 16:12:58 +07:00
tipc_node_delete_links(net, b->identity);
RCU_INIT_POINTER(b->media_ptr, NULL);
if (b->link_req)
tipc_disc_delete(b->link_req);
for (i = 0; i < MAX_BEARERS; i++) {
if (b == rtnl_dereference(tn->bearer_list[i])) {
RCU_INIT_POINTER(tn->bearer_list[i], NULL);
break;
}
}
kfree_rcu(b, rcu);
}
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attr[])
{
struct net_device *dev;
char *driver_name = strchr((const char *)b->name, ':') + 1;
/* Find device with specified name */
dev = dev_get_by_name(net, driver_name);
if (!dev)
return -ENODEV;
tipc: improve and extend media address conversion functions TIPC currently handles two media specific addresses: Ethernet MAC addresses and InfiniBand addresses. Those are kept in three different formats: 1) A "raw" format as obtained from the device. This format is known only by the media specific adapter code in eth_media.c and ib_media.c. 2) A "generic" internal format, in the form of struct tipc_media_addr, which can be referenced and passed around by the generic media- unaware code. 3) A serialized version of the latter, to be conveyed in neighbor discovery messages. Conversion between the three formats can only be done by the media specific code, so we have function pointers for this purpose in struct tipc_media. Here, the media adapters can install their own conversion functions at startup. We now introduce a new such function, 'raw2addr()', whose purpose is to convert from format 1 to format 2 above. We also try to as far as possible uniform commenting, variable names and usage of these functions, with the purpose of making them more comprehensible. We can now also remove the function tipc_l2_media_addr_set(), whose job is done better by the new function. Finally, we expand the field for serialized addresses (format 3) in discovery messages from 20 to 32 bytes. This is permitted according to the spec, and reduces the risk of problems when we add new media in the future. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 16:39:13 +07:00
/* Associate TIPC bearer with L2 bearer */
rcu_assign_pointer(b->media_ptr, dev);
tipc: improve and extend media address conversion functions TIPC currently handles two media specific addresses: Ethernet MAC addresses and InfiniBand addresses. Those are kept in three different formats: 1) A "raw" format as obtained from the device. This format is known only by the media specific adapter code in eth_media.c and ib_media.c. 2) A "generic" internal format, in the form of struct tipc_media_addr, which can be referenced and passed around by the generic media- unaware code. 3) A serialized version of the latter, to be conveyed in neighbor discovery messages. Conversion between the three formats can only be done by the media specific code, so we have function pointers for this purpose in struct tipc_media. Here, the media adapters can install their own conversion functions at startup. We now introduce a new such function, 'raw2addr()', whose purpose is to convert from format 1 to format 2 above. We also try to as far as possible uniform commenting, variable names and usage of these functions, with the purpose of making them more comprehensible. We can now also remove the function tipc_l2_media_addr_set(), whose job is done better by the new function. Finally, we expand the field for serialized addresses (format 3) in discovery messages from 20 to 32 bytes. This is permitted according to the spec, and reduces the risk of problems when we add new media in the future. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 16:39:13 +07:00
memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
b->bcast_addr.broadcast = 1;
b->mtu = dev->mtu;
tipc: improve and extend media address conversion functions TIPC currently handles two media specific addresses: Ethernet MAC addresses and InfiniBand addresses. Those are kept in three different formats: 1) A "raw" format as obtained from the device. This format is known only by the media specific adapter code in eth_media.c and ib_media.c. 2) A "generic" internal format, in the form of struct tipc_media_addr, which can be referenced and passed around by the generic media- unaware code. 3) A serialized version of the latter, to be conveyed in neighbor discovery messages. Conversion between the three formats can only be done by the media specific code, so we have function pointers for this purpose in struct tipc_media. Here, the media adapters can install their own conversion functions at startup. We now introduce a new such function, 'raw2addr()', whose purpose is to convert from format 1 to format 2 above. We also try to as far as possible uniform commenting, variable names and usage of these functions, with the purpose of making them more comprehensible. We can now also remove the function tipc_l2_media_addr_set(), whose job is done better by the new function. Finally, we expand the field for serialized addresses (format 3) in discovery messages from 20 to 32 bytes. This is permitted according to the spec, and reduces the risk of problems when we add new media in the future. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 16:39:13 +07:00
b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr);
rcu_assign_pointer(dev->tipc_ptr, b);
return 0;
}
tipc: improve and extend media address conversion functions TIPC currently handles two media specific addresses: Ethernet MAC addresses and InfiniBand addresses. Those are kept in three different formats: 1) A "raw" format as obtained from the device. This format is known only by the media specific adapter code in eth_media.c and ib_media.c. 2) A "generic" internal format, in the form of struct tipc_media_addr, which can be referenced and passed around by the generic media- unaware code. 3) A serialized version of the latter, to be conveyed in neighbor discovery messages. Conversion between the three formats can only be done by the media specific code, so we have function pointers for this purpose in struct tipc_media. Here, the media adapters can install their own conversion functions at startup. We now introduce a new such function, 'raw2addr()', whose purpose is to convert from format 1 to format 2 above. We also try to as far as possible uniform commenting, variable names and usage of these functions, with the purpose of making them more comprehensible. We can now also remove the function tipc_l2_media_addr_set(), whose job is done better by the new function. Finally, we expand the field for serialized addresses (format 3) in discovery messages from 20 to 32 bytes. This is permitted according to the spec, and reduces the risk of problems when we add new media in the future. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 16:39:13 +07:00
/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
*
* Mark L2 bearer as inactive so that incoming buffers are thrown away
*/
void tipc_disable_l2_media(struct tipc_bearer *b)
{
struct net_device *dev;
dev = (struct net_device *)rtnl_dereference(b->media_ptr);
RCU_INIT_POINTER(dev->tipc_ptr, NULL);
synchronize_net();
dev_put(dev);
}
/**
tipc: improve and extend media address conversion functions TIPC currently handles two media specific addresses: Ethernet MAC addresses and InfiniBand addresses. Those are kept in three different formats: 1) A "raw" format as obtained from the device. This format is known only by the media specific adapter code in eth_media.c and ib_media.c. 2) A "generic" internal format, in the form of struct tipc_media_addr, which can be referenced and passed around by the generic media- unaware code. 3) A serialized version of the latter, to be conveyed in neighbor discovery messages. Conversion between the three formats can only be done by the media specific code, so we have function pointers for this purpose in struct tipc_media. Here, the media adapters can install their own conversion functions at startup. We now introduce a new such function, 'raw2addr()', whose purpose is to convert from format 1 to format 2 above. We also try to as far as possible uniform commenting, variable names and usage of these functions, with the purpose of making them more comprehensible. We can now also remove the function tipc_l2_media_addr_set(), whose job is done better by the new function. Finally, we expand the field for serialized addresses (format 3) in discovery messages from 20 to 32 bytes. This is permitted according to the spec, and reduces the risk of problems when we add new media in the future. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 16:39:13 +07:00
* tipc_l2_send_msg - send a TIPC packet out over an L2 interface
* @buf: the packet to be sent
* @b: the bearer through which the packet is to be sent
* @dest: peer destination address
*/
int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
struct tipc_bearer *b, struct tipc_media_addr *dest)
{
struct net_device *dev;
int delta;
dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
if (!dev)
return 0;
delta = dev->hard_header_len - skb_headroom(skb);
if ((delta > 0) &&
pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
kfree_skb(skb);
return 0;
}
skb_reset_network_header(skb);
skb->dev = dev;
skb->protocol = htons(ETH_P_TIPC);
dev_hard_header(skb, dev, ETH_P_TIPC, dest->value,
dev->dev_addr, skb->len);
dev_queue_xmit(skb);
return 0;
}
int tipc_bearer_mtu(struct net *net, u32 bearer_id)
{
int mtu = 0;
struct tipc_bearer *b;
rcu_read_lock();
b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]);
if (b)
mtu = b->mtu;
rcu_read_unlock();
return mtu;
}
/* tipc_bearer_xmit_skb - sends buffer to destination over bearer
*/
void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
struct sk_buff *skb,
struct tipc_media_addr *dest)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_bearer *b;
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (likely(b))
b->media->send_msg(net, skb, b, dest);
rcu_read_unlock();
}
/* tipc_bearer_xmit() -send buffer to destination over bearer
*/
void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq,
struct tipc_media_addr *dst)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
struct sk_buff *skb, *tmp;
if (skb_queue_empty(xmitq))
return;
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (likely(b)) {
skb_queue_walk_safe(xmitq, skb, tmp) {
__skb_dequeue(xmitq);
b->media->send_msg(net, skb, b, dst);
}
}
rcu_read_unlock();
}
/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations
*/
void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq)
{
struct tipc_net *tn = tipc_net(net);
int net_id = tn->net_id;
struct tipc_bearer *b;
struct sk_buff *skb, *tmp;
struct tipc_msg *hdr;
rcu_read_lock();
b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
if (likely(b)) {
skb_queue_walk_safe(xmitq, skb, tmp) {
hdr = buf_msg(skb);
msg_set_non_seq(hdr, 1);
msg_set_mc_netid(hdr, net_id);
__skb_dequeue(xmitq);
b->media->send_msg(net, skb, b, &b->bcast_addr);
}
}
rcu_read_unlock();
}
/**
* tipc_l2_rcv_msg - handle incoming TIPC message from an interface
* @buf: the received packet
* @dev: the net device that the packet was received on
* @pt: the packet_type structure which was used to register this handler
* @orig_dev: the original receive net device in case the device is a bond
*
* Accept only packets explicitly sent to this node, or broadcast packets;
* ignores packets sent using interface multicast, and traffic sent to other
* nodes (which can happen if interface is running in promiscuous mode).
*/
static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct tipc_bearer *b;
rcu_read_lock();
b = rcu_dereference_rtnl(dev->tipc_ptr);
if (likely(b)) {
if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
buf->next = NULL;
tipc_rcv(dev_net(dev), buf, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
}
}
rcu_read_unlock();
kfree_skb(buf);
return NET_RX_DROP;
}
/**
* tipc_l2_device_event - handle device events from network device
* @nb: the context of the notification
* @evt: the type of event
* @ptr: the net device that the event was on
*
* This function is called by the Ethernet driver in case of link
* change event.
*/
static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
struct tipc_bearer *b;
b = rtnl_dereference(dev->tipc_ptr);
if (!b)
return NOTIFY_DONE;
b->mtu = dev->mtu;
switch (evt) {
case NETDEV_CHANGE:
if (netif_carrier_ok(dev))
break;
case NETDEV_GOING_DOWN:
case NETDEV_CHANGEMTU:
tipc_reset_bearer(net, b);
break;
case NETDEV_CHANGEADDR:
b->media->raw2addr(b, &b->addr,
(char *)dev->dev_addr);
tipc_reset_bearer(net, b);
break;
case NETDEV_UNREGISTER:
case NETDEV_CHANGENAME:
bearer_disable(dev_net(dev), b);
break;
}
return NOTIFY_OK;
}
static struct packet_type tipc_packet_type __read_mostly = {
.type = htons(ETH_P_TIPC),
.func = tipc_l2_rcv_msg,
};
static struct notifier_block notifier = {
.notifier_call = tipc_l2_device_event,
.priority = 0,
};
int tipc_bearer_setup(void)
{
int err;
err = register_netdevice_notifier(&notifier);
if (err)
return err;
dev_add_pack(&tipc_packet_type);
return 0;
}
void tipc_bearer_cleanup(void)
{
unregister_netdevice_notifier(&notifier);
dev_remove_pack(&tipc_packet_type);
}
void tipc_bearer_stop(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
b = rtnl_dereference(tn->bearer_list[i]);
if (b) {
bearer_disable(net, b);
tn->bearer_list[i] = NULL;
}
}
}
/* Caller should hold rtnl_lock to protect the bearer */
static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
struct tipc_bearer *bearer, int nlflags)
{
void *hdr;
struct nlattr *attrs;
struct nlattr *prop;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
nlflags, TIPC_NL_BEARER_GET);
if (!hdr)
return -EMSGSIZE;
attrs = nla_nest_start(msg->skb, TIPC_NLA_BEARER);
if (!attrs)
goto msg_full;
if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name))
goto attr_msg_full;
prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP);
if (!prop)
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
goto prop_msg_full;
nla_nest_end(msg->skb, prop);
nla_nest_end(msg->skb, attrs);
genlmsg_end(msg->skb, hdr);
return 0;
prop_msg_full:
nla_nest_cancel(msg->skb, prop);
attr_msg_full:
nla_nest_cancel(msg->skb, attrs);
msg_full:
genlmsg_cancel(msg->skb, hdr);
return -EMSGSIZE;
}
int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int err;
int i = cb->args[0];
struct tipc_bearer *bearer;
struct tipc_nl_msg msg;
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
if (i == MAX_BEARERS)
return 0;
msg.skb = skb;
msg.portid = NETLINK_CB(cb->skb).portid;
msg.seq = cb->nlh->nlmsg_seq;
rtnl_lock();
for (i = 0; i < MAX_BEARERS; i++) {
bearer = rtnl_dereference(tn->bearer_list[i]);
if (!bearer)
continue;
err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI);
if (err)
break;
}
rtnl_unlock();
cb->args[0] = i;
return skb->len;
}
int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
struct sk_buff *rep;
struct tipc_bearer *bearer;
struct tipc_nl_msg msg;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = genl_info_net(info);
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
tipc_nl_bearer_policy);
if (err)
return err;
if (!attrs[TIPC_NLA_BEARER_NAME])
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (!rep)
return -ENOMEM;
msg.skb = rep;
msg.portid = info->snd_portid;
msg.seq = info->snd_seq;
rtnl_lock();
bearer = tipc_bearer_find(net, name);
if (!bearer) {
err = -EINVAL;
goto err_out;
}
err = __tipc_nl_add_bearer(&msg, bearer, 0);
if (err)
goto err_out;
rtnl_unlock();
return genlmsg_reply(rep, info);
err_out:
rtnl_unlock();
nlmsg_free(rep);
return err;
}
int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
struct tipc_bearer *bearer;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
tipc_nl_bearer_policy);
if (err)
return err;
if (!attrs[TIPC_NLA_BEARER_NAME])
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
rtnl_lock();
bearer = tipc_bearer_find(net, name);
if (!bearer) {
rtnl_unlock();
return -EINVAL;
}
bearer_disable(net, bearer);
rtnl_unlock();
return 0;
}
int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *bearer;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 domain;
u32 prio;
prio = TIPC_MEDIA_LINK_PRI;
domain = tn->own_addr & TIPC_CLUSTER_MASK;
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
tipc_nl_bearer_policy);
if (err)
return err;
if (!attrs[TIPC_NLA_BEARER_NAME])
return -EINVAL;
bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
if (attrs[TIPC_NLA_BEARER_DOMAIN])
domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]);
if (attrs[TIPC_NLA_BEARER_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP],
props);
if (err)
return err;
if (props[TIPC_NLA_PROP_PRIO])
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
}
rtnl_lock();
err = tipc_enable_bearer(net, bearer, domain, prio, attrs);
if (err) {
rtnl_unlock();
return err;
}
rtnl_unlock();
return 0;
}
int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
struct tipc_bearer *b;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
struct net *net = sock_net(skb->sk);
if (!info->attrs[TIPC_NLA_BEARER])
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
info->attrs[TIPC_NLA_BEARER],
tipc_nl_bearer_policy);
if (err)
return err;
if (!attrs[TIPC_NLA_BEARER_NAME])
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
rtnl_lock();
b = tipc_bearer_find(net, name);
if (!b) {
rtnl_unlock();
return -EINVAL;
}
if (attrs[TIPC_NLA_BEARER_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP],
props);
if (err) {
rtnl_unlock();
return err;
}
if (props[TIPC_NLA_PROP_TOL])
b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
}
rtnl_unlock();
return 0;
}
static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
struct tipc_media *media, int nlflags)
{
void *hdr;
struct nlattr *attrs;
struct nlattr *prop;
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
nlflags, TIPC_NL_MEDIA_GET);
if (!hdr)
return -EMSGSIZE;
attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA);
if (!attrs)
goto msg_full;
if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name))
goto attr_msg_full;
prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP);
if (!prop)
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
goto prop_msg_full;
nla_nest_end(msg->skb, prop);
nla_nest_end(msg->skb, attrs);
genlmsg_end(msg->skb, hdr);
return 0;
prop_msg_full:
nla_nest_cancel(msg->skb, prop);
attr_msg_full:
nla_nest_cancel(msg->skb, attrs);
msg_full:
genlmsg_cancel(msg->skb, hdr);
return -EMSGSIZE;
}
int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int err;
int i = cb->args[0];
struct tipc_nl_msg msg;
if (i == MAX_MEDIA)
return 0;
msg.skb = skb;
msg.portid = NETLINK_CB(cb->skb).portid;
msg.seq = cb->nlh->nlmsg_seq;
rtnl_lock();
for (; media_info_array[i] != NULL; i++) {
err = __tipc_nl_add_media(&msg, media_info_array[i],
NLM_F_MULTI);
if (err)
break;
}
rtnl_unlock();
cb->args[0] = i;
return skb->len;
}
int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
struct tipc_nl_msg msg;
struct tipc_media *media;
struct sk_buff *rep;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
info->attrs[TIPC_NLA_MEDIA],
tipc_nl_media_policy);
if (err)
return err;
if (!attrs[TIPC_NLA_MEDIA_NAME])
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (!rep)
return -ENOMEM;
msg.skb = rep;
msg.portid = info->snd_portid;
msg.seq = info->snd_seq;
rtnl_lock();
media = tipc_media_find(name);
if (!media) {
err = -EINVAL;
goto err_out;
}
err = __tipc_nl_add_media(&msg, media, 0);
if (err)
goto err_out;
rtnl_unlock();
return genlmsg_reply(rep, info);
err_out:
rtnl_unlock();
nlmsg_free(rep);
return err;
}
int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
{
int err;
char *name;
struct tipc_media *m;
struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX,
info->attrs[TIPC_NLA_MEDIA],
tipc_nl_media_policy);
if (!attrs[TIPC_NLA_MEDIA_NAME])
return -EINVAL;
name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
rtnl_lock();
m = tipc_media_find(name);
if (!m) {
rtnl_unlock();
return -EINVAL;
}
if (attrs[TIPC_NLA_MEDIA_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP],
props);
if (err) {
rtnl_unlock();
return err;
}
if (props[TIPC_NLA_PROP_TOL])
m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
if (props[TIPC_NLA_PROP_PRIO])
m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
}
rtnl_unlock();
return 0;
}