linux_dsm_epyc7002/net/tipc/discover.c

419 lines
13 KiB
C
Raw Normal View History

/*
* net/tipc/discover.c
*
* Copyright (c) 2003-2006, 2014-2015, Ericsson AB
* Copyright (c) 2005-2006, 2010-2011, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "core.h"
#include "link.h"
#include "discover.h"
/* min delay during bearer start up */
#define TIPC_LINK_REQ_INIT msecs_to_jiffies(125)
/* max delay if bearer has no links */
#define TIPC_LINK_REQ_FAST msecs_to_jiffies(1000)
/* max delay if bearer has links */
#define TIPC_LINK_REQ_SLOW msecs_to_jiffies(60000)
/* indicates no timer in use */
#define TIPC_LINK_REQ_INACTIVE 0xffffffff
/**
* struct tipc_link_req - information about an ongoing link setup request
* @bearer_id: identity of bearer issuing requests
* @net: network namespace instance
* @dest: destination address for request messages
* @domain: network domain to which links can be established
* @num_nodes: number of nodes currently discovered (i.e. with an active link)
* @lock: spinlock for controlling access to requests
* @buf: request message to be (repeatedly) sent
* @timer: timer governing period between requests
* @timer_intv: current interval between requests (in ms)
*/
struct tipc_link_req {
u32 bearer_id;
struct tipc_media_addr dest;
struct net *net;
u32 domain;
int num_nodes;
spinlock_t lock;
struct sk_buff *buf;
struct timer_list timer;
unsigned long timer_intv;
};
/**
* tipc_disc_init_msg - initialize a link setup message
* @net: the applicable net namespace
* @type: message type (request or response)
* @b_ptr: ptr to bearer issuing message
*/
static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
struct tipc_bearer *b_ptr)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_msg *msg;
u32 dest_domain = b_ptr->domain;
msg = buf_msg(buf);
2015-02-05 20:36:36 +07:00
tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
MAX_H_SIZE, dest_domain);
msg_set_non_seq(msg, 1);
msg_set_node_sig(msg, tn->random);
msg_set_node_capabilities(msg, 0);
msg_set_dest_domain(msg, dest_domain);
msg_set_bc_netid(msg, tn->net_id);
tipc: improve and extend media address conversion functions TIPC currently handles two media specific addresses: Ethernet MAC addresses and InfiniBand addresses. Those are kept in three different formats: 1) A "raw" format as obtained from the device. This format is known only by the media specific adapter code in eth_media.c and ib_media.c. 2) A "generic" internal format, in the form of struct tipc_media_addr, which can be referenced and passed around by the generic media- unaware code. 3) A serialized version of the latter, to be conveyed in neighbor discovery messages. Conversion between the three formats can only be done by the media specific code, so we have function pointers for this purpose in struct tipc_media. Here, the media adapters can install their own conversion functions at startup. We now introduce a new such function, 'raw2addr()', whose purpose is to convert from format 1 to format 2 above. We also try to as far as possible uniform commenting, variable names and usage of these functions, with the purpose of making them more comprehensible. We can now also remove the function tipc_l2_media_addr_set(), whose job is done better by the new function. Finally, we expand the field for serialized addresses (format 3) in discovery messages from 20 to 32 bytes. This is permitted according to the spec, and reduces the risk of problems when we add new media in the future. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 16:39:13 +07:00
b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr);
}
/**
* disc_dupl_alert - issue node address duplication alert
* @b_ptr: pointer to bearer detecting duplication
* @node_addr: duplicated node address
* @media_addr: media address advertised by duplicated node
*/
static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
struct tipc_media_addr *media_addr)
{
char node_addr_str[16];
char media_addr_str[64];
tipc_addr_string_fill(node_addr_str, node_addr);
tipc: phase out most of the struct print_buf usage The tipc_printf is renamed to tipc_snprintf, as the new name describes more what the function actually does. It is also changed to take a buffer and length parameter and return number of characters written to the buffer. All callers of this function that used to pass a print_buf are updated. Final removal of the struct print_buf itself will be done synchronously with the pending removal of the deprecated logging code that also was using it. Functions that build up a response message with a list of ports, nametable contents etc. are changed to return the number of characters written to the output buffer. This information was previously hidden in a field of the print_buf struct, and the number of chars written was fetched with a call to tipc_printbuf_validate. This function is removed since it is no longer referenced nor needed. A generic max size ULTRA_STRING_MAX_LEN is defined, named in keeping with the existing TIPC_TLV_ULTRA_STRING, and the various definitions in port, link and nametable code that largely duplicated this information are removed. This means that amount of link statistics that can be returned is now increased from 2k to 32k. The buffer overflow check is now done just before the reply message is passed over netlink or TIPC to a remote node and the message indicating a truncated buffer is changed to a less dramatic one (less CAPS), placed at the end of the message. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-06-29 11:50:23 +07:00
tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
media_addr);
pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
media_addr_str, b_ptr->name);
}
/**
* tipc_disc_rcv - handle incoming discovery message (request or response)
* @net: the applicable net namespace
* @buf: buffer containing message
* @bearer: bearer that message arrived on
*/
void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
struct tipc_bearer *bearer)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
struct tipc_link *link;
struct tipc_media_addr maddr;
struct sk_buff *rbuf;
struct tipc_msg *msg = buf_msg(buf);
u32 ddom = msg_dest_domain(msg);
u32 onode = msg_prevnode(msg);
u32 net_id = msg_bc_netid(msg);
u32 mtyp = msg_type(msg);
u32 signature = msg_node_sig(msg);
u16 caps = msg_node_capabilities(msg);
bool addr_match = false;
bool sign_match = false;
bool link_up = false;
bool accept_addr = false;
bool accept_sign = false;
bool respond = false;
bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg));
kfree_skb(buf);
tipc: Detect duplicate nodes using different network interfaces Utilizes the new "node signature" field in neighbor discovery messages to ensure that all links TIPC associates with a given <Z.C.N> network address belong to the same neighboring node. (Previously, TIPC could not tell if link setup requests arriving on different interfaces were from the same node or from two different nodes that has mistakenly been assigned the same network address.) The revised algorithm for detecting a duplicate node considers both the node signature and the network interface adddress specified in a request message when deciding how to respond to a link setup request. This prevents false alarms that might otherwise arise during normal network operation under the following scenarios: a) A neighboring node reboots. (The node's signature changes, but the network interface address remains unchanged.) b) A neighboring node's network interface is replaced. (The node's signature remains unchanged, but the network interface address changes.) c) A neighboring node is completely replaced. (The node's signature and network interface address both change.) The algorithm also handles cases in which a node reboots and re-establishes its links to TIPC (or begins re-establishing those links) before TIPC detects that it is using a new node signature. In such cases of "delayed rediscovery" TIPC simply accepts the new signature without disrupting communication that is already underway over the links. Thanks to Laser [gotolaser@gmail.com] for his contributions to the development of this enhancement. Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-29 04:30:08 +07:00
/* Ensure message from node is valid and communication is permitted */
if (net_id != tn->net_id)
return;
if (maddr.broadcast)
return;
if (!tipc_addr_domain_valid(ddom))
return;
if (!tipc_addr_node_valid(onode))
return;
if (in_own_node(net, onode)) {
if (memcmp(&maddr, &bearer->addr, sizeof(maddr)))
disc_dupl_alert(bearer, tn->own_addr, &maddr);
return;
}
if (!tipc_in_scope(ddom, tn->own_addr))
return;
if (!tipc_in_scope(bearer->domain, onode))
return;
node = tipc_node_create(net, onode);
if (!node)
return;
tipc_node_lock(node);
node->capabilities = caps;
link = node->links[bearer->identity];
tipc: Detect duplicate nodes using different network interfaces Utilizes the new "node signature" field in neighbor discovery messages to ensure that all links TIPC associates with a given <Z.C.N> network address belong to the same neighboring node. (Previously, TIPC could not tell if link setup requests arriving on different interfaces were from the same node or from two different nodes that has mistakenly been assigned the same network address.) The revised algorithm for detecting a duplicate node considers both the node signature and the network interface adddress specified in a request message when deciding how to respond to a link setup request. This prevents false alarms that might otherwise arise during normal network operation under the following scenarios: a) A neighboring node reboots. (The node's signature changes, but the network interface address remains unchanged.) b) A neighboring node's network interface is replaced. (The node's signature remains unchanged, but the network interface address changes.) c) A neighboring node is completely replaced. (The node's signature and network interface address both change.) The algorithm also handles cases in which a node reboots and re-establishes its links to TIPC (or begins re-establishing those links) before TIPC detects that it is using a new node signature. In such cases of "delayed rediscovery" TIPC simply accepts the new signature without disrupting communication that is already underway over the links. Thanks to Laser [gotolaser@gmail.com] for his contributions to the development of this enhancement. Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-29 04:30:08 +07:00
/* Prepare to validate requesting node's signature and media address */
sign_match = (signature == node->signature);
addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr));
link_up = link && tipc_link_is_up(link);
tipc: Detect duplicate nodes using different network interfaces Utilizes the new "node signature" field in neighbor discovery messages to ensure that all links TIPC associates with a given <Z.C.N> network address belong to the same neighboring node. (Previously, TIPC could not tell if link setup requests arriving on different interfaces were from the same node or from two different nodes that has mistakenly been assigned the same network address.) The revised algorithm for detecting a duplicate node considers both the node signature and the network interface adddress specified in a request message when deciding how to respond to a link setup request. This prevents false alarms that might otherwise arise during normal network operation under the following scenarios: a) A neighboring node reboots. (The node's signature changes, but the network interface address remains unchanged.) b) A neighboring node's network interface is replaced. (The node's signature remains unchanged, but the network interface address changes.) c) A neighboring node is completely replaced. (The node's signature and network interface address both change.) The algorithm also handles cases in which a node reboots and re-establishes its links to TIPC (or begins re-establishing those links) before TIPC detects that it is using a new node signature. In such cases of "delayed rediscovery" TIPC simply accepts the new signature without disrupting communication that is already underway over the links. Thanks to Laser [gotolaser@gmail.com] for his contributions to the development of this enhancement. Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-29 04:30:08 +07:00
/* These three flags give us eight permutations: */
if (sign_match && addr_match && link_up) {
/* All is fine. Do nothing. */
} else if (sign_match && addr_match && !link_up) {
/* Respond. The link will come up in due time */
respond = true;
} else if (sign_match && !addr_match && link_up) {
/* Peer has changed i/f address without rebooting.
* If so, the link will reset soon, and the next
* discovery will be accepted. So we can ignore it.
* It may also be an cloned or malicious peer having
* chosen the same node address and signature as an
* existing one.
* Ignore requests until the link goes down, if ever.
*/
disc_dupl_alert(bearer, onode, &maddr);
} else if (sign_match && !addr_match && !link_up) {
/* Peer link has changed i/f address without rebooting.
* It may also be a cloned or malicious peer; we can't
* distinguish between the two.
* The signature is correct, so we must accept.
*/
accept_addr = true;
respond = true;
} else if (!sign_match && addr_match && link_up) {
/* Peer node rebooted. Two possibilities:
* - Delayed re-discovery; this link endpoint has already
* reset and re-established contact with the peer, before
* receiving a discovery message from that node.
* (The peer happened to receive one from this node first).
* - The peer came back so fast that our side has not
* discovered it yet. Probing from this side will soon
* reset the link, since there can be no working link
* endpoint at the peer end, and the link will re-establish.
* Accept the signature, since it comes from a known peer.
*/
accept_sign = true;
} else if (!sign_match && addr_match && !link_up) {
/* The peer node has rebooted.
* Accept signature, since it is a known peer.
*/
accept_sign = true;
respond = true;
} else if (!sign_match && !addr_match && link_up) {
/* Peer rebooted with new address, or a new/duplicate peer.
* Ignore until the link goes down, if ever.
*/
disc_dupl_alert(bearer, onode, &maddr);
} else if (!sign_match && !addr_match && !link_up) {
/* Peer rebooted with new address, or it is a new peer.
* Accept signature and address.
*/
accept_sign = true;
accept_addr = true;
respond = true;
tipc: Detect duplicate nodes using different network interfaces Utilizes the new "node signature" field in neighbor discovery messages to ensure that all links TIPC associates with a given <Z.C.N> network address belong to the same neighboring node. (Previously, TIPC could not tell if link setup requests arriving on different interfaces were from the same node or from two different nodes that has mistakenly been assigned the same network address.) The revised algorithm for detecting a duplicate node considers both the node signature and the network interface adddress specified in a request message when deciding how to respond to a link setup request. This prevents false alarms that might otherwise arise during normal network operation under the following scenarios: a) A neighboring node reboots. (The node's signature changes, but the network interface address remains unchanged.) b) A neighboring node's network interface is replaced. (The node's signature remains unchanged, but the network interface address changes.) c) A neighboring node is completely replaced. (The node's signature and network interface address both change.) The algorithm also handles cases in which a node reboots and re-establishes its links to TIPC (or begins re-establishing those links) before TIPC detects that it is using a new node signature. In such cases of "delayed rediscovery" TIPC simply accepts the new signature without disrupting communication that is already underway over the links. Thanks to Laser [gotolaser@gmail.com] for his contributions to the development of this enhancement. Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-29 04:30:08 +07:00
}
if (accept_sign)
node->signature = signature;
if (accept_addr) {
if (!link)
link = tipc_link_create(node, bearer, &maddr);
if (link) {
memcpy(&link->media_addr, &maddr, sizeof(maddr));
tipc_link_reset(link);
} else {
respond = false;
}
}
/* Send response, if necessary */
if (respond && (mtyp == DSC_REQ_MSG)) {
rbuf = tipc_buf_acquire(MAX_H_SIZE);
if (rbuf) {
tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer);
tipc_bearer_send(net, bearer->identity, rbuf, &maddr);
kfree_skb(rbuf);
}
}
tipc_node_unlock(node);
tipc_node_put(node);
}
/**
* disc_update - update frequency of periodic link setup requests
* @req: ptr to link request structure
*
* Reinitiates discovery process if discovery object has no associated nodes
* and is either not currently searching or is searching at a slow rate
*/
static void disc_update(struct tipc_link_req *req)
{
if (!req->num_nodes) {
if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
(req->timer_intv > TIPC_LINK_REQ_FAST)) {
req->timer_intv = TIPC_LINK_REQ_INIT;
mod_timer(&req->timer, jiffies + req->timer_intv);
}
}
}
/**
* tipc_disc_add_dest - increment set of discovered nodes
* @req: ptr to link request structure
*/
void tipc_disc_add_dest(struct tipc_link_req *req)
{
spin_lock_bh(&req->lock);
req->num_nodes++;
spin_unlock_bh(&req->lock);
}
/**
* tipc_disc_remove_dest - decrement set of discovered nodes
* @req: ptr to link request structure
*/
void tipc_disc_remove_dest(struct tipc_link_req *req)
{
spin_lock_bh(&req->lock);
req->num_nodes--;
disc_update(req);
spin_unlock_bh(&req->lock);
}
/**
* disc_timeout - send a periodic link setup request
* @data: ptr to link request structure
*
* Called whenever a link setup request timer associated with a bearer expires.
*/
static void disc_timeout(unsigned long data)
{
struct tipc_link_req *req = (struct tipc_link_req *)data;
int max_delay;
spin_lock_bh(&req->lock);
/* Stop searching if only desired node has been found */
if (tipc_node(req->domain) && req->num_nodes) {
req->timer_intv = TIPC_LINK_REQ_INACTIVE;
goto exit;
}
/*
* Send discovery message, then update discovery timer
*
* Keep doubling time between requests until limit is reached;
* hold at fast polling rate if don't have any associated nodes,
* otherwise hold at slow polling rate
*/
tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest);
tipc: remove interface state mirroring in bearer struct 'tipc_bearer' is a generic representation of the underlying media type, and exists in a one-to-one relationship to each interface TIPC is using. The struct contains a 'blocked' flag that mirrors the operational and execution state of the represented interface, and is updated through notification calls from the latter. The users of tipc_bearer are checking this flag before each attempt to send a packet via the interface. This state mirroring serves no purpose in the current code base. TIPC links will not discover a media failure any faster through this mechanism, and in reality the flag only adds overhead at packet sending and reception. Furthermore, the fact that the flag needs to be protected by a spinlock aggregated into tipc_bearer has turned out to cause a serious and completely unnecessary deadlock problem. CPU0 CPU1 ---- ---- Time 0: bearer_disable() link_timeout() Time 1: spin_lock_bh(&b_ptr->lock) tipc_link_push_queue() Time 2: tipc_link_delete() tipc_bearer_blocked(b_ptr) Time 3: k_cancel_timer(&req->timer) spin_lock_bh(&b_ptr->lock) Time 4: del_timer_sync(&req->timer) I.e., del_timer_sync() on CPU0 never returns, because the timer handler on CPU1 is waiting for the bearer lock. We eliminate the 'blocked' flag from struct tipc_bearer, along with all tests on this flag. This not only resolves the deadlock, but also simplifies and speeds up the data path execution of TIPC. It also fits well into our ongoing effort to make the locking policy simpler and more manageable. An effect of this change is that we can get rid of functions such as tipc_bearer_blocked(), tipc_continue() and tipc_block_bearer(). We replace the latter with a new function, tipc_reset_bearer(), which resets all links associated to the bearer immediately after an interface goes down. A user might notice one slight change in link behaviour after this change. When an interface goes down, (e.g. through a NETDEV_DOWN event) all attached links will be reset immediately, instead of leaving it to each link to detect the failure through a timer-driven mechanism. We consider this an improvement, and see no obvious risks with the new behavior. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Reviewed-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Paul Gortmaker <Paul.Gortmaker@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 22:08:00 +07:00
req->timer_intv *= 2;
if (req->num_nodes)
max_delay = TIPC_LINK_REQ_SLOW;
else
max_delay = TIPC_LINK_REQ_FAST;
if (req->timer_intv > max_delay)
req->timer_intv = max_delay;
mod_timer(&req->timer, jiffies + req->timer_intv);
exit:
spin_unlock_bh(&req->lock);
}
/**
* tipc_disc_create - create object to send periodic link setup requests
* @net: the applicable net namespace
* @b_ptr: ptr to bearer issuing requests
* @dest: destination address for request messages
* @dest_domain: network domain to which links can be established
*
* Returns 0 if successful, otherwise -errno.
*/
int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest)
{
struct tipc_link_req *req;
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
return -ENOMEM;
req->buf = tipc_buf_acquire(MAX_H_SIZE);
if (!req->buf) {
kfree(req);
return -ENOMEM;
}
tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr);
memcpy(&req->dest, dest, sizeof(*dest));
req->net = net;
req->bearer_id = b_ptr->identity;
req->domain = b_ptr->domain;
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
spin_lock_init(&req->lock);
setup_timer(&req->timer, disc_timeout, (unsigned long)req);
mod_timer(&req->timer, jiffies + req->timer_intv);
b_ptr->link_req = req;
tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest);
return 0;
}
/**
* tipc_disc_delete - destroy object sending periodic link setup requests
* @req: ptr to link request structure
*/
void tipc_disc_delete(struct tipc_link_req *req)
{
del_timer_sync(&req->timer);
kfree_skb(req->buf);
kfree(req);
}
/**
* tipc_disc_reset - reset object to send periodic link setup requests
* @net: the applicable net namespace
* @b_ptr: ptr to bearer issuing requests
* @dest_domain: network domain to which links can be established
*/
void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr)
{
struct tipc_link_req *req = b_ptr->link_req;
spin_lock_bh(&req->lock);
tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr);
req->net = net;
req->bearer_id = b_ptr->identity;
req->domain = b_ptr->domain;
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
mod_timer(&req->timer, jiffies + req->timer_intv);
tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest);
spin_unlock_bh(&req->lock);
}