linux_dsm_epyc7002/net/tipc/node.c

488 lines
13 KiB
C
Raw Normal View History

/*
* net/tipc/node.c: TIPC node management routines
*
tipc: introduce message to synchronize broadcast link Upon establishing a first link between two nodes, there is currently a risk that the two endpoints will disagree on exactly which sequence number reception and acknowleding of broadcast packets should start. The following scenarios may happen: 1: Node A sends an ACTIVATE message to B, telling it to start acking packets from sequence number N. 2: Node A sends out broadcast N, but does not expect an acknowledge from B, since B is not yet in its broadcast receiver's list. 3: Node A receives ACK for N from all nodes except B, and releases packet N. 4: Node B receives the ACTIVATE, activates its link endpoint, and stores the value N as sequence number of first expected packet. 5: Node B sends a NAME_DISTR message to A. 6: Node A receives the NAME_DISTR message, and activates its endpoint. At this moment B is added to A's broadcast receiver's set. Node A also sets sequence number 0 as the first broadcast packet to be received from B. 7: Node A sends broadcast N+1. 8: B receives N+1, determines there is a gap in the sequence, since it is expecting N, and sends a NACK for N back to A. 9: Node A has already released N, so no retransmission is possible. The broadcast link in direction A->B is stale. In addition to, or instead of, 7-9 above, the following may happen: 10: Node B sends broadcast M > 0 to A. 11: Node A receives M, falsely decides there must be a gap, since it is expecting packet 0, and asks for retransmission of packets [0,M-1]. 12: Node B has already released these packets, so the broadcast link is stale in direction B->A. We solve this problem by introducing a new unicast message type, BCAST_PROTOCOL/STATE, to convey the sequence number of the next sent broadcast packet to the other endpoint, at exactly the moment that endpoint is added to the own node's broadcast receivers list, and before any other unicast messages are permitted to be sent. Furthermore, we don't allow any node to start receiving and processing broadcast packets until this new synchronization message has been received. To maintain backwards compatibility, we still open up for broadcast reception if we receive a NAME_DISTR message without any preceding broadcast sync message. In this case, we must assume that the other end has an older code version, and will never send out the new synchronization message. Hence, for mixed old and new nodes, the issue arising in 7-12 of the above may happen with the same probability as before. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 12:51:31 +07:00
* Copyright (c) 2000-2006, 2012 Ericsson AB
* Copyright (c) 2005-2006, 2010-2014, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "core.h"
#include "config.h"
#include "node.h"
#include "name_distr.h"
#define NODE_HTABLE_SIZE 512
static void node_lost_contact(struct tipc_node *n_ptr);
static void node_established_contact(struct tipc_node *n_ptr);
static struct hlist_head node_htable[NODE_HTABLE_SIZE];
LIST_HEAD(tipc_node_list);
static u32 tipc_num_nodes;
static u32 tipc_num_links;
static DEFINE_SPINLOCK(node_list_lock);
/*
* A trivial power-of-two bitmask technique is used for speed, since this
* operation is done for every incoming TIPC packet. The number of hash table
* entries has been chosen so that no hash chain exceeds 8 nodes and will
* usually be much smaller (typically only a single node).
*/
static unsigned int tipc_hashfn(u32 addr)
{
return addr & (NODE_HTABLE_SIZE - 1);
}
/*
* tipc_node_find - locate specified node object, if it exists
*/
struct tipc_node *tipc_node_find(u32 addr)
{
struct tipc_node *node;
if (unlikely(!in_own_cluster_exact(addr)))
return NULL;
rcu_read_lock();
hlist_for_each_entry_rcu(node, &node_htable[tipc_hashfn(addr)], hash) {
if (node->addr == addr) {
rcu_read_unlock();
return node;
}
}
rcu_read_unlock();
return NULL;
}
struct tipc_node *tipc_node_create(u32 addr)
{
struct tipc_node *n_ptr, *temp_node;
spin_lock_bh(&node_list_lock);
n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
if (!n_ptr) {
spin_unlock_bh(&node_list_lock);
pr_warn("Node creation failed, no memory\n");
return NULL;
}
n_ptr->addr = addr;
spin_lock_init(&n_ptr->lock);
INIT_HLIST_NODE(&n_ptr->hash);
INIT_LIST_HEAD(&n_ptr->list);
INIT_LIST_HEAD(&n_ptr->nsub);
hlist_add_head_rcu(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tipc_node_list, list) {
if (n_ptr->addr < temp_node->addr)
break;
}
list_add_tail_rcu(&n_ptr->list, &temp_node->list);
n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
n_ptr->signature = INVALID_NODE_SIG;
tipc_num_nodes++;
spin_unlock_bh(&node_list_lock);
return n_ptr;
}
static void tipc_node_delete(struct tipc_node *n_ptr)
{
list_del_rcu(&n_ptr->list);
hlist_del_rcu(&n_ptr->hash);
kfree_rcu(n_ptr, rcu);
tipc_num_nodes--;
}
void tipc_node_stop(void)
{
struct tipc_node *node, *t_node;
spin_lock_bh(&node_list_lock);
list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
tipc_node_delete(node);
spin_unlock_bh(&node_list_lock);
}
/**
* tipc_node_link_up - handle addition of link
*
* Link becomes active (alone or shared) or standby, depending on its priority.
*/
void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
struct tipc_link **active = &n_ptr->active_links[0];
u32 addr = n_ptr->addr;
n_ptr->working_links++;
tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE,
l_ptr->bearer_id, addr);
pr_info("Established link <%s> on network plane %c\n",
l_ptr->name, l_ptr->net_plane);
if (!active[0]) {
active[0] = active[1] = l_ptr;
node_established_contact(n_ptr);
return;
}
if (l_ptr->priority < active[0]->priority) {
pr_info("New link <%s> becomes standby\n", l_ptr->name);
return;
}
tipc: align tipc function names with common naming practice in the network Rename the following functions, which are shorter and more in line with common naming practice in the network subsystem. tipc_bclink_send_msg->tipc_bclink_xmit tipc_bclink_recv_pkt->tipc_bclink_rcv tipc_disc_recv_msg->tipc_disc_rcv tipc_link_send_proto_msg->tipc_link_proto_xmit link_recv_proto_msg->tipc_link_proto_rcv link_send_sections_long->tipc_link_iovec_long_xmit tipc_link_send_sections_fast->tipc_link_iovec_xmit_fast tipc_link_send_sync->tipc_link_sync_xmit tipc_link_recv_sync->tipc_link_sync_rcv tipc_link_send_buf->__tipc_link_xmit tipc_link_send->tipc_link_xmit tipc_link_send_names->tipc_link_names_xmit tipc_named_recv->tipc_named_rcv tipc_link_recv_bundle->tipc_link_bundle_rcv tipc_link_dup_send_queue->tipc_link_dup_queue_xmit link_send_long_buf->tipc_link_frag_xmit tipc_multicast->tipc_port_mcast_xmit tipc_port_recv_mcast->tipc_port_mcast_rcv tipc_port_reject_sections->tipc_port_iovec_reject tipc_port_recv_proto_msg->tipc_port_proto_rcv tipc_connect->tipc_port_connect __tipc_connect->__tipc_port_connect __tipc_disconnect->__tipc_port_disconnect tipc_disconnect->tipc_port_disconnect tipc_shutdown->tipc_port_shutdown tipc_port_recv_msg->tipc_port_rcv tipc_port_recv_sections->tipc_port_iovec_rcv release->tipc_release accept->tipc_accept bind->tipc_bind get_name->tipc_getname poll->tipc_poll send_msg->tipc_sendmsg send_packet->tipc_send_packet send_stream->tipc_send_stream recv_msg->tipc_recvmsg recv_stream->tipc_recv_stream connect->tipc_connect listen->tipc_listen shutdown->tipc_shutdown setsockopt->tipc_setsockopt getsockopt->tipc_getsockopt Above changes have no impact on current users of the functions. Signed-off-by: Ying Xue <ying.xue@windriver.com> Reviewed-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-02-18 15:06:46 +07:00
tipc_link_dup_queue_xmit(active[0], l_ptr);
if (l_ptr->priority == active[0]->priority) {
active[0] = l_ptr;
return;
}
pr_info("Old link <%s> becomes standby\n", active[0]->name);
if (active[1] != active[0])
pr_info("Old link <%s> becomes standby\n", active[1]->name);
active[0] = active[1] = l_ptr;
}
/**
* node_select_active_links - select active link
*/
static void node_select_active_links(struct tipc_node *n_ptr)
{
struct tipc_link **active = &n_ptr->active_links[0];
u32 i;
u32 highest_prio = 0;
active[0] = active[1] = NULL;
for (i = 0; i < MAX_BEARERS; i++) {
struct tipc_link *l_ptr = n_ptr->links[i];
if (!l_ptr || !tipc_link_is_up(l_ptr) ||
(l_ptr->priority < highest_prio))
continue;
if (l_ptr->priority > highest_prio) {
highest_prio = l_ptr->priority;
active[0] = active[1] = l_ptr;
} else {
active[1] = l_ptr;
}
}
}
/**
* tipc_node_link_down - handle loss of link
*/
void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
struct tipc_link **active;
u32 addr = n_ptr->addr;
n_ptr->working_links--;
tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr);
if (!tipc_link_is_active(l_ptr)) {
pr_info("Lost standby link <%s> on network plane %c\n",
l_ptr->name, l_ptr->net_plane);
return;
}
pr_info("Lost link <%s> on network plane %c\n",
l_ptr->name, l_ptr->net_plane);
active = &n_ptr->active_links[0];
if (active[0] == l_ptr)
active[0] = active[1];
if (active[1] == l_ptr)
active[1] = active[0];
if (active[0] == l_ptr)
node_select_active_links(n_ptr);
if (tipc_node_is_up(n_ptr))
tipc_link_failover_send_queue(l_ptr);
else
node_lost_contact(n_ptr);
}
int tipc_node_active_links(struct tipc_node *n_ptr)
{
return n_ptr->active_links[0] != NULL;
}
int tipc_node_is_up(struct tipc_node *n_ptr)
{
return tipc_node_active_links(n_ptr);
}
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
n_ptr->links[l_ptr->bearer_id] = l_ptr;
spin_lock_bh(&node_list_lock);
tipc_num_links++;
spin_unlock_bh(&node_list_lock);
n_ptr->link_cnt++;
}
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
int i;
for (i = 0; i < MAX_BEARERS; i++) {
if (l_ptr != n_ptr->links[i])
continue;
n_ptr->links[i] = NULL;
spin_lock_bh(&node_list_lock);
tipc_num_links--;
spin_unlock_bh(&node_list_lock);
n_ptr->link_cnt--;
}
}
static void node_established_contact(struct tipc_node *n_ptr)
{
n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
tipc: introduce message to synchronize broadcast link Upon establishing a first link between two nodes, there is currently a risk that the two endpoints will disagree on exactly which sequence number reception and acknowleding of broadcast packets should start. The following scenarios may happen: 1: Node A sends an ACTIVATE message to B, telling it to start acking packets from sequence number N. 2: Node A sends out broadcast N, but does not expect an acknowledge from B, since B is not yet in its broadcast receiver's list. 3: Node A receives ACK for N from all nodes except B, and releases packet N. 4: Node B receives the ACTIVATE, activates its link endpoint, and stores the value N as sequence number of first expected packet. 5: Node B sends a NAME_DISTR message to A. 6: Node A receives the NAME_DISTR message, and activates its endpoint. At this moment B is added to A's broadcast receiver's set. Node A also sets sequence number 0 as the first broadcast packet to be received from B. 7: Node A sends broadcast N+1. 8: B receives N+1, determines there is a gap in the sequence, since it is expecting N, and sends a NACK for N back to A. 9: Node A has already released N, so no retransmission is possible. The broadcast link in direction A->B is stale. In addition to, or instead of, 7-9 above, the following may happen: 10: Node B sends broadcast M > 0 to A. 11: Node A receives M, falsely decides there must be a gap, since it is expecting packet 0, and asks for retransmission of packets [0,M-1]. 12: Node B has already released these packets, so the broadcast link is stale in direction B->A. We solve this problem by introducing a new unicast message type, BCAST_PROTOCOL/STATE, to convey the sequence number of the next sent broadcast packet to the other endpoint, at exactly the moment that endpoint is added to the own node's broadcast receivers list, and before any other unicast messages are permitted to be sent. Furthermore, we don't allow any node to start receiving and processing broadcast packets until this new synchronization message has been received. To maintain backwards compatibility, we still open up for broadcast reception if we receive a NAME_DISTR message without any preceding broadcast sync message. In this case, we must assume that the other end has an older code version, and will never send out the new synchronization message. Hence, for mixed old and new nodes, the issue arising in 7-12 of the above may happen with the same probability as before. Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: Ying Xue <ying.xue@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2012-11-16 12:51:31 +07:00
n_ptr->bclink.oos_state = 0;
n_ptr->bclink.acked = tipc_bclink_get_last_sent();
tipc_bclink_add_node(n_ptr->addr);
}
static void node_lost_contact(struct tipc_node *n_ptr)
{
char addr_string[16];
u32 i;
pr_info("Lost contact with %s\n",
tipc_addr_string_fill(addr_string, n_ptr->addr));
/* Flush broadcast link info associated with lost node */
if (n_ptr->bclink.recv_permitted) {
kfree_skb_list(n_ptr->bclink.deferred_head);
tipc: Major redesign of broadcast link ACK/NACK algorithms Completely redesigns broadcast link ACK and NACK mechanisms to prevent spurious retransmit requests in dual LAN networks, and to prevent the broadcast link from stalling due to the failure of a receiving node to acknowledge receiving a broadcast message or request its retransmission. Note: These changes only impact the timing of when ACK and NACK messages are sent, and not the basic broadcast link protocol itself, so inter- operability with nodes using the "classic" algorithms is maintained. The revised algorithms are as follows: 1) An explicit ACK message is still sent after receiving 16 in-sequence messages, and implicit ACK information continues to be carried in other unicast link message headers (including link state messages). However, the timing of explicit ACKs is now based on the receiving node's absolute network address rather than its relative network address to ensure that the failure of another node does not delay the ACK beyond its 16 message target. 2) A NACK message is now typically sent only when a message gap persists for two consecutive incoming link state messages; this ensures that a suspected gap is not confirmed until both LANs in a dual LAN network have had an opportunity to deliver the message, thereby preventing spurious NACKs. A NACK message can also be generated by the arrival of a single link state message, if the deferred queue is so big that the current message gap cannot be the result of "normal" mis-ordering due to the use of dual LANs (or one LAN using a bonded interface). Since link state messages typically arrive at different nodes at different times the problem of multiple nodes issuing identical NACKs simultaneously is inherently avoided. 3) Nodes continue to "peek" at NACK messages sent by other nodes. If another node requests retransmission of a message gap suspected (but not yet confirmed) by the peeking node, the peeking node forgets about the gap and does not generate a duplicate retransmit request. (If the peeking node subsequently fails to receive the lost message, later link state messages will cause it to rediscover and confirm the gap and send another NACK.) 4) Message gap "equality" is now determined by the start of the gap only. This is sufficient to deal with the most common cases of message loss, and eliminates the need for complex end of gap computations. 5) A peeking node no longer tries to determine whether it should send a complementary NACK, since the most common cases of message loss don't require it to be sent. Consequently, the node no longer examines the "broadcast tag" field of a NACK message when peeking. Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-28 01:17:53 +07:00
n_ptr->bclink.deferred_size = 0;
tipc: message reassembly using fragment chain When the first fragment of a long data data message is received on a link, a reassembly buffer large enough to hold the data from this and all subsequent fragments of the message is allocated. The payload of each new fragment is copied into this buffer upon arrival. When the last fragment is received, the reassembled message is delivered upwards to the port/socket layer. Not only is this an inefficient approach, but it may also cause bursts of reassembly failures in low memory situations. since we may fail to allocate the necessary large buffer in the first place. Furthermore, after 100 subsequent such failures the link will be reset, something that in reality aggravates the situation. To remedy this problem, this patch introduces a different approach. Instead of allocating a big reassembly buffer, we now append the arriving fragments to a reassembly chain on the link, and deliver the whole chain up to the socket layer once the last fragment has been received. This is safe because the retransmission layer of a TIPC link always delivers packets in strict uninterrupted order, to the reassembly layer as to all other upper layers. Hence there can never be more than one fragment chain pending reassembly at any given time in a link, and we can trust (but still verify) that the fragments will be chained up in the correct order. Signed-off-by: Erik Hugne <erik.hugne@ericsson.com> Reviewed-by: Paul Gortmaker <paul.gortmaker@windriver.com> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-11-06 15:28:06 +07:00
if (n_ptr->bclink.reasm_head) {
kfree_skb(n_ptr->bclink.reasm_head);
n_ptr->bclink.reasm_head = NULL;
n_ptr->bclink.reasm_tail = NULL;
}
tipc_bclink_remove_node(n_ptr->addr);
tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
n_ptr->bclink.recv_permitted = false;
}
/* Abort link changeover */
for (i = 0; i < MAX_BEARERS; i++) {
struct tipc_link *l_ptr = n_ptr->links[i];
if (!l_ptr)
continue;
l_ptr->reset_checkpoint = l_ptr->next_in_no;
l_ptr->exp_msg_count = 0;
tipc_link_reset_fragments(l_ptr);
}
/* Notify subscribers and prevent re-contact with node until
* cleanup is done.
*/
n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN |
TIPC_NOTIFY_NODE_DOWN;
}
struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
{
u32 domain;
struct sk_buff *buf;
struct tipc_node *n_ptr;
struct tipc_node_info node_info;
u32 payload_size;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (!tipc_addr_domain_valid(domain))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (network address)");
spin_lock_bh(&node_list_lock);
if (!tipc_num_nodes) {
spin_unlock_bh(&node_list_lock);
return tipc_cfg_reply_none();
}
/* For now, get space for all other nodes */
payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes;
if (payload_size > 32768u) {
spin_unlock_bh(&node_list_lock);
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (too many nodes)");
}
spin_unlock_bh(&node_list_lock);
buf = tipc_cfg_reply_alloc(payload_size);
if (!buf)
return NULL;
/* Add TLVs for all nodes in scope */
rcu_read_lock();
list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
if (!tipc_in_scope(domain, n_ptr->addr))
continue;
node_info.addr = htonl(n_ptr->addr);
node_info.up = htonl(tipc_node_is_up(n_ptr));
tipc_cfg_append_tlv(buf, TIPC_TLV_NODE_INFO,
&node_info, sizeof(node_info));
}
rcu_read_unlock();
return buf;
}
struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
{
u32 domain;
struct sk_buff *buf;
struct tipc_node *n_ptr;
struct tipc_link_info link_info;
u32 payload_size;
if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
domain = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
if (!tipc_addr_domain_valid(domain))
return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
" (network address)");
if (!tipc_own_addr)
return tipc_cfg_reply_none();
spin_lock_bh(&node_list_lock);
tipc: Major redesign of broadcast link ACK/NACK algorithms Completely redesigns broadcast link ACK and NACK mechanisms to prevent spurious retransmit requests in dual LAN networks, and to prevent the broadcast link from stalling due to the failure of a receiving node to acknowledge receiving a broadcast message or request its retransmission. Note: These changes only impact the timing of when ACK and NACK messages are sent, and not the basic broadcast link protocol itself, so inter- operability with nodes using the "classic" algorithms is maintained. The revised algorithms are as follows: 1) An explicit ACK message is still sent after receiving 16 in-sequence messages, and implicit ACK information continues to be carried in other unicast link message headers (including link state messages). However, the timing of explicit ACKs is now based on the receiving node's absolute network address rather than its relative network address to ensure that the failure of another node does not delay the ACK beyond its 16 message target. 2) A NACK message is now typically sent only when a message gap persists for two consecutive incoming link state messages; this ensures that a suspected gap is not confirmed until both LANs in a dual LAN network have had an opportunity to deliver the message, thereby preventing spurious NACKs. A NACK message can also be generated by the arrival of a single link state message, if the deferred queue is so big that the current message gap cannot be the result of "normal" mis-ordering due to the use of dual LANs (or one LAN using a bonded interface). Since link state messages typically arrive at different nodes at different times the problem of multiple nodes issuing identical NACKs simultaneously is inherently avoided. 3) Nodes continue to "peek" at NACK messages sent by other nodes. If another node requests retransmission of a message gap suspected (but not yet confirmed) by the peeking node, the peeking node forgets about the gap and does not generate a duplicate retransmit request. (If the peeking node subsequently fails to receive the lost message, later link state messages will cause it to rediscover and confirm the gap and send another NACK.) 4) Message gap "equality" is now determined by the start of the gap only. This is sufficient to deal with the most common cases of message loss, and eliminates the need for complex end of gap computations. 5) A peeking node no longer tries to determine whether it should send a complementary NACK, since the most common cases of message loss don't require it to be sent. Consequently, the node no longer examines the "broadcast tag" field of a NACK message when peeking. Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2011-10-28 01:17:53 +07:00
/* Get space for all unicast links + broadcast link */
payload_size = TLV_SPACE((sizeof(link_info)) * (tipc_num_links + 1));
if (payload_size > 32768u) {
spin_unlock_bh(&node_list_lock);
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (too many links)");
}
spin_unlock_bh(&node_list_lock);
buf = tipc_cfg_reply_alloc(payload_size);
if (!buf)
return NULL;
/* Add TLV for broadcast link */
link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr));
link_info.up = htonl(1);
strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME);
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
/* Add TLVs for any other links in scope */
rcu_read_lock();
list_for_each_entry_rcu(n_ptr, &tipc_node_list, list) {
u32 i;
if (!tipc_in_scope(domain, n_ptr->addr))
continue;
tipc_node_lock(n_ptr);
for (i = 0; i < MAX_BEARERS; i++) {
if (!n_ptr->links[i])
continue;
link_info.dest = htonl(n_ptr->addr);
link_info.up = htonl(tipc_link_is_up(n_ptr->links[i]));
strcpy(link_info.str, n_ptr->links[i]->name);
tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO,
&link_info, sizeof(link_info));
}
tipc_node_unlock(n_ptr);
}
rcu_read_unlock();
return buf;
}
/**
* tipc_node_get_linkname - get the name of a link
*
* @bearer_id: id of the bearer
* @node: peer node address
* @linkname: link name output buffer
*
* Returns 0 on success
*/
int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len)
{
struct tipc_link *link;
struct tipc_node *node = tipc_node_find(addr);
if ((bearer_id >= MAX_BEARERS) || !node)
return -EINVAL;
tipc_node_lock(node);
link = node->links[bearer_id];
if (link) {
strncpy(linkname, link->name, len);
tipc_node_unlock(node);
return 0;
}
tipc_node_unlock(node);
return -EINVAL;
}
void tipc_node_unlock(struct tipc_node *node)
{
LIST_HEAD(nsub_list);
struct tipc_link *link;
int pkt_sz = 0;
u32 addr = 0;
if (likely(!node->action_flags)) {
spin_unlock_bh(&node->lock);
return;
}
if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) {
list_replace_init(&node->nsub, &nsub_list);
node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN;
}
if (node->action_flags & TIPC_NOTIFY_NODE_UP) {
link = node->active_links[0];
node->action_flags &= ~TIPC_NOTIFY_NODE_UP;
if (link) {
pkt_sz = ((link->max_pkt - INT_H_SIZE) / ITEM_SIZE) *
ITEM_SIZE;
addr = node->addr;
}
}
spin_unlock_bh(&node->lock);
if (!list_empty(&nsub_list))
tipc_nodesub_notify(&nsub_list);
if (pkt_sz)
tipc_named_node_up(pkt_sz, addr);
}