2006-01-03 01:04:38 +07:00
|
|
|
/*
|
|
|
|
* net/tipc/core.h: Include file for TIPC global declarations
|
2007-02-09 21:25:21 +07:00
|
|
|
*
|
tipc: introduce new TIPC server infrastructure
TIPC has two internal servers, one providing a subscription
service for topology events, and another providing the
configuration interface. These servers have previously been running
in BH context, accessing the TIPC-port (aka native) API directly.
Apart from these servers, even the TIPC socket implementation is
partially built on this API.
As this API may simultaneously be called via different paths and in
different contexts, a complex and costly lock policy is required
in order to protect TIPC internal resources.
To eliminate the need for this complex lock policy, we introduce
a new, generic service API that uses kernel sockets for message
passing instead of the native API. Once the topology and configuration
servers are converted to use this new service, all code pertaining
to the native API can be removed. This entails a significant
reduction in code amount and complexity, and opens up for a complete
rework of the locking policy in TIPC.
The new service also solves another problem:
As the current topology server works in BH context, it cannot easily
be blocked when sending of events fails due to congestion. In such
cases events may have to be silently dropped, something that is
unacceptable. Therefore, the new service keeps a dedicated outbound
queue receiving messages from BH context. Once messages are
inserted into this queue, we will immediately schedule a work from a
special workqueue. This way, messages/events from the topology server
are in reality sent in process context, and the server can block
if necessary.
Analogously, there is a new workqueue for receiving messages. Once a
notification about an arriving message is received in BH context, we
schedule a work from the receive workqueue to do the job of
receiving the message in process context.
As both sending and receiving of messages are now completed in process
context, subscribed events cannot be dropped any more.
As of this commit, this new server infrastructure is built, but
not yet actually called by the existing TIPC code. Since the
conversion changes required in order to use it are significant,
the addition is kept here as a separate commit.
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-17 21:54:39 +07:00
|
|
|
* Copyright (c) 2005-2006, 2013 Ericsson AB
|
|
|
|
* Copyright (c) 2005-2007, 2010-2013, Wind River Systems
|
2006-01-03 01:04:38 +07:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
2006-01-11 19:30:43 +07:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
2006-01-03 01:04:38 +07:00
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
2006-01-11 19:30:43 +07:00
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
2006-01-03 01:04:38 +07:00
|
|
|
*
|
2006-01-11 19:30:43 +07:00
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-01-03 01:04:38 +07:00
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _TIPC_CORE_H
|
|
|
|
#define _TIPC_CORE_H
|
|
|
|
|
2006-01-13 17:45:44 +07:00
|
|
|
#include <linux/tipc.h>
|
|
|
|
#include <linux/tipc_config.h>
|
2014-11-20 16:29:07 +07:00
|
|
|
#include <linux/tipc_netlink.h>
|
2006-01-03 01:04:38 +07:00
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/timer.h>
|
|
|
|
#include <linux/string.h>
|
2013-12-12 08:36:41 +07:00
|
|
|
#include <linux/uaccess.h>
|
2006-01-03 01:04:38 +07:00
|
|
|
#include <linux/interrupt.h>
|
2011-07-27 06:09:06 +07:00
|
|
|
#include <linux/atomic.h>
|
2006-01-03 01:04:38 +07:00
|
|
|
#include <asm/hardirq.h>
|
|
|
|
#include <linux/netdevice.h>
|
2007-02-09 21:25:21 +07:00
|
|
|
#include <linux/in.h>
|
2006-01-03 01:04:38 +07:00
|
|
|
#include <linux/list.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surroundings. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
|
|
|
#include <linux/slab.h>
|
2006-01-03 01:04:38 +07:00
|
|
|
#include <linux/vmalloc.h>
|
2014-04-21 09:55:42 +07:00
|
|
|
#include <linux/rtnetlink.h>
|
tipc: improve and extend media address conversion functions
TIPC currently handles two media specific addresses: Ethernet MAC
addresses and InfiniBand addresses. Those are kept in three different
formats:
1) A "raw" format as obtained from the device. This format is known
only by the media specific adapter code in eth_media.c and
ib_media.c.
2) A "generic" internal format, in the form of struct tipc_media_addr,
which can be referenced and passed around by the generic media-
unaware code.
3) A serialized version of the latter, to be conveyed in neighbor
discovery messages.
Conversion between the three formats can only be done by the media
specific code, so we have function pointers for this purpose in
struct tipc_media. Here, the media adapters can install their own
conversion functions at startup.
We now introduce a new such function, 'raw2addr()', whose purpose
is to convert from format 1 to format 2 above. We also try, as far
as possible, to make the commenting, variable names and usage of these
functions uniform, with the purpose of making them more comprehensible.
We can now also remove the function tipc_l2_media_addr_set(), whose
job is done better by the new function.
Finally, we expand the field for serialized addresses (format 3)
in discovery messages from 20 to 32 bytes. This is permitted
according to the spec, and reduces the risk of problems when we
add new media in the future.
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 16:39:13 +07:00
|
|
|
#include <linux/etherdevice.h>
|
2015-01-09 14:27:04 +07:00
|
|
|
#include <net/netns/generic.h>
|
2015-01-09 14:27:08 +07:00
|
|
|
#include <linux/rhashtable.h>
|
2010-05-11 21:30:08 +07:00
|
|
|
|
2015-05-14 21:46:13 +07:00
|
|
|
/*
 * Opaque forward declarations. Where this header refers to these types
 * at all, it does so through pointers only, so the full definitions are
 * not needed here.
 */
struct tipc_node;
struct tipc_bearer;
struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
/*
 * struct tipc_net stores a 'struct name_table *'; declare that tag
 * explicitly, since the declaration above uses a different tag name.
 */
struct name_table;
struct tipc_server;
|
2015-01-09 14:27:05 +07:00
|
|
|
|
2010-05-11 21:30:08 +07:00
|
|
|
/* Version string of the TIPC module */
#define TIPC_MOD_VER "2.0.0"

/* Number of buckets in tipc_net::node_htable */
#define NODE_HTABLE_SIZE 512
/* tipc_net::bearer_list is dimensioned as MAX_BEARERS + 1 entries */
#define MAX_BEARERS 3
|
|
|
|
|
2012-08-16 19:09:12 +07:00
|
|
|
/* Key passed to net_generic() by tipc_net() to find the per-namespace data */
extern int tipc_net_id __read_mostly;
/*
 * Three-element sysctl array.
 * NOTE(review): presumably min/default/max receive buffer limits - confirm
 * against the definition and the sysctl table.
 */
extern int sysctl_tipc_rmem[3] __read_mostly;
|
tipc: add name distributor resiliency queue
TIPC name table updates are distributed asynchronously in a cluster,
entailing a risk of certain race conditions. E.g., if two nodes
simultaneously issue conflicting (overlapping) publications, this may
not be detected until both publications have reached a third node, in
which case one of the publications will be silently dropped on that
node. Hence, we end up with an inconsistent name table.
In most cases this conflict is just a temporary race, e.g., one
node is issuing a publication under the assumption that a previous,
conflicting, publication has already been withdrawn by the other node.
However, because of the (rtt related) distributed update delay, this
may not yet hold true on all nodes. The symptom of this failure is a
syslog message: "tipc: Cannot publish {%u,%u,%u}, overlap error".
In this commit we add a resiliency queue at the receiving end of
the name table distributor. When insertion of an arriving publication
fails, we retain it in this queue for a short amount of time, assuming
that another update will arrive very soon and clear the conflict. If that
happens, we insert the publication; otherwise we drop it.
The (configurable) retention value defaults to 2000 ms. Knowing from
experience that the situation described above is extremely rare, there
is no risk that the queue will accumulate any large number of items.
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-08-28 14:08:47 +07:00
|
|
|
/*
 * NOTE(review): per the commit that introduced it, this is the configurable
 * retention time of the name distributor resiliency queue (stated default:
 * 2000 ms) - confirm against the definition.
 */
extern int sysctl_tipc_named_timeout __read_mostly;
|
2006-01-03 01:04:38 +07:00
|
|
|
|
2015-01-09 14:27:04 +07:00
|
|
|
struct tipc_net {
|
2015-01-09 14:27:10 +07:00
|
|
|
u32 own_addr;
|
2015-01-09 14:27:04 +07:00
|
|
|
int net_id;
|
2015-01-09 14:27:12 +07:00
|
|
|
int random;
|
2015-01-09 14:27:05 +07:00
|
|
|
|
|
|
|
/* Node table and node list */
|
|
|
|
spinlock_t node_list_lock;
|
|
|
|
struct hlist_head node_htable[NODE_HTABLE_SIZE];
|
|
|
|
struct list_head node_list;
|
|
|
|
u32 num_nodes;
|
|
|
|
u32 num_links;
|
2015-01-09 14:27:06 +07:00
|
|
|
|
|
|
|
/* Bearer list */
|
|
|
|
struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
|
2015-01-09 14:27:07 +07:00
|
|
|
|
|
|
|
/* Broadcast link */
|
2015-10-22 19:51:34 +07:00
|
|
|
spinlock_t bclock;
|
2015-10-22 19:51:33 +07:00
|
|
|
struct tipc_bc_base *bcbase;
|
2015-01-09 14:27:07 +07:00
|
|
|
struct tipc_link *bcl;
|
2015-01-09 14:27:08 +07:00
|
|
|
|
|
|
|
/* Socket hash table */
|
|
|
|
struct rhashtable sk_rht;
|
2015-01-09 14:27:09 +07:00
|
|
|
|
|
|
|
/* Name table */
|
|
|
|
spinlock_t nametbl_lock;
|
|
|
|
struct name_table *nametbl;
|
2015-01-09 14:27:11 +07:00
|
|
|
|
|
|
|
/* Topology subscription server */
|
|
|
|
struct tipc_server *topsrv;
|
|
|
|
atomic_t subscription_count;
|
2015-01-09 14:27:04 +07:00
|
|
|
};
|
|
|
|
|
2015-07-31 05:24:26 +07:00
|
|
|
static inline struct tipc_net *tipc_net(struct net *net)
|
|
|
|
{
|
|
|
|
return net_generic(net, tipc_net_id);
|
|
|
|
}
|
|
|
|
|
2015-10-22 19:51:41 +07:00
|
|
|
static inline int tipc_netid(struct net *net)
|
|
|
|
{
|
|
|
|
return tipc_net(net)->net_id;
|
|
|
|
}
|
|
|
|
|
2015-11-20 02:30:42 +07:00
|
|
|
static inline struct list_head *tipc_nodes(struct net *net)
|
|
|
|
{
|
|
|
|
return &tipc_net(net)->node_list;
|
|
|
|
}
|
|
|
|
|
2015-05-14 21:46:14 +07:00
|
|
|
/*
 * mod - reduce a value modulo 2^16
 * @x: value to reduce
 *
 * The parameter is already a u16, so any wider argument is truncated
 * when it is converted at the call site; the mask keeps the low 16 bits
 * and leaves a u16 value unchanged.
 */
static inline u16 mod(u16 x)
{
	return x & 0xffffu;
}
|
|
|
|
|
|
|
|
/*
 * less_eq - wrap-around "left <= right" test for 16-bit values
 * @left: first value
 * @right: second value
 *
 * Returns non-zero when the distance from @left forward to @right,
 * taken modulo 2^16, is below 32768 (half the number space); zero
 * otherwise. Equal values therefore compare as less-or-equal.
 */
static inline int less_eq(u16 left, u16 right)
{
	/* right - left is computed as int; mod() folds it back into 16 bits */
	return mod(right - left) < 32768u;
}
|
|
|
|
|
|
|
|
/*
 * more - wrap-around "left > right" test for 16-bit values
 * @left: first value
 * @right: second value
 *
 * Defined as the exact negation of less_eq(@left, @right).
 */
static inline int more(u16 left, u16 right)
{
	return !less_eq(left, right);
}
|
|
|
|
|
|
|
|
/*
 * less - wrap-around "left < right" test for 16-bit values
 * @left: first value
 * @right: second value
 *
 * Returns non-zero when less_eq(@left, @right) holds and the two values
 * differ; zero otherwise.
 */
static inline int less(u16 left, u16 right)
{
	return less_eq(left, right) && (mod(right) != mod(left));
}
|
|
|
|
|
tipc: reduce locking scope during packet reception
We convert packet/message reception according to the same principle
we have been using for message sending and timeout handling:
We move the function tipc_rcv() to node.c, hence handling the initial
packet reception at the link aggregation level. The function grabs
the node lock, selects the receiving link, and accesses it via a new
call tipc_link_rcv(). This function appends buffers to the input
queue for delivery upwards, but it may also append outgoing packets
to the xmit queue, just as we do during regular message sending. The
latter will happen when buffers are forwarded from the link backlog,
or when retransmission is requested.
Upon return of this function, and after having released the node lock,
tipc_rcv() delivers/transmits the contents of those queues, but it may
also perform actions such as link activation or reset, as indicated by
the return flags from the link.
This reduces the number of cpu cycles spent inside the node spinlock,
and reduces contention on that lock.
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-17 03:54:31 +07:00
|
|
|
/*
 * in_range - wrap-around test for "min <= val <= max" on 16-bit values
 * @val: value to test
 * @min: lower bound (inclusive)
 * @max: upper bound (inclusive)
 *
 * Returns non-zero when @val is neither less() than @min nor more()
 * than @max, both evaluated with the wrap-around comparisons above;
 * zero otherwise.
 */
static inline int in_range(u16 val, u16 min, u16 max)
{
	return !less(val, min) && !more(val, max);
}
|
|
|
|
|
2013-06-17 21:54:37 +07:00
|
|
|
/*
 * Sysctl registration hooks.
 *
 * With CONFIG_SYSCTL set, the two functions are only declared here and
 * defined elsewhere. Without it, no-op stubs are provided so that callers
 * need no #ifdef of their own: registration reports success (0) and
 * unregistration does nothing. The stubs are inline functions rather than
 * macros so that calls are type-checked and always form a complete
 * statement in both configurations.
 */
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
#else
static inline int tipc_register_sysctl(void)
{
	return 0;
}

static inline void tipc_unregister_sysctl(void)
{
}
#endif
|
2007-02-09 21:25:21 +07:00
|
|
|
#endif
|