linux_dsm_epyc7002/net/mpls/internal.h
David Ahern 59b209667a net: mpls: change mpls_route layout
Move labels to the end of mpls_nh as a 0-sized array and within mpls_route
move the via for a nexthop after the mpls_nh. The new layout becomes:

   +----------------------+
   | mpls_route           |
   +----------------------+
   | mpls_nh 0            |
   +----------------------+
   | alignment padding    |   4 bytes for odd number of labels; 0 for even
   +----------------------+
   | via[rt_max_alen] 0   |
   +----------------------+
   | alignment padding    |   via's aligned on sizeof(unsigned long)
   +----------------------+
   | ...                  |
   +----------------------+
   | mpls_nh n-1          |
   +----------------------+
   | via[rt_max_alen] n-1 |
   +----------------------+

Memory allocated for nexthop + via is constant across all nexthops and
their via. It is based on the maximum number of labels across all nexthops
and the maximum via length. The size is saved in the mpls_route as
rt_nh_size. Accessing a nexthop becomes rt->rt_nh + index * rt->rt_nh_size.

The offset of the via address from a nexthop is saved as rt_via_offset
so that given an mpls_nh pointer the via for that hop is simply
nh + rt->rt_via_offset.

With prior code, memory allocated per mpls_route with 1 nexthop:
     via is an ethernet address - 64 bytes
     via is an ipv4 address     - 64
     via is an ipv6 address     - 72

With this patch set, memory allocated per mpls_route with 1 nexthop and
1 or 2 labels:
     via is an ethernet address - 56 bytes
     via is an ipv4 address     - 56
     via is an ipv6 address     - 64

The 8-byte reduction is due to the previous patch; the change introduced
by this patch has no impact on the size of allocations for 1 or 2 labels.

Performance impact of this change was examined using network namespaces
with veth pairs connecting namespaces. ns0 inserts the packet to the
label-switched path using an lwt route with encap mpls. ns1 adds 1 or 2
labels depending on test, ns2 (and ns3 for 2-label test) pops the label
and forwards. ns3 (or ns4) for a 2-label is the destination. Similar
series of namespaces used for 2-nexthop test.

Intent is to measure changes to latency (overhead in manipulating the
packet) in the forwarding path. Tests used netperf with UDP_RR.

IPv4:                     current   patches
   1 label, 1 nexthop      29908     30115
   2 label, 1 nexthop      29071     29612
   1 label, 2 nexthop      29582     29776
   2 label, 2 nexthop      29086     29149

IPv6:                     current   patches
   1 label, 1 nexthop      24502     24960
   2 label, 1 nexthop      24041     24407
   1 label, 2 nexthop      23795     23899
   2 label, 2 nexthop      23074     22959

In short, the change has no effect to a modest increase in performance.
This is expected since this patch does not really have an impact on routes
with 1 or 2 labels (the current limit) and 1 or 2 nexthops.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-04-01 20:21:44 -07:00

211 lines
5.7 KiB
C

#ifndef MPLS_INTERNAL_H
#define MPLS_INTERNAL_H
#include <net/mpls.h>
struct mpls_entry_decoded {
u32 label;
u8 ttl;
u8 tc;
u8 bos;
};
struct mpls_pcpu_stats {
struct mpls_link_stats stats;
struct u64_stats_sync syncp;
};
struct mpls_dev {
int input_enabled;
struct net_device *dev;
struct mpls_pcpu_stats __percpu *stats;
struct ctl_table_header *sysctl;
struct rcu_head rcu;
};
#if BITS_PER_LONG == 32
#define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \
do { \
__typeof__(*(mdev)->stats) *ptr = \
raw_cpu_ptr((mdev)->stats); \
local_bh_disable(); \
u64_stats_update_begin(&ptr->syncp); \
ptr->stats.pkts_field++; \
ptr->stats.bytes_field += (len); \
u64_stats_update_end(&ptr->syncp); \
local_bh_enable(); \
} while (0)
#define MPLS_INC_STATS(mdev, field) \
do { \
__typeof__(*(mdev)->stats) *ptr = \
raw_cpu_ptr((mdev)->stats); \
local_bh_disable(); \
u64_stats_update_begin(&ptr->syncp); \
ptr->stats.field++; \
u64_stats_update_end(&ptr->syncp); \
local_bh_enable(); \
} while (0)
#else
#define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \
do { \
this_cpu_inc((mdev)->stats->stats.pkts_field); \
this_cpu_add((mdev)->stats->stats.bytes_field, (len)); \
} while (0)
#define MPLS_INC_STATS(mdev, field) \
this_cpu_inc((mdev)->stats->stats.field)
#endif
struct sk_buff;
#define LABEL_NOT_SPECIFIED (1 << 20)
/* This maximum ha length copied from the definition of struct neighbour */
#define VIA_ALEN_ALIGN sizeof(unsigned long)
#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, VIA_ALEN_ALIGN))
enum mpls_payload_type {
MPT_UNSPEC, /* IPv4 or IPv6 */
MPT_IPV4 = 4,
MPT_IPV6 = 6,
/* Other types not implemented:
* - Pseudo-wire with or without control word (RFC4385)
* - GAL (RFC5586)
*/
};
struct mpls_nh { /* next hop label forwarding entry */
struct net_device __rcu *nh_dev;
/* nh_flags is accessed under RCU in the packet path; it is
* modified handling netdev events with rtnl lock held
*/
unsigned int nh_flags;
u8 nh_labels;
u8 nh_via_alen;
u8 nh_via_table;
u8 nh_reserved1;
u32 nh_label[0];
};
/* offset of via from beginning of mpls_nh */
#define MPLS_NH_VIA_OFF(num_labels) \
ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \
VIA_ALEN_ALIGN)
/* all nexthops within a route have the same size based on the
* max number of labels and max via length across all nexthops
*/
#define MPLS_NH_SIZE(num_labels, max_via_alen) \
(MPLS_NH_VIA_OFF((num_labels)) + \
ALIGN((max_via_alen), VIA_ALEN_ALIGN))
enum mpls_ttl_propagation {
MPLS_TTL_PROP_DEFAULT,
MPLS_TTL_PROP_ENABLED,
MPLS_TTL_PROP_DISABLED,
};
/* The route, nexthops and vias are stored together in the same memory
* block:
*
* +----------------------+
* | mpls_route |
* +----------------------+
* | mpls_nh 0 |
* +----------------------+
* | alignment padding | 4 bytes for odd number of labels
* +----------------------+
* | via[rt_max_alen] 0 |
* +----------------------+
* | alignment padding | via's aligned on sizeof(unsigned long)
* +----------------------+
* | ... |
* +----------------------+
* | mpls_nh n-1 |
* +----------------------+
* | via[rt_max_alen] n-1 |
* +----------------------+
*/
struct mpls_route { /* next hop label forwarding entry */
struct rcu_head rt_rcu;
u8 rt_protocol;
u8 rt_payload_type;
u8 rt_max_alen;
u8 rt_ttl_propagate;
u8 rt_nhn;
/* rt_nhn_alive is accessed under RCU in the packet path; it
* is modified handling netdev events with rtnl lock held
*/
u8 rt_nhn_alive;
u8 rt_nh_size;
u8 rt_via_offset;
u8 rt_reserved1;
struct mpls_nh rt_nh[0];
};
#define for_nexthops(rt) { \
int nhsel; struct mpls_nh *nh; u8 *__nh; \
for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
__nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define change_nexthops(rt) { \
int nhsel; struct mpls_nh *nh; u8 *__nh; \
for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \
__nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
__nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define endfor_nexthops(rt) }
static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
{
struct mpls_shim_hdr result;
result.label_stack_entry =
cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
(tc << MPLS_LS_TC_SHIFT) |
(bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
(ttl << MPLS_LS_TTL_SHIFT));
return result;
}
static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
{
struct mpls_entry_decoded result;
unsigned entry = be32_to_cpu(hdr->label_stack_entry);
result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
return result;
}
static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
{
return rcu_dereference_rtnl(dev->mpls_ptr);
}
int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
const u32 label[]);
int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
u32 label[]);
int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
u8 via[]);
bool mpls_output_possible(const struct net_device *dev);
unsigned int mpls_dev_mtu(const struct net_device *dev);
bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
void mpls_stats_inc_outucastpkts(struct net_device *dev,
const struct sk_buff *skb);
#endif /* MPLS_INTERNAL_H */