linux_dsm_epyc7002/include/net/ip6_tunnel.h
Martin KaFai Lau cdf3464e6c ipv6: Fix dst_entry refcnt bugs in ip6_tunnel
Problems in the current dst_entry cache in the ip6_tunnel:

1. ip6_tnl_dst_set is racy.  There is no lock to protect it:
   - One major problem is that the dst refcnt gets messed up. F.e.
     the same dst_cache can be released multiple times and then
     triggering the infamous dst refcnt < 0 warning message.
   - Another issue is the inconsistency between dst_cache and
     dst_cookie.

   It can be reproduced by adding and removing the ip6gre tunnel
   while running a super_netperf TCP_CRR test.

2. ip6_tnl_dst_get does not take the dst refcnt before returning
   the dst.

This patch:
1. Create a percpu dst_entry cache in ip6_tnl
2. Use a spinlock to protect the dst_cache operations
3. ip6_tnl_dst_get always takes the dst refcnt before returning

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-15 14:53:05 -07:00

104 lines
3.1 KiB
C

#ifndef _NET_IP6_TUNNEL_H
#define _NET_IP6_TUNNEL_H
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_tunnel.h>
#include <linux/ip6_tunnel.h>
#define IP6TUNNEL_ERR_TIMEO (30*HZ)
/* capable of sending packets */
#define IP6_TNL_F_CAP_XMIT 0x10000
/* capable of receiving packets */
#define IP6_TNL_F_CAP_RCV 0x20000
/* determine capability on a per-packet basis */
#define IP6_TNL_F_CAP_PER_PACKET 0x40000
struct __ip6_tnl_parm {
char name[IFNAMSIZ]; /* name of tunnel device */
int link; /* ifindex of underlying L2 interface */
__u8 proto; /* tunnel protocol */
__u8 encap_limit; /* encapsulation limit for tunnel */
__u8 hop_limit; /* hop limit for tunnel */
__be32 flowinfo; /* traffic class and flowlabel for tunnel */
__u32 flags; /* tunnel flags */
struct in6_addr laddr; /* local tunnel end-point address */
struct in6_addr raddr; /* remote tunnel end-point address */
__be16 i_flags;
__be16 o_flags;
__be32 i_key;
__be32 o_key;
};
struct ip6_tnl_dst {
spinlock_t lock;
struct dst_entry *dst;
u32 cookie;
};
/* IPv6 tunnel */
struct ip6_tnl {
struct ip6_tnl __rcu *next; /* next tunnel in list */
struct net_device *dev; /* virtual device associated with tunnel */
struct net *net; /* netns for packet i/o */
struct __ip6_tnl_parm parms; /* tunnel configuration parameters */
struct flowi fl; /* flowi template for xmit */
struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */
int err_count;
unsigned long err_time;
/* These fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
__u32 o_seqno; /* The last output seqno */
int hlen; /* Precalculated GRE header length */
int mlink;
};
/* Tunnel encapsulation limit destination sub-option */
struct ipv6_tlv_tnl_enc_lim {
__u8 type; /* type-code for option */
__u8 length; /* option length */
__u8 encap_limit; /* tunnel encapsulation limit */
} __packed;
struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
int ip6_tnl_dst_init(struct ip6_tnl *t);
void ip6_tnl_dst_destroy(struct ip6_tnl *t);
void ip6_tnl_dst_reset(struct ip6_tnl *t);
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
const struct in6_addr *raddr);
int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
const struct in6_addr *raddr);
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw);
__u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr,
const struct in6_addr *raddr);
struct net *ip6_tnl_get_link_net(const struct net_device *dev);
int ip6_tnl_get_iflink(const struct net_device *dev);
static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
struct net_device *dev)
{
struct net_device_stats *stats = &dev->stats;
int pkt_len, err;
pkt_len = skb->len;
err = ip6_local_out_sk(sk, skb);
if (net_xmit_eval(err) == 0) {
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->tx_bytes += pkt_len;
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
} else {
stats->tx_errors++;
stats->tx_aborted_errors++;
}
}
#endif