mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-26 21:35:20 +07:00
3ad7a4b141
Vxlan COLLECT_METADATA mode today solves the per-vni netdev scalability problem in l3 networks. It expects all forwarding information to be present in dst_metadata. This patch series enhances collect metadata mode to include the case where only vni is present in dst_metadata, and the vxlan driver can then use the rest of the forwarding information database to make forwarding decisions. There is no change to default COLLECT_METADATA behaviour. These changes only apply to COLLECT_METADATA when used with the bridging use-case with a special dst_metadata tunnel info flag (eg: where vxlan device is part of a bridge). For all this to work, the vxlan driver will need to now support a single fdb table hashed by mac + vni. This series essentially makes this happen. use-case and workflow: vxlan collect metadata device participates in bridging vlan to vn-segments. Bridge driver above the vxlan device, sends the vni corresponding to the vlan in the dst_metadata. vxlan driver will lookup forwarding database with (mac + vni) for the required remote destination information to forward the packet. Changes introduced by this patch: - allow learning and forwarding database state in vxlan netdev in COLLECT_METADATA mode. Current behaviour is not changed by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used to support the new bridge friendly mode. - A single fdb table hashed by (mac, vni) to allow fdb entries with multiple vnis in the same fdb table - rx path already has the vni - tx path expects a vni in the packet with dst_metadata - prior to this series, fdb remote_dsts carried remote vni and the vxlan device carrying the fdb table represented the source vni. With the vxlan device now representing multiple vnis, this patch adds a src vni attribute to the fdb entry. The remote vni already uses NDA_VNI attribute. This patch introduces NDA_SRC_VNI netlink attribute to represent the src vni in a multi vni fdb table.
iproute2 example (patched and pruned iproute2 output to just show relevant fdb entries): example shows same host mac learnt on two vni's. before (netdev per vni): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self after this patch with collect metadata in bridged mode (single netdev): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
171 lines
4.1 KiB
C
171 lines
4.1 KiB
C
#ifndef __LINUX_NEIGHBOUR_H
|
|
#define __LINUX_NEIGHBOUR_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/netlink.h>
|
|
|
|
/*
 * Fixed-size header for neighbour entries.
 *
 * Layout is ABI: 1 + 1 + 2 + 4 + 2 + 1 + 1 = 12 bytes with no implicit
 * padding (the explicit ndm_pad1/ndm_pad2 members keep ndm_ifindex
 * 4-byte aligned).  Do not reorder or resize members.
 */
struct ndmsg {
	__u8		ndm_family;
	__u8		ndm_pad1;	/* explicit padding */
	__u16		ndm_pad2;	/* explicit padding */
	__s32		ndm_ifindex;
	__u16		ndm_state;	/* presumably NUD_* bits - confirm */
	__u8		ndm_flags;	/* presumably NTF_* bits - confirm */
	__u8		ndm_type;
};
|
|
|
|
/*
 * Neighbour attribute types (NDA_*).
 *
 * Values are assigned implicitly starting at 0, so the order of the
 * enumerators defines the numeric ids; new attributes must be added
 * immediately before __NDA_MAX.
 */
enum {
	NDA_UNSPEC,
	NDA_DST,
	NDA_LLADDR,
	NDA_CACHEINFO,
	NDA_PROBES,
	NDA_VLAN,
	NDA_PORT,
	NDA_VNI,		/* remote vni of an fdb entry */
	NDA_IFINDEX,
	NDA_MASTER,
	NDA_LINK_NETNSID,
	NDA_SRC_VNI,		/* src vni in a multi vni fdb table */
	__NDA_MAX
};

/* Highest valid NDA_* attribute id; tracks the last enumerator above. */
#define NDA_MAX (__NDA_MAX - 1)
|
|
|
|
/*
 *	Neighbor Cache Entry Flags
 *
 *	Each flag is a distinct single bit, so flags can be OR-ed together.
 *	All values fit in 8 bits.
 */

#define NTF_USE		0x01
#define NTF_SELF	0x02
#define NTF_MASTER	0x04
#define NTF_PROXY	0x08	/* == ATF_PUBL */
#define NTF_EXT_LEARNED	0x10
#define NTF_ROUTER	0x80
|
|
|
|
/*
 *	Neighbor Cache Entry States.
 *
 *	Each non-zero state is a distinct single bit; together they cover
 *	0x01..0x80.  NUD_NONE (0x00) is the absence of any state bit.
 */

#define NUD_INCOMPLETE	0x01
#define NUD_REACHABLE	0x02
#define NUD_STALE	0x04
#define NUD_DELAY	0x08
#define NUD_PROBE	0x10
#define NUD_FAILED	0x20

/* Dummy states */
#define NUD_NOARP	0x40
#define NUD_PERMANENT	0x80
#define NUD_NONE	0x00

/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
   and make no address resolution or NUD.
   NUD_PERMANENT also cannot be deleted by garbage collectors.
 */
|
|
|
|
/*
 * Per-entry cache information: four 32-bit unsigned members, 16 bytes
 * total with no padding.  Layout is ABI; do not reorder or resize.
 *
 * NOTE(review): presumably the payload of the NDA_CACHEINFO attribute;
 * units of the three timestamps are not stated here - confirm.
 */
struct nda_cacheinfo {
	__u32		ndm_confirmed;
	__u32		ndm_used;
	__u32		ndm_updated;
	__u32		ndm_refcnt;
};
|
|
|
|
/*****************************************************************
 *		Neighbour tables specific messages.
 *
 * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
 * NLM_F_DUMP flag set. Every neighbour table configuration is
 * spread over multiple messages to avoid running into message
 * size limits on systems with many interfaces. The first message
 * in the sequence transports all not device specific data such as
 * statistics, configuration, and the default parameter set.
 * This message is followed by 0..n messages carrying device
 * specific parameter sets.
 * Although the ordering should be sufficient, NDTA_NAME can be
 * used to identify sequences. The initial message can be identified
 * by checking for NDTA_CONFIG. The device specific messages do
 * not contain this TLV but have NDTPA_IFINDEX set to the
 * corresponding interface index.
 *
 * To change neighbour table attributes, send RTM_SETNEIGHTBL
 * with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
 * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
 * otherwise. Device specific parameter sets can be changed by
 * setting NDTPA_IFINDEX to the interface index of the corresponding
 * device.
 ****/
|
|
|
|
/*
 * Neighbour table statistics, carried in the NDTA_STATS attribute
 * (marked read-only in the NDTA_* enum).
 *
 * Eleven 64-bit unsigned members, 88 bytes total.  Layout is ABI; do
 * not reorder or resize.
 *
 * NOTE(review): member names suggest monotonically increasing event
 * counters (allocations, lookups, hits, GC runs, ...) - confirm the
 * exact semantics against the code that fills this structure.
 */
struct ndt_stats {
	__u64		ndts_allocs;
	__u64		ndts_destroys;
	__u64		ndts_hash_grows;
	__u64		ndts_res_failed;
	__u64		ndts_lookups;
	__u64		ndts_hits;
	__u64		ndts_rcv_probes_mcast;
	__u64		ndts_rcv_probes_ucast;
	__u64		ndts_periodic_gc_runs;
	__u64		ndts_forced_gc_runs;
	__u64		ndts_table_fulls;
};
|
|
|
|
/*
 * Neighbour table parameter attributes (NDTPA_*), nested inside
 * NDTA_PARMS.
 *
 * Values are assigned implicitly starting at 0, so the order of the
 * enumerators defines the numeric ids; new attributes must be added
 * immediately before __NDTPA_MAX.  The trailing comment on each entry
 * gives the payload type and, where noted, units and mutability.
 */
enum {
	NDTPA_UNSPEC,
	NDTPA_IFINDEX,			/* u32, unchangeable */
	NDTPA_REFCNT,			/* u32, read-only */
	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
	NDTPA_RETRANS_TIME,		/* u64, msecs */
	NDTPA_GC_STALETIME,		/* u64, msecs */
	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
	NDTPA_QUEUE_LEN,		/* u32 */
	NDTPA_APP_PROBES,		/* u32 */
	NDTPA_UCAST_PROBES,		/* u32 */
	NDTPA_MCAST_PROBES,		/* u32 */
	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
	NDTPA_PROXY_DELAY,		/* u64, msecs */
	NDTPA_PROXY_QLEN,		/* u32 */
	NDTPA_LOCKTIME,			/* u64, msecs */
	NDTPA_QUEUE_LENBYTES,		/* u32 */
	NDTPA_MCAST_REPROBES,		/* u32 */
	NDTPA_PAD,
	__NDTPA_MAX
};
/* Highest valid NDTPA_* attribute id; tracks the last enumerator above. */
#define NDTPA_MAX (__NDTPA_MAX - 1)
|
|
|
|
/*
 * Fixed-size header for neighbour table messages: 4 bytes, with
 * explicit padding members so the struct has no implicit holes.
 * Layout is ABI; do not reorder or resize.
 *
 * NOTE(review): presumably the header of RTM_GETNEIGHTBL /
 * RTM_SETNEIGHTBL messages - confirm against the netlink handlers.
 */
struct ndtmsg {
	__u8		ndtm_family;
	__u8		ndtm_pad1;	/* explicit padding */
	__u16		ndtm_pad2;	/* explicit padding */
};
|
|
|
|
/*
 * Neighbour table configuration snapshot, carried in the NDTA_CONFIG
 * attribute (marked read-only in the NDTA_* enum).
 *
 * Two 16-bit members followed by seven 32-bit members: 32 bytes total
 * with no implicit padding.  Layout is ABI; do not reorder or resize.
 */
struct ndt_config {
	__u16		ndtc_key_len;
	__u16		ndtc_entry_size;
	__u32		ndtc_entries;
	__u32		ndtc_last_flush;	/* delta to now in msecs */
	__u32		ndtc_last_rand;		/* delta to now in msecs */
	__u32		ndtc_hash_rnd;
	__u32		ndtc_hash_mask;
	__u32		ndtc_hash_chain_gc;
	__u32		ndtc_proxy_qlen;
};
|
|
|
|
/*
 * Neighbour table attributes (NDTA_*).
 *
 * Values are assigned implicitly starting at 0, so the order of the
 * enumerators defines the numeric ids; new attributes must be added
 * immediately before __NDTA_MAX.  The trailing comment on each entry
 * gives the payload type and, where noted, units and mutability.
 */
enum {
	NDTA_UNSPEC,
	NDTA_NAME,			/* char *, unchangeable */
	NDTA_THRESH1,			/* u32 */
	NDTA_THRESH2,			/* u32 */
	NDTA_THRESH3,			/* u32 */
	NDTA_CONFIG,			/* struct ndt_config, read-only */
	NDTA_PARMS,			/* nested TLV NDTPA_* */
	NDTA_STATS,			/* struct ndt_stats, read-only */
	NDTA_GC_INTERVAL,		/* u64, msecs */
	NDTA_PAD,
	__NDTA_MAX
};
/* Highest valid NDTA_* attribute id; tracks the last enumerator above. */
#define NDTA_MAX (__NDTA_MAX - 1)
|
|
|
|
#endif
|