mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-19 00:06:07 +07:00
0eeb075fad
This feature is only enabled with the new per-interface or ipv4 global
sysctls called 'ignore_routes_with_linkdown'.

    net.ipv4.conf.all.ignore_routes_with_linkdown = 0
    net.ipv4.conf.default.ignore_routes_with_linkdown = 0
    net.ipv4.conf.lo.ignore_routes_with_linkdown = 0
    ...

When the above sysctls are set, the kernel will report to userspace that a
route is dead and will no longer resolve to this nexthop when performing a
fib lookup. This will signal to userspace that the route will not be
selected. The signalling of a RTNH_F_DEAD is only passed to userspace if the
sysctl is enabled and link is down. This was done as without it the netlink
listeners would have no idea whether or not a nexthop would be selected. The
kernel only sets RTNH_F_DEAD internally if the interface has IFF_UP cleared.

With the new sysctl set, the following behavior can be observed (interface
p8p1 is link-down):

    default via 10.0.5.2 dev p9p1
    10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15
    70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1
    80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 dead linkdown
    90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 dead linkdown
    90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2
    90.0.0.1 via 70.0.0.2 dev p7p1 src 70.0.0.1
        cache
    local 80.0.0.1 dev lo src 80.0.0.1
        cache <local>
    80.0.0.2 via 10.0.5.2 dev p9p1 src 10.0.5.15
        cache

While the route does remain in the table (so it can be modified if needed
rather than being wiped away as it would be if IFF_UP was cleared), the
proper next-hop is chosen automatically when the link is down.

Now interface p8p1 is linked-up:

    default via 10.0.5.2 dev p9p1
    10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15
    70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1
    80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1
    90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1
    90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2
    192.168.56.0/24 dev p2p1 proto kernel scope link src 192.168.56.2
    90.0.0.1 via 80.0.0.2 dev p8p1 src 80.0.0.1
        cache
    local 80.0.0.1 dev lo src 80.0.0.1
        cache <local>
    80.0.0.2 dev p8p1 src 80.0.0.1
        cache

and the output changes to what one would expect.

If the sysctl is not set, the following output would be expected when p8p1
is down:

    default via 10.0.5.2 dev p9p1
    10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15
    70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1
    80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 linkdown
    90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 linkdown
    90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2

Since the dead flag does not appear, there should be no expectation that the
kernel would skip using this route due to link being down.

v2: Split kernel changes into 2 patches, this actually makes a behavioral
    change if the sysctl is set. Also took suggestion from Alex to simplify
    code by only checking sysctl during fib lookup and suggestion from Scott
    to add a per-interface sysctl.
v3: Code clean-ups to make it more readable and efficient as well as a
    reverse path check fix.
v4: Drop binary sysctl
v5: Whitespace fixups from Dave
v6: Style changes from Dave and checkpatch suggestions
v7: One more checkpatch fixup

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
174 lines
4.5 KiB
C
174 lines
4.5 KiB
C
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the IP protocol.
 *
 * Version:	@(#)ip.h	1.0.2	04/28/93
 *
 * Authors:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _UAPI_LINUX_IP_H
|
|
#define _UAPI_LINUX_IP_H
|
|
#include <linux/types.h>
|
|
#include <asm/byteorder.h>
|
|
|
|
/*
 * Type-of-service bits of the TOS byte.
 *
 * IPTOS_TOS_MASK (0x1E) selects bits 1-4; IPTOS_TOS() extracts them from a
 * TOS value.  Each of the four constants below is a single bit inside that
 * mask, and together they cover the mask exactly.
 */
#define IPTOS_TOS_MASK		0x1E
#define IPTOS_TOS(tos)		((tos)&IPTOS_TOS_MASK)
#define	IPTOS_LOWDELAY		0x10
#define	IPTOS_THROUGHPUT	0x08
#define	IPTOS_RELIABILITY	0x04
#define	IPTOS_MINCOST		0x02

/*
 * Precedence field of the TOS byte.
 *
 * IPTOS_PREC_MASK (0xE0) selects the top three bits; IPTOS_PREC() extracts
 * them without shifting.  The constants are the eight possible field
 * values, already positioned in bits 5-7 (0x00 .. 0xe0 in steps of 0x20).
 */
#define IPTOS_PREC_MASK		0xE0
#define IPTOS_PREC(tos)		((tos)&IPTOS_PREC_MASK)
#define IPTOS_PREC_NETCONTROL           0xe0
#define IPTOS_PREC_INTERNETCONTROL      0xc0
#define IPTOS_PREC_CRITIC_ECP           0xa0
#define IPTOS_PREC_FLASHOVERRIDE        0x80
#define IPTOS_PREC_FLASH                0x60
#define IPTOS_PREC_IMMEDIATE            0x40
#define IPTOS_PREC_PRIORITY             0x20
#define IPTOS_PREC_ROUTINE              0x00


/*
 * IP options.
 *
 * The masks below split an option type byte into three fields:
 *   bit 7     - copy flag   (IPOPT_COPY)
 *   bits 5-6  - option class (IPOPT_CLASS_MASK)
 *   bits 0-4  - option number (IPOPT_NUMBER_MASK)
 */
#define IPOPT_COPY		0x80
#define IPOPT_CLASS_MASK	0x60
#define IPOPT_NUMBER_MASK	0x1f

/* Field extractors; the result is masked but not shifted. */
#define	IPOPT_COPIED(o)		((o)&IPOPT_COPY)
#define	IPOPT_CLASS(o)		((o)&IPOPT_CLASS_MASK)
#define	IPOPT_NUMBER(o)		((o)&IPOPT_NUMBER_MASK)

/* Option class values, already positioned in bits 5-6. */
#define	IPOPT_CONTROL		0x00
#define	IPOPT_RESERVED1		0x20
#define	IPOPT_MEASUREMENT	0x40
#define	IPOPT_RESERVED2		0x60

/* Complete option type bytes: number | class [| copy flag]. */
#define IPOPT_END	(0 |IPOPT_CONTROL)
#define IPOPT_NOOP	(1 |IPOPT_CONTROL)
#define IPOPT_SEC	(2 |IPOPT_CONTROL|IPOPT_COPY)
#define IPOPT_LSRR	(3 |IPOPT_CONTROL|IPOPT_COPY)
#define IPOPT_TIMESTAMP	(4 |IPOPT_MEASUREMENT)
#define IPOPT_CIPSO	(6 |IPOPT_CONTROL|IPOPT_COPY)
#define IPOPT_RR	(7 |IPOPT_CONTROL)
#define IPOPT_SID	(8 |IPOPT_CONTROL|IPOPT_COPY)
#define IPOPT_SSRR	(9 |IPOPT_CONTROL|IPOPT_COPY)
#define IPOPT_RA	(20|IPOPT_CONTROL|IPOPT_COPY)

/* IP version number and TTL constants (255 is the largest value a __u8 ttl can hold). */
#define IPVERSION	4
#define MAXTTL		255
#define IPDEFTTL	64

/*
 * NOTE(review): the next four names look like byte positions inside an
 * encoded option (type, length, offset/pointer, first valid pointer value);
 * confirm against the option-parsing code before relying on that reading.
 */
#define IPOPT_OPTVAL 0
#define IPOPT_OLEN   1
#define IPOPT_OFFSET 2
#define IPOPT_MINOFF 4
#define MAX_IPOPTLEN 40
/* Alternative spellings of IPOPT_NOOP, IPOPT_END and IPOPT_TIMESTAMP. */
#define IPOPT_NOP IPOPT_NOOP
#define IPOPT_EOL IPOPT_END
#define IPOPT_TS  IPOPT_TIMESTAMP

#define	IPOPT_TS_TSONLY		0		/* timestamps only */
#define	IPOPT_TS_TSANDADDR	1		/* timestamps and addresses */
#define	IPOPT_TS_PRESPEC	3		/* specified modules only */

/* NOTE(review): presumably the maximum length of the BEET pseudo header (struct ip_beet_phdr); confirm at the use sites. */
#define IPV4_BEET_PHMAXLEN 8

struct iphdr {
|
|
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
|
__u8 ihl:4,
|
|
version:4;
|
|
#elif defined (__BIG_ENDIAN_BITFIELD)
|
|
__u8 version:4,
|
|
ihl:4;
|
|
#else
|
|
#error "Please fix <asm/byteorder.h>"
|
|
#endif
|
|
__u8 tos;
|
|
__be16 tot_len;
|
|
__be16 id;
|
|
__be16 frag_off;
|
|
__u8 ttl;
|
|
__u8 protocol;
|
|
__sum16 check;
|
|
__be32 saddr;
|
|
__be32 daddr;
|
|
/*The options start here. */
|
|
};
|
|
|
|
|
|
struct ip_auth_hdr {
|
|
__u8 nexthdr;
|
|
__u8 hdrlen; /* This one is measured in 32 bit units! */
|
|
__be16 reserved;
|
|
__be32 spi;
|
|
__be32 seq_no; /* Sequence number */
|
|
__u8 auth_data[0]; /* Variable len but >=4. Mind the 64 bit alignment! */
|
|
};
|
|
|
|
struct ip_esp_hdr {
|
|
__be32 spi;
|
|
__be32 seq_no; /* Sequence number */
|
|
__u8 enc_data[0]; /* Variable len but >=8. Mind the 64 bit alignment! */
|
|
};
|
|
|
|
struct ip_comp_hdr {
|
|
__u8 nexthdr;
|
|
__u8 flags;
|
|
__be16 cpi;
|
|
};
|
|
|
|
struct ip_beet_phdr {
|
|
__u8 nexthdr;
|
|
__u8 hdrlen;
|
|
__u8 padlen;
|
|
__u8 reserved;
|
|
};
|
|
|
|
/*
 * Index values for the variables in ipv4_devconf.
 *
 * Numbering starts at 1 and each later entry takes the next integer.
 * Inserting an entry anywhere other than directly before
 * __IPV4_DEVCONF_MAX would renumber every entry after it.
 */
enum
{
	IPV4_DEVCONF_FORWARDING=1,
	IPV4_DEVCONF_MC_FORWARDING,
	IPV4_DEVCONF_PROXY_ARP,
	IPV4_DEVCONF_ACCEPT_REDIRECTS,
	IPV4_DEVCONF_SECURE_REDIRECTS,
	IPV4_DEVCONF_SEND_REDIRECTS,
	IPV4_DEVCONF_SHARED_MEDIA,
	IPV4_DEVCONF_RP_FILTER,
	IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE,
	IPV4_DEVCONF_BOOTP_RELAY,
	IPV4_DEVCONF_LOG_MARTIANS,
	IPV4_DEVCONF_TAG,
	IPV4_DEVCONF_ARPFILTER,
	IPV4_DEVCONF_MEDIUM_ID,
	IPV4_DEVCONF_NOXFRM,
	IPV4_DEVCONF_NOPOLICY,
	IPV4_DEVCONF_FORCE_IGMP_VERSION,
	IPV4_DEVCONF_ARP_ANNOUNCE,
	IPV4_DEVCONF_ARP_IGNORE,
	IPV4_DEVCONF_PROMOTE_SECONDARIES,
	IPV4_DEVCONF_ARP_ACCEPT,
	IPV4_DEVCONF_ARP_NOTIFY,
	IPV4_DEVCONF_ACCEPT_LOCAL,
	IPV4_DEVCONF_SRC_VMARK,
	IPV4_DEVCONF_PROXY_ARP_PVLAN,
	IPV4_DEVCONF_ROUTE_LOCALNET,
	IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL,
	IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL,
	IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
	__IPV4_DEVCONF_MAX	/* sentinel: one past the last real index */
};

/* Highest valid index; because numbering starts at 1 this is also the entry count. */
#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)

#endif /* _UAPI_LINUX_IP_H */
|