linux_dsm_epyc7002/include/uapi/linux/snmp.h

321 lines
12 KiB
C
Raw Normal View History

License cleanup: add SPDX license identifier to uapi header files with no license Many user space API headers are missing licensing information, which makes it hard for compliance tools to determine the correct license. By default are files without license information under the default license of the kernel, which is GPLV2. Marking them GPLV2 would exclude them from being included in non GPLV2 code, which is obviously not intended. The user space API headers fall under the syscall exception which is in the kernels COPYING file: NOTE! This copyright does *not* cover user programs that use kernel services by normal system calls - this is merely considered normal use of the kernel, and does *not* fall under the heading of "derived work". otherwise syscall usage would not be possible. Update the files which contain no license information with an SPDX license identifier. The chosen identifier is 'GPL-2.0 WITH Linux-syscall-note' which is the officially assigned identifier for the Linux syscall exception. SPDX license identifiers are a legally binding shorthand, which can be used instead of the full boiler plate text. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. See the previous patch in this series for the methodology of how this patch was researched. Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-11-01 21:08:43 +07:00
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Definitions for MIBs
*
* Author: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
*/
#ifndef _LINUX_SNMP_H
#define _LINUX_SNMP_H
/* ipstats mib definitions */
/*
* RFC 1213: MIB-II
* RFC 2011 (updates 1213): SNMPv2-MIB-IP
* RFC 2863: Interfaces Group MIB
* RFC 2465: IPv6 MIB: General Group
* draft-ietf-ipv6-rfc2011-update-10.txt: MIB for IP: IP Statistics Tables
*/
enum
{
IPSTATS_MIB_NUM = 0,
/* frequently written fields in fast path, kept in same cache line */
IPSTATS_MIB_INPKTS, /* InReceives */
IPSTATS_MIB_INOCTETS, /* InOctets */
IPSTATS_MIB_INDELIVERS, /* InDelivers */
IPSTATS_MIB_OUTFORWDATAGRAMS, /* OutForwDatagrams */
IPSTATS_MIB_OUTPKTS, /* OutRequests */
IPSTATS_MIB_OUTOCTETS, /* OutOctets */
/* other fields */
IPSTATS_MIB_INHDRERRORS, /* InHdrErrors */
IPSTATS_MIB_INTOOBIGERRORS, /* InTooBigErrors */
IPSTATS_MIB_INNOROUTES, /* InNoRoutes */
IPSTATS_MIB_INADDRERRORS, /* InAddrErrors */
IPSTATS_MIB_INUNKNOWNPROTOS, /* InUnknownProtos */
IPSTATS_MIB_INTRUNCATEDPKTS, /* InTruncatedPkts */
IPSTATS_MIB_INDISCARDS, /* InDiscards */
IPSTATS_MIB_OUTDISCARDS, /* OutDiscards */
IPSTATS_MIB_OUTNOROUTES, /* OutNoRoutes */
IPSTATS_MIB_REASMTIMEOUT, /* ReasmTimeout */
IPSTATS_MIB_REASMREQDS, /* ReasmReqds */
IPSTATS_MIB_REASMOKS, /* ReasmOKs */
IPSTATS_MIB_REASMFAILS, /* ReasmFails */
IPSTATS_MIB_FRAGOKS, /* FragOKs */
IPSTATS_MIB_FRAGFAILS, /* FragFails */
IPSTATS_MIB_FRAGCREATES, /* FragCreates */
IPSTATS_MIB_INMCASTPKTS, /* InMcastPkts */
IPSTATS_MIB_OUTMCASTPKTS, /* OutMcastPkts */
IPSTATS_MIB_INBCASTPKTS, /* InBcastPkts */
IPSTATS_MIB_OUTBCASTPKTS, /* OutBcastPkts */
IPSTATS_MIB_INMCASTOCTETS, /* InMcastOctets */
IPSTATS_MIB_OUTMCASTOCTETS, /* OutMcastOctets */
IPSTATS_MIB_INBCASTOCTETS, /* InBcastOctets */
IPSTATS_MIB_OUTBCASTOCTETS, /* OutBcastOctets */
IPSTATS_MIB_CSUMERRORS, /* InCsumErrors */
IPSTATS_MIB_NOECTPKTS, /* InNoECTPkts */
IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */
IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */
IPSTATS_MIB_CEPKTS, /* InCEPkts */
__IPSTATS_MIB_MAX
};
/* icmp mib definitions */
/*
* RFC 1213: MIB-II ICMP Group
* RFC 2011 (updates 1213): SNMPv2 MIB for IP: ICMP group
*/
enum
{
ICMP_MIB_NUM = 0,
ICMP_MIB_INMSGS, /* InMsgs */
ICMP_MIB_INERRORS, /* InErrors */
ICMP_MIB_INDESTUNREACHS, /* InDestUnreachs */
ICMP_MIB_INTIMEEXCDS, /* InTimeExcds */
ICMP_MIB_INPARMPROBS, /* InParmProbs */
ICMP_MIB_INSRCQUENCHS, /* InSrcQuenchs */
ICMP_MIB_INREDIRECTS, /* InRedirects */
ICMP_MIB_INECHOS, /* InEchos */
ICMP_MIB_INECHOREPS, /* InEchoReps */
ICMP_MIB_INTIMESTAMPS, /* InTimestamps */
ICMP_MIB_INTIMESTAMPREPS, /* InTimestampReps */
ICMP_MIB_INADDRMASKS, /* InAddrMasks */
ICMP_MIB_INADDRMASKREPS, /* InAddrMaskReps */
ICMP_MIB_OUTMSGS, /* OutMsgs */
ICMP_MIB_OUTERRORS, /* OutErrors */
ICMP_MIB_OUTDESTUNREACHS, /* OutDestUnreachs */
ICMP_MIB_OUTTIMEEXCDS, /* OutTimeExcds */
ICMP_MIB_OUTPARMPROBS, /* OutParmProbs */
ICMP_MIB_OUTSRCQUENCHS, /* OutSrcQuenchs */
ICMP_MIB_OUTREDIRECTS, /* OutRedirects */
ICMP_MIB_OUTECHOS, /* OutEchos */
ICMP_MIB_OUTECHOREPS, /* OutEchoReps */
ICMP_MIB_OUTTIMESTAMPS, /* OutTimestamps */
ICMP_MIB_OUTTIMESTAMPREPS, /* OutTimestampReps */
ICMP_MIB_OUTADDRMASKS, /* OutAddrMasks */
ICMP_MIB_OUTADDRMASKREPS, /* OutAddrMaskReps */
ICMP_MIB_CSUMERRORS, /* InCsumErrors */
__ICMP_MIB_MAX
};
#define __ICMPMSG_MIB_MAX 512 /* Out+In for all 8-bit ICMP types */
/* icmp6 mib definitions */
/*
* RFC 2466: ICMPv6-MIB
*/
enum
{
ICMP6_MIB_NUM = 0,
ICMP6_MIB_INMSGS, /* InMsgs */
ICMP6_MIB_INERRORS, /* InErrors */
ICMP6_MIB_OUTMSGS, /* OutMsgs */
ICMP6_MIB_OUTERRORS, /* OutErrors */
ICMP6_MIB_CSUMERRORS, /* InCsumErrors */
__ICMP6_MIB_MAX
};
#define __ICMP6MSG_MIB_MAX 512 /* Out+In for all 8-bit ICMPv6 types */
/* tcp mib definitions */
/*
* RFC 1213: MIB-II TCP group
* RFC 2012 (updates 1213): SNMPv2-MIB-TCP
*/
enum
{
TCP_MIB_NUM = 0,
TCP_MIB_RTOALGORITHM, /* RtoAlgorithm */
TCP_MIB_RTOMIN, /* RtoMin */
TCP_MIB_RTOMAX, /* RtoMax */
TCP_MIB_MAXCONN, /* MaxConn */
TCP_MIB_ACTIVEOPENS, /* ActiveOpens */
TCP_MIB_PASSIVEOPENS, /* PassiveOpens */
TCP_MIB_ATTEMPTFAILS, /* AttemptFails */
TCP_MIB_ESTABRESETS, /* EstabResets */
TCP_MIB_CURRESTAB, /* CurrEstab */
TCP_MIB_INSEGS, /* InSegs */
TCP_MIB_OUTSEGS, /* OutSegs */
TCP_MIB_RETRANSSEGS, /* RetransSegs */
TCP_MIB_INERRS, /* InErrs */
TCP_MIB_OUTRSTS, /* OutRsts */
TCP_MIB_CSUMERRORS, /* InCsumErrors */
__TCP_MIB_MAX
};
/* udp mib definitions */
/*
* RFC 1213: MIB-II UDP group
* RFC 2013 (updates 1213): SNMPv2-MIB-UDP
*/
enum
{
UDP_MIB_NUM = 0,
UDP_MIB_INDATAGRAMS, /* InDatagrams */
UDP_MIB_NOPORTS, /* NoPorts */
UDP_MIB_INERRORS, /* InErrors */
UDP_MIB_OUTDATAGRAMS, /* OutDatagrams */
UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */
UDP_MIB_SNDBUFERRORS, /* SndbufErrors */
UDP_MIB_CSUMERRORS, /* InCsumErrors */
UDP_MIB_IGNOREDMULTI, /* IgnoredMulti */
__UDP_MIB_MAX
};
/* linux mib definitions */
enum
{
LINUX_MIB_NUM = 0,
LINUX_MIB_SYNCOOKIESSENT, /* SyncookiesSent */
LINUX_MIB_SYNCOOKIESRECV, /* SyncookiesRecv */
LINUX_MIB_SYNCOOKIESFAILED, /* SyncookiesFailed */
LINUX_MIB_EMBRYONICRSTS, /* EmbryonicRsts */
LINUX_MIB_PRUNECALLED, /* PruneCalled */
LINUX_MIB_RCVPRUNED, /* RcvPruned */
LINUX_MIB_OFOPRUNED, /* OfoPruned */
LINUX_MIB_OUTOFWINDOWICMPS, /* OutOfWindowIcmps */
LINUX_MIB_LOCKDROPPEDICMPS, /* LockDroppedIcmps */
LINUX_MIB_ARPFILTER, /* ArpFilter */
LINUX_MIB_TIMEWAITED, /* TimeWaited */
LINUX_MIB_TIMEWAITRECYCLED, /* TimeWaitRecycled */
LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */
LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */
LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */
LINUX_MIB_DELAYEDACKS, /* DelayedACKs */
LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */
LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */
LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */
LINUX_MIB_LISTENDROPS, /* ListenDrops */
LINUX_MIB_TCPHPHITS, /* TCPHPHits */
LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */
LINUX_MIB_TCPHPACKS, /* TCPHPAcks */
LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */
LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */
LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */
LINUX_MIB_TCPSACKREORDER, /* TCPSACKReorder */
LINUX_MIB_TCPRENOREORDER, /* TCPRenoReorder */
LINUX_MIB_TCPTSREORDER, /* TCPTSReorder */
LINUX_MIB_TCPFULLUNDO, /* TCPFullUndo */
LINUX_MIB_TCPPARTIALUNDO, /* TCPPartialUndo */
LINUX_MIB_TCPDSACKUNDO, /* TCPDSACKUndo */
LINUX_MIB_TCPLOSSUNDO, /* TCPLossUndo */
LINUX_MIB_TCPLOSTRETRANSMIT, /* TCPLostRetransmit */
LINUX_MIB_TCPRENOFAILURES, /* TCPRenoFailures */
LINUX_MIB_TCPSACKFAILURES, /* TCPSackFailures */
LINUX_MIB_TCPLOSSFAILURES, /* TCPLossFailures */
LINUX_MIB_TCPFASTRETRANS, /* TCPFastRetrans */
LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */
LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */
tcp: Tail loss probe (TLP) This patch series implement the Tail loss probe (TLP) algorithm described in http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01. The first patch implements the basic algorithm. TLP's goal is to reduce tail latency of short transactions. It achieves this by converting retransmission timeouts (RTOs) occuring due to tail losses (losses at end of transactions) into fast recovery. TLP transmits one packet in two round-trips when a connection is in Open state and isn't receiving any ACKs. The transmitted packet, aka loss probe, can be either new or a retransmission. When there is tail loss, the ACK from a loss probe triggers FACK/early-retransmit based fast recovery, thus avoiding a costly RTO. In the absence of loss, there is no change in the connection state. PTO stands for probe timeout. It is a timer event indicating that an ACK is overdue and triggers a loss probe packet. The PTO value is set to max(2*SRTT, 10ms) and is adjusted to account for delayed ACK timer when there is only one oustanding packet. TLP Algorithm On transmission of new data in Open state: -> packets_out > 1: schedule PTO in max(2*SRTT, 10ms). -> packets_out == 1: schedule PTO in max(2*RTT, 1.5*RTT + 200ms) -> PTO = min(PTO, RTO) Conditions for scheduling PTO: -> Connection is in Open state. -> Connection is either cwnd limited or no new data to send. -> Number of probes per tail loss episode is limited to one. -> Connection is SACK enabled. When PTO fires: new_segment_exists: -> transmit new segment. -> packets_out++. cwnd remains same. no_new_packet: -> retransmit the last segment. Its ACK triggers FACK or early retransmit based recovery. ACK path: -> rearm RTO at start of ACK processing. -> reschedule PTO if need be. In addition, the patch includes a small variation to the Early Retransmit (ER) algorithm, such that ER and TLP together can in principle recover any N-degree of tail loss through fast recovery. TLP is controlled by the same sysctl as ER, tcp_early_retrans sysctl. tcp_early_retrans==0; disables TLP and ER. ==1; enables RFC5827 ER. ==2; delayed ER. ==3; TLP and delayed ER. [DEFAULT] ==4; TLP only. The TLP patch series have been extensively tested on Google Web servers. It is most effective for short Web trasactions, where it reduced RTOs by 15% and improved HTTP response time (average by 6%, 99th percentile by 10%). The transmitted probes account for <0.5% of the overall transmissions. Signed-off-by: Nandita Dukkipati <nanditad@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Acked-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-03-11 17:00:43 +07:00
LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */
LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */
LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */
LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */
LINUX_MIB_TCPRCVCOLLAPSED, /* TCPRcvCollapsed */
LINUX_MIB_TCPDSACKOLDSENT, /* TCPDSACKOldSent */
LINUX_MIB_TCPDSACKOFOSENT, /* TCPDSACKOfoSent */
LINUX_MIB_TCPDSACKRECV, /* TCPDSACKRecv */
LINUX_MIB_TCPDSACKOFORECV, /* TCPDSACKOfoRecv */
LINUX_MIB_TCPABORTONDATA, /* TCPAbortOnData */
LINUX_MIB_TCPABORTONCLOSE, /* TCPAbortOnClose */
LINUX_MIB_TCPABORTONMEMORY, /* TCPAbortOnMemory */
LINUX_MIB_TCPABORTONTIMEOUT, /* TCPAbortOnTimeout */
LINUX_MIB_TCPABORTONLINGER, /* TCPAbortOnLinger */
LINUX_MIB_TCPABORTFAILED, /* TCPAbortFailed */
LINUX_MIB_TCPMEMORYPRESSURES, /* TCPMemoryPressures */
LINUX_MIB_TCPMEMORYPRESSURESCHRONO, /* TCPMemoryPressuresChrono */
LINUX_MIB_TCPSACKDISCARD, /* TCPSACKDiscard */
LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPSACKIgnoredOld */
LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPSACKIgnoredNoUndo */
LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */
LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */
LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */
LINUX_MIB_TCPMD5FAILURE, /* TCPMD5Failure */
LINUX_MIB_SACKSHIFTED,
LINUX_MIB_SACKMERGED,
LINUX_MIB_SACKSHIFTFALLBACK,
LINUX_MIB_TCPBACKLOGDROP,
LINUX_MIB_PFMEMALLOCDROP,
LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
LINUX_MIB_TCPDEFERACCEPTDROP,
LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */
LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */
LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */
LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */
LINUX_MIB_TCPRCVCOALESCE, /* TCPRcvCoalesce */
LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */
LINUX_MIB_TCPOFODROP, /* TCPOFODrop */
LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */
LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */
LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */
LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */
LINUX_MIB_TCPFASTOPENACTIVEFAIL, /* TCPFastOpenActiveFail */
LINUX_MIB_TCPFASTOPENPASSIVE, /* TCPFastOpenPassive*/
LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */
LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */
LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */
LINUX_MIB_TCPFASTOPENBLACKHOLE, /* TCPFastOpenBlackholeDetect */
LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */
LINUX_MIB_BUSYPOLLRXPACKETS, /* BusyPollRxPackets */
tcp: auto corking With the introduction of TCP Small Queues, TSO auto sizing, and TCP pacing, we can implement Automatic Corking in the kernel, to help applications doing small write()/sendmsg() to TCP sockets. Idea is to change tcp_push() to check if the current skb payload is under skb optimal size (a multiple of MSS bytes) If under 'size_goal', and at least one packet is still in Qdisc or NIC TX queues, set the TCP Small Queue Throttled bit, so that the push will be delayed up to TX completion time. This delay might allow the application to coalesce more bytes in the skb in following write()/sendmsg()/sendfile() system calls. The exact duration of the delay is depending on the dynamics of the system, and might be zero if no packet for this flow is actually held in Qdisc or NIC TX ring. Using FQ/pacing is a way to increase the probability of autocorking being triggered. Add a new sysctl (/proc/sys/net/ipv4/tcp_autocorking) to control this feature and default it to 1 (enabled) Add a new SNMP counter : nstat -a | grep TcpExtTCPAutoCorking This counter is incremented every time we detected skb was under used and its flush was deferred. Tested: Interesting effects when using line buffered commands under ssh. Excellent performance results in term of cpu usage and total throughput. lpq83:~# echo 1 >/proc/sys/net/ipv4/tcp_autocorking lpq83:~# perf stat ./super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128 9410.39 Performance counter stats for './super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128': 35209.439626 task-clock # 2.901 CPUs utilized 2,294 context-switches # 0.065 K/sec 101 CPU-migrations # 0.003 K/sec 4,079 page-faults # 0.116 K/sec 97,923,241,298 cycles # 2.781 GHz [83.31%] 51,832,908,236 stalled-cycles-frontend # 52.93% frontend cycles idle [83.30%] 25,697,986,603 stalled-cycles-backend # 26.24% backend cycles idle [66.70%] 102,225,978,536 instructions # 1.04 insns per cycle # 0.51 stalled cycles per insn [83.38%] 18,657,696,819 branches # 529.906 M/sec [83.29%] 91,679,646 branch-misses # 0.49% of all branches [83.40%] 12.136204899 seconds time elapsed lpq83:~# echo 0 >/proc/sys/net/ipv4/tcp_autocorking lpq83:~# perf stat ./super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128 6624.89 Performance counter stats for './super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128': 40045.864494 task-clock # 3.301 CPUs utilized 171 context-switches # 0.004 K/sec 53 CPU-migrations # 0.001 K/sec 4,080 page-faults # 0.102 K/sec 111,340,458,645 cycles # 2.780 GHz [83.34%] 61,778,039,277 stalled-cycles-frontend # 55.49% frontend cycles idle [83.31%] 29,295,522,759 stalled-cycles-backend # 26.31% backend cycles idle [66.67%] 108,654,349,355 instructions # 0.98 insns per cycle # 0.57 stalled cycles per insn [83.34%] 19,552,170,748 branches # 488.244 M/sec [83.34%] 157,875,417 branch-misses # 0.81% of all branches [83.34%] 12.130267788 seconds time elapsed Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-12-06 13:36:05 +07:00
LINUX_MIB_TCPAUTOCORKING, /* TCPAutoCorking */
LINUX_MIB_TCPFROMZEROWINDOWADV, /* TCPFromZeroWindowAdv */
LINUX_MIB_TCPTOZEROWINDOWADV, /* TCPToZeroWindowAdv */
LINUX_MIB_TCPWANTZEROWINDOWADV, /* TCPWantZeroWindowAdv */
LINUX_MIB_TCPSYNRETRANS, /* TCPSynRetrans */
LINUX_MIB_TCPORIGDATASENT, /* TCPOrigDataSent */
LINUX_MIB_TCPHYSTARTTRAINDETECT, /* TCPHystartTrainDetect */
LINUX_MIB_TCPHYSTARTTRAINCWND, /* TCPHystartTrainCwnd */
LINUX_MIB_TCPHYSTARTDELAYDETECT, /* TCPHystartDelayDetect */
LINUX_MIB_TCPHYSTARTDELAYCWND, /* TCPHystartDelayCwnd */
tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks Helpers for mitigating ACK loops by rate-limiting dupacks sent in response to incoming out-of-window packets. This patch includes: - rate-limiting logic - sysctl to control how often we allow dupacks to out-of-window packets - SNMP counter for cases where we rate-limited our dupack sending The rate-limiting logic in this patch decides to not send dupacks in response to out-of-window segments if (a) they are SYNs or pure ACKs and (b) the remote endpoint is sending them faster than the configured rate limit. We rate-limit our responses rather than blocking them entirely or resetting the connection, because legitimate connections can rely on dupacks in response to some out-of-window segments. For example, zero window probes are typically sent with a sequence number that is below the current window, and ZWPs thus expect to thus elicit a dupack in response. We allow dupacks in response to TCP segments with data, because these may be spurious retransmissions for which the remote endpoint wants to receive DSACKs. This is safe because segments with data can't realistically be part of ACK loops, which by their nature consist of each side sending pure/data-less ACKs to each other. The dupack interval is controlled by a new sysctl knob, tcp_invalid_ratelimit, given in milliseconds, in case an administrator needs to dial this upward in the face of a high-rate DoS attack. The name and units are chosen to be analogous to the existing analogous knob for ICMP, icmp_ratelimit. The default value for tcp_invalid_ratelimit is 500ms, which allows at most one such dupack per 500ms. This is chosen to be 2x faster than the 1-second minimum RTO interval allowed by RFC 6298 (section 2, rule 2.4). We allow the extra 2x factor because network delay variations can cause packets sent at 1 second intervals to be compressed and arrive much closer. Reported-by: Avery Fay <avery@mixpanel.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-02-07 04:04:38 +07:00
LINUX_MIB_TCPACKSKIPPEDSYNRECV, /* TCPACKSkippedSynRecv */
LINUX_MIB_TCPACKSKIPPEDPAWS, /* TCPACKSkippedPAWS */
LINUX_MIB_TCPACKSKIPPEDSEQ, /* TCPACKSkippedSeq */
LINUX_MIB_TCPACKSKIPPEDFINWAIT2, /* TCPACKSkippedFinWait2 */
LINUX_MIB_TCPACKSKIPPEDTIMEWAIT, /* TCPACKSkippedTimeWait */
LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */
LINUX_MIB_TCPWINPROBE, /* TCPWinProbe */
LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */
LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */
LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */
LINUX_MIB_TCPDELIVERED, /* TCPDelivered */
LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */
LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
__LINUX_MIB_MAX
};
/* linux Xfrm mib definitions */
enum
{
LINUX_MIB_XFRMNUM = 0,
LINUX_MIB_XFRMINERROR, /* XfrmInError */
LINUX_MIB_XFRMINBUFFERERROR, /* XfrmInBufferError */
LINUX_MIB_XFRMINHDRERROR, /* XfrmInHdrError */
LINUX_MIB_XFRMINNOSTATES, /* XfrmInNoStates */
LINUX_MIB_XFRMINSTATEPROTOERROR, /* XfrmInStateProtoError */
LINUX_MIB_XFRMINSTATEMODEERROR, /* XfrmInStateModeError */
LINUX_MIB_XFRMINSTATESEQERROR, /* XfrmInStateSeqError */
LINUX_MIB_XFRMINSTATEEXPIRED, /* XfrmInStateExpired */
LINUX_MIB_XFRMINSTATEMISMATCH, /* XfrmInStateMismatch */
LINUX_MIB_XFRMINSTATEINVALID, /* XfrmInStateInvalid */
LINUX_MIB_XFRMINTMPLMISMATCH, /* XfrmInTmplMismatch */
LINUX_MIB_XFRMINNOPOLS, /* XfrmInNoPols */
LINUX_MIB_XFRMINPOLBLOCK, /* XfrmInPolBlock */
LINUX_MIB_XFRMINPOLERROR, /* XfrmInPolError */
LINUX_MIB_XFRMOUTERROR, /* XfrmOutError */
LINUX_MIB_XFRMOUTBUNDLEGENERROR, /* XfrmOutBundleGenError */
LINUX_MIB_XFRMOUTBUNDLECHECKERROR, /* XfrmOutBundleCheckError */
LINUX_MIB_XFRMOUTNOSTATES, /* XfrmOutNoStates */
LINUX_MIB_XFRMOUTSTATEPROTOERROR, /* XfrmOutStateProtoError */
LINUX_MIB_XFRMOUTSTATEMODEERROR, /* XfrmOutStateModeError */
LINUX_MIB_XFRMOUTSTATESEQERROR, /* XfrmOutStateSeqError */
LINUX_MIB_XFRMOUTSTATEEXPIRED, /* XfrmOutStateExpired */
LINUX_MIB_XFRMOUTPOLBLOCK, /* XfrmOutPolBlock */
LINUX_MIB_XFRMOUTPOLDEAD, /* XfrmOutPolDead */
LINUX_MIB_XFRMOUTPOLERROR, /* XfrmOutPolError */
LINUX_MIB_XFRMFWDHDRERROR, /* XfrmFwdHdrError*/
LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */
LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */
__LINUX_MIB_XFRMMAX
};
#endif /* _LINUX_SNMP_H */