tcp: remove early retransmit

This patch removes support for RFC 5827 early retransmit (i.e.,
fast recovery on a small inflight with <3 DUPACKs), because it is
subsumed by the new RACK loss detection. More specifically, when
RACK receives DUPACKs it arms a reordering timer to start fast
recovery after a quarter of the (min)RTT, so it covers early
retransmit, except that RACK does not limit itself to a specific
inflight or DUPACK count.
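
A minimal standalone sketch of that reordering-timer idea (not the
kernel's tcp_recovery.c implementation; all names here are
illustrative):

	#include <stdbool.h>
	#include <stdint.h>

	struct rack_sketch {
		uint64_t min_rtt_us;	/* current min RTT estimate */
		uint64_t xmit_time_us;	/* send time of the suspect packet */
	};

	/* Decide whether a DUPACKed-over packet is lost now, or how much
	 * longer a reordering timer should wait before declaring it lost.
	 */
	bool rack_lost_or_wait(const struct rack_sketch *r,
			       uint64_t now_us, uint64_t *wait_us)
	{
		uint64_t reo_wnd = r->min_rtt_us / 4;	/* quarter of min RTT */
		uint64_t waited = now_us - r->xmit_time_us;

		if (waited >= reo_wnd)
			return true;		/* start fast recovery now */
		*wait_us = reo_wnd - waited;	/* arm the reordering timer */
		return false;
	}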

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Yuchung Cheng, 2017-01-12 22:11:39 -08:00; committed by David S. Miller
commit bec41a11dd (parent 840a3cbe89)
12 changed files with 12 additions and 111 deletions

Documentation/networking/ip-sysctl.txt

@@ -246,21 +246,12 @@ tcp_dsack - BOOLEAN
 	Allows TCP to send "duplicate" SACKs.
 
 tcp_early_retrans - INTEGER
-	Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
-	for triggering fast retransmit when the amount of outstanding data is
-	small and when no previously unsent data can be transmitted (such
-	that limited transmit could be used). Also controls the use of
-	Tail loss probe (TLP) that converts RTOs occurring due to tail
-	losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
+	Tail loss probe (TLP) converts RTOs occurring due to tail
+	losses into fast recovery (draft-ietf-tcpm-rack). Note that
+	TLP requires RACK to function properly (see tcp_recovery below)
 	Possible values:
-		0 disables ER
-		1 enables ER
-		2 enables ER but delays fast recovery and fast retransmit
-		  by a fourth of RTT. This mitigates connection falsely
-		  recovers when network has a small degree of reordering
-		  (less than 3 packets).
-		3 enables delayed ER and TLP.
-		4 enables TLP only.
+		0 disables TLP
+		3 or 4 enables TLP
 	Default: 3
 
 tcp_ecn - INTEGER
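
For reference, a minimal sketch in C of selecting the default TLP
setting, assuming the sysctl's standard procfs path
/proc/sys/net/ipv4/tcp_early_retrans and sufficient privileges:

	#include <stdio.h>

	int main(void)
	{
		/* net.ipv4.tcp_early_retrans = 3: TLP enabled (the default) */
		FILE *f = fopen("/proc/sys/net/ipv4/tcp_early_retrans", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("3\n", f);
		fclose(f);
		return 0;
	}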

include/linux/tcp.h

@@ -224,8 +224,7 @@ struct tcp_sock {
 		repair      : 1,
 		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
 	u8	repair_queue;
-	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit */
-		syn_data:1,	/* SYN includes data */
+	u8	syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */

include/net/tcp.h

@@ -565,7 +565,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 			     const struct sk_buff *next_skb);
 
 /* tcp_input.c */
-void tcp_resume_early_retransmit(struct sock *sk);
 void tcp_rearm_rto(struct sock *sk);
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
 void tcp_reset(struct sock *sk);
@@ -1037,24 +1036,6 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
 	tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
 }
 
-/* TCP early-retransmit (ER) is similar to but more conservative than
- * the thin-dupack feature.  Enable ER only if thin-dupack is disabled.
- */
-static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
-{
-	struct net *net = sock_net((struct sock *)tp);
-
-	tp->do_early_retrans = sysctl_tcp_early_retrans &&
-		sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
-		!(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
-		net->ipv4.sysctl_tcp_reordering == 3;
-}
-
-static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
-{
-	tp->do_early_retrans = 0;
-}
-
 static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
 {
 	return tp->sacked_out + tp->lost_out;

net/ipv4/inet_diag.c

@@ -215,7 +215,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	}
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		r->idiag_timer = 1;

net/ipv4/tcp.c

@@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk)
 	tp->mss_cache = TCP_MSS_DEFAULT;
 
 	tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
-	tcp_enable_early_retrans(tp);
 	tcp_assign_congestion_control(sk);
 
 	tp->tsoffset = 0;
@@ -2477,8 +2476,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			err = -EINVAL;
 		else {
 			tp->thin_dupack = val;
-			if (tp->thin_dupack)
-				tcp_disable_early_retrans(tp);
 		}
 		break;
 

net/ipv4/tcp_input.c

@@ -904,8 +904,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 		tcp_disable_fack(tp);
 	}
 
-	if (metric > 0)
-		tcp_disable_early_retrans(tp);
 	tp->rack.reord = 1;
 }
 
@@ -2054,30 +2052,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
-static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned long delay;
-
-	/* Delay early retransmit and entering fast recovery for
-	 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
-	 * available, or RTO is scheduled to fire first.
-	 */
-	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
-	    (flag & FLAG_ECE) || !tp->srtt_us)
-		return false;
-
-	delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
-		    msecs_to_jiffies(2));
-
-	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
-		return false;
-
-	inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
-				  TCP_RTO_MAX);
-	return true;
-}
-
 /* Linux NewReno/SACK/FACK/ECN state machine.
  * --------------------------------------
  *
@@ -2221,16 +2195,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 	    tcp_is_sack(tp) && !tcp_send_head(sk))
 		return true;
 
-	/* Trick#6: TCP early retransmit, per RFC5827.  To avoid spurious
-	 * retransmissions due to small network reorderings, we implement
-	 * Mitigation A.3 in the RFC and delay the retransmission for a short
-	 * interval if appropriate.
-	 */
-	if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
-	    (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
-	    !tcp_may_send_now(sk))
-		return !tcp_pause_early_retransmit(sk, flag);
-
 	return false;
 }
 
@@ -3050,8 +3014,7 @@ void tcp_rearm_rto(struct sock *sk)
 	} else {
 		u32 rto = inet_csk(sk)->icsk_rto;
 		/* Offset the time elapsed after installing regular RTO */
-		if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-		    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 			struct sk_buff *skb = tcp_write_queue_head(sk);
 			const u32 rto_time_stamp =
@@ -3068,24 +3031,6 @@ void tcp_rearm_rto(struct sock *sk)
 	}
 }
 
-/* This function is called when the delayed ER timer fires. TCP enters
- * fast recovery and performs fast-retransmit.
- */
-void tcp_resume_early_retransmit(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_rearm_rto(sk);
-
-	/* Stop if ER is disabled after the delayed ER timer is scheduled */
-	if (!tp->do_early_retrans)
-		return;
-
-	tcp_enter_recovery(sk, false);
-	tcp_update_scoreboard(sk, 1);
-	tcp_xmit_retransmit_queue(sk);
-}
-
 /* If we get here, the whole TSO packet has not been acked. */
 static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 {
@@ -3651,8 +3596,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	skb_mstamp_get(&sack_state.ack_time);
 
-	if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+	if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
 
 	if (after(ack, prior_snd_una)) {

net/ipv4/tcp_ipv4.c

@@ -2229,7 +2229,6 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 	int state;
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		timer_active	= 1;

net/ipv4/tcp_metrics.c

@@ -522,7 +522,6 @@ void tcp_init_metrics(struct sock *sk)
 	val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
 	if (val && tp->reordering != val) {
 		tcp_disable_fack(tp);
-		tcp_disable_early_retrans(tp);
 		tp->reordering = val;
 	}
 

net/ipv4/tcp_minisocks.c

@@ -468,7 +468,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->sacked_out = 0;
 		newtp->fackets_out = 0;
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-		tcp_enable_early_retrans(newtp);
 		newtp->tlp_high_seq = 0;
 		newtp->lsndtime = treq->snt_synack.stamp_jiffies;
 		newsk->sk_txhash = treq->txhash;

net/ipv4/tcp_output.c

@@ -76,10 +76,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
 	tp->packets_out += tcp_skb_pcount(skb);
-	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
-	}
 
 	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
 		      tcp_skb_pcount(skb));
@@ -2289,8 +2287,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
 	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
-	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
-		return false;
 	/* No consecutive loss probes. */
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
 		tcp_rearm_rto(sk);
@@ -2309,8 +2305,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
-	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+	if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+	    !tp->packets_out || !tcp_is_sack(tp) ||
+	    icsk->icsk_ca_state != TCP_CA_Open)
 		return false;
 
 	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&

net/ipv4/tcp_timer.c

@@ -566,9 +566,6 @@ void tcp_write_timer_handler(struct sock *sk)
 	case ICSK_TIME_REO_TIMEOUT:
 		tcp_rack_reo_timeout(sk);
 		break;
-	case ICSK_TIME_EARLY_RETRANS:
-		tcp_resume_early_retransmit(sk);
-		break;
 	case ICSK_TIME_LOSS_PROBE:
 		tcp_send_loss_probe(sk);
 		break;

net/ipv6/tcp_ipv6.c

@@ -1745,7 +1745,6 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	srcp  = ntohs(inet->inet_sport);
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		timer_active	= 1;