Merge branch 'tcp-sack-compression-changes'

Eric Dumazet says:

====================
tcp: sack compression changes

Patch series refines SACK compression.

We had issues with missing SACK when TCP option space is tight.

Uses hrtimer slack to improve performance.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-04-30 13:24:01 -07:00
commit 1b2e788490
8 changed files with 68 additions and 15 deletions

View File

@ -651,6 +651,14 @@ tcp_comp_sack_delay_ns - LONG INTEGER
Default : 1,000,000 ns (1 ms)
tcp_comp_sack_slack_ns - LONG INTEGER
This sysctl controls the slack used when arming the
timer used by SACK compression. This gives extra time
for small RTT flows, and reduces system overhead by allowing
opportunistic reduction of timer interrupts.
Default : 100,000 ns (100 us)
tcp_comp_sack_nr - INTEGER
Max number of SACK that can be compressed.
Using 0 disables SACK compression.

View File

@ -268,6 +268,7 @@ struct tcp_sock {
} rack;
u16 advmss; /* Advertised MSS */
u8 compressed_ack;
u8 dup_ack_counter;
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
u8 chrono_type:2, /* current chronograph type */

View File

@ -173,6 +173,7 @@ struct netns_ipv4 {
int sysctl_tcp_rmem[3];
int sysctl_tcp_comp_sack_nr;
unsigned long sysctl_tcp_comp_sack_delay_ns;
unsigned long sysctl_tcp_comp_sack_slack_ns;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;

View File

@ -1329,6 +1329,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "tcp_comp_sack_slack_ns",
.data = &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "tcp_comp_sack_nr",
.data = &init_net.ipv4.sysctl_tcp_comp_sack_nr,

View File

@ -4327,6 +4327,33 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
}
}
/* Flush pending compressed ACKs by sending a single ACK right away.
* Does nothing when tp->compressed_ack is zero. Otherwise the
* compressed-ACK timer is cancelled if possible,
* LINUX_MIB_TCPACKCOMPRESSED is updated, the counter is cleared
* and tcp_send_ack() is called.
*/
static void tcp_sack_compress_send_ack(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
/* No ACK is currently being held back: nothing to flush. */
if (!tp->compressed_ack)
return;
/* hrtimer_try_to_cancel() returns 1 when an active timer was
* cancelled; drop a socket reference in that case.
* NOTE(review): presumably this is the reference taken with
* sock_hold() when the timer was armed -- confirm against
* __tcp_ack_snd_check().
*/
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
/* Since we have to send one ack finally,
* subtract one from tp->compressed_ack to keep
* LINUX_MIB_TCPACKCOMPRESSED accurate.
*/
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
tp->compressed_ack - 1);
/* Clear the counter before the ACK goes out.
* NOTE(review): presumably so that the ACK-sent path
* (tcp_event_ack_sent()) does not account these ACKs a second
* time -- confirm.
*/
tp->compressed_ack = 0;
tcp_send_ack(sk);
}
/* Reasonable number of SACK blocks to expect in a TCP SACK option.
* The max is 4, but this becomes 3 if TCP timestamps are there.
* Given that SACK packets might be lost, be conservative and use 2.
*/
#define TCP_SACK_BLOCKS_EXPECTED 2
static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
@ -4339,6 +4366,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
if (tcp_sack_extend(sp, seq, end_seq)) {
if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
tcp_sack_compress_send_ack(sk);
/* Rotate this_sack to the first one. */
for (; this_sack > 0; this_sack--, sp--)
swap(*sp, *(sp - 1));
@ -4348,6 +4377,9 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
}
}
if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
tcp_sack_compress_send_ack(sk);
/* Could not find an adjacent existing SACK, build a new one,
* put it at the front, and shift everyone else down. We
* always know there is at least one SACK present already here.
@ -4355,8 +4387,6 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
sp--;
@ -5275,15 +5305,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
tp->compressed_ack - TCP_FASTRETRANS_THRESH);
tp->compressed_ack = 0;
tp->dup_ack_counter = 0;
}
if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
tp->dup_ack_counter++;
goto send_now;
}
tp->compressed_ack++;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
@ -5296,8 +5324,9 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
HRTIMER_MODE_REL_PINNED_SOFT);
hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
HRTIMER_MODE_REL_PINNED_SOFT);
}
static inline void tcp_ack_snd_check(struct sock *sk)

View File

@ -2780,6 +2780,7 @@ static int __net_init tcp_sk_init(struct net *net)
sizeof(init_net.ipv4.sysctl_tcp_wmem));
}
net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);

View File

@ -184,10 +184,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
if (unlikely(tp->compressed_ack)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
tp->compressed_ack - TCP_FASTRETRANS_THRESH);
tp->compressed_ack = TCP_FASTRETRANS_THRESH;
tp->compressed_ack);
tp->compressed_ack = 0;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}

View File

@ -753,8 +753,14 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
if (tp->compressed_ack) {
/* Since we have to send one ack finally,
* subtract one from tp->compressed_ack to keep
* LINUX_MIB_TCPACKCOMPRESSED accurate.
*/
tp->compressed_ack--;
tcp_send_ack(sk);
}
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
&sk->sk_tsq_flags))