Merge branch 'tcp-take-care-of-empty-skbs-in-write-queue'

Eric Dumazet says:
====================
tcp: take care of empty skbs in write queue

We understood recently that TCP sockets could have an empty
skb at the tail of the write queue, leading to various problems.

This patch series :

1) Make sure we do not send an empty packet since this
   was unintended and causing crashes in old kernels.

2) Change tcp_write_queue_empty() to not be fooled by
   the presence of an empty skb.

3) Fix a bug that could trigger suboptimal epoll()
   application behavior under memory pressure.
====================

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
This commit is contained in:
Jakub Kicinski 2019-12-14 09:57:36 -08:00
commit cd1263b6dc
3 changed files with 23 additions and 7 deletions

View File

@ -1766,9 +1766,18 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
return skb_queue_is_last(&sk->sk_write_queue, skb);
}
/**
* tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
* @sk: socket
*
* Since the write queue can have a temporary empty skb in it,
* we must not use "return skb_queue_empty(&sk->sk_write_queue)"
*/
static inline bool tcp_write_queue_empty(const struct sock *sk)
{
return skb_queue_empty(&sk->sk_write_queue);
const struct tcp_sock *tp = tcp_sk(sk);
return tp->write_seq == tp->snd_nxt;
}
static inline bool tcp_rtx_queue_empty(const struct sock *sk)

View File

@ -1087,8 +1087,7 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
goto out;
out_err:
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
err == -EAGAIN)) {
if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
@ -1419,8 +1418,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
sock_zerocopy_put_abort(uarg, true);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
err == -EAGAIN)) {
if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}

View File

@ -2438,6 +2438,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (tcp_small_queue_check(sk, skb, 0))
break;
/* Argh, we hit an empty skb(), presumably a thread
* is sleeping in sendmsg()/sk_stream_wait_memory().
* We do not want to send a pure-ack packet and have
* a strange looking rtx queue with empty packet(s).
*/
if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
break;
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
@ -3121,7 +3129,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
*/
void tcp_send_fin(struct sock *sk)
{
struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
struct sk_buff *skb, *tskb, *tail = tcp_write_queue_tail(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* Optimization, tack on the FIN if we have one skb in write queue and
@ -3129,6 +3137,7 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
tskb = tail;
if (!tskb && tcp_under_memory_pressure(sk))
tskb = skb_rb_last(&sk->tcp_rtx_queue);
@ -3136,7 +3145,7 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
if (tcp_write_queue_empty(sk)) {
if (!tail) {
/* This means tskb was already sent.
* Pretend we included the FIN on previous transmit.
* We need to set tp->snd_nxt to the value it would have