linux_dsm_epyc7002/net/mptcp/syncookies.c
Jianguo Wu 0fa11e1a20 mptcp: fix warning in __skb_flow_dissect() when do syn cookie for subflow join
[ Upstream commit 0c71929b5893e410e0efbe1bbeca6f19a5f19956 ]

I ran a stress test with wrk[1] and webfsd[2], with the assistance of
mptcp-tools[3]:

  Server side:
      ./use_mptcp.sh webfsd -4 -R /tmp/ -p 8099
  Client side:
      ./use_mptcp.sh wrk -c 200 -d 30 -t 4 http://192.168.174.129:8099/

and got the following warning message:

[   55.552626] TCP: request_sock_subflow: Possible SYN flooding on port 8099. Sending cookies.  Check SNMP counters.
[   55.553024] ------------[ cut here ]------------
[   55.553027] WARNING: CPU: 0 PID: 10 at net/core/flow_dissector.c:984 __skb_flow_dissect+0x280/0x1650
...
[   55.553117] CPU: 0 PID: 10 Comm: ksoftirqd/0 Not tainted 5.12.0+ #18
[   55.553121] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020
[   55.553124] RIP: 0010:__skb_flow_dissect+0x280/0x1650
...
[   55.553133] RSP: 0018:ffffb79580087770 EFLAGS: 00010246
[   55.553137] RAX: 0000000000000000 RBX: ffffffff8ddb58e0 RCX: ffffb79580087888
[   55.553139] RDX: ffffffff8ddb58e0 RSI: ffff8f7e4652b600 RDI: 0000000000000000
[   55.553141] RBP: ffffb79580087858 R08: 0000000000000000 R09: 0000000000000008
[   55.553143] R10: 000000008c622965 R11: 00000000d3313a5b R12: ffff8f7e4652b600
[   55.553146] R13: ffff8f7e465c9062 R14: 0000000000000000 R15: ffffb79580087888
[   55.553149] FS:  0000000000000000(0000) GS:ffff8f7f75e00000(0000) knlGS:0000000000000000
[   55.553152] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.553154] CR2: 00007f73d1d19000 CR3: 0000000135e10004 CR4: 00000000003706f0
[   55.553160] Call Trace:
[   55.553166]  ? __sha256_final+0x67/0xd0
[   55.553173]  ? sha256+0x7e/0xa0
[   55.553177]  __skb_get_hash+0x57/0x210
[   55.553182]  subflow_init_req_cookie_join_save+0xac/0xc0
[   55.553189]  subflow_check_req+0x474/0x550
[   55.553195]  ? ip_route_output_key_hash+0x67/0x90
[   55.553200]  ? xfrm_lookup_route+0x1d/0xa0
[   55.553207]  subflow_v4_route_req+0x8e/0xd0
[   55.553212]  tcp_conn_request+0x31e/0xab0
[   55.553218]  ? selinux_socket_sock_rcv_skb+0x116/0x210
[   55.553224]  ? tcp_rcv_state_process+0x179/0x6d0
[   55.553229]  tcp_rcv_state_process+0x179/0x6d0
[   55.553235]  tcp_v4_do_rcv+0xaf/0x220
[   55.553239]  tcp_v4_rcv+0xce4/0xd80
[   55.553243]  ? ip_route_input_rcu+0x246/0x260
[   55.553248]  ip_protocol_deliver_rcu+0x35/0x1b0
[   55.553253]  ip_local_deliver_finish+0x44/0x50
[   55.553258]  ip_local_deliver+0x6c/0x110
[   55.553262]  ? ip_rcv_finish_core.isra.19+0x5a/0x400
[   55.553267]  ip_rcv+0xd1/0xe0
...

After debugging, I found that in __skb_flow_dissect() skb->dev and skb->sk
are both NULL, so net is NULL and WARN_ON_ONCE(!net) triggers. In fact,
net is always NULL in this code path, because skb->dev is set to NULL in
tcp_v4_rcv() and skb->sk is never set.

Code snippet in __skb_flow_dissect() that triggers the warning:
  975         if (skb) {
  976                 if (!net) {
  977                         if (skb->dev)
  978                                 net = dev_net(skb->dev);
  979                         else if (skb->sk)
  980                                 net = sock_net(skb->sk);
  981                 }
  982         }
  983
  984         WARN_ON_ONCE(!net);

So, use a hash derived from the sequence number and the transport header
instead.

[1] https://github.com/wg/wrk
[2] https://github.com/ourway/webfsd
[3] https://github.com/pabeni/mptcp-tools

Fixes: 9466a1cceb ("mptcp: enable JOIN requests even if cookies are in use")
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2021-07-28 14:35:34 +02:00

145 lines
4.0 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/skbuff.h>
#include "protocol.h"
/* Syncookies do not work for JOIN requests.
*
* Unlike MP_CAPABLE, where the ACK cookie contains the needed MPTCP
* options to reconstruct the initial syn state, MP_JOIN does not contain
* the token to obtain the mptcp socket nor the server-generated nonce
* that was used in the cookie SYN/ACK response.
*
* Keep a small best effort state table to store the syn/synack data,
* indexed by a keyed hash of the initial sequence number, the TCP ports
* and the netns (see mptcp_join_entry_hash()).
*
* A MP_JOIN SYN packet handled by syn cookies is only stored if the 32bit
* token matches a known mptcp connection that can still accept more subflows.
*
* There is no timeout handling -- state is only re-constructed
* when the TCP ACK passed the cookie validation check.
*/
/**
 * struct join_entry - MP_JOIN handshake state kept for one table slot
 * @token:        copy of subflow_req->token taken when the state is saved
 * @remote_nonce: copy of subflow_req->remote_nonce
 * @local_nonce:  copy of subflow_req->local_nonce
 * @join_id:      copy of subflow_req->remote_id
 * @local_id:     copy of subflow_req->local_id; written on save, not read
 *                back by the lookup in this file
 * @backup:       copy of subflow_req->backup
 * @valid:        set to 1 on save, cleared as soon as a lookup consumes
 *                the slot
 */
struct join_entry {
	u32	token;
	u32	remote_nonce;
	u32	local_nonce;
	u8	join_id;
	u8	local_id;
	u8	backup;
	u8	valid;
};
/* Number of slots in the best-effort MP_JOIN cookie state table. */
#define COOKIE_JOIN_SLOTS 1024
/* Saved SYN/SYN-ACK state, one entry per slot; the slot index is the value
 * returned by mptcp_join_entry_hash().
 */
static struct join_entry join_entries[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;
/* join_entry_locks[i] guards join_entries[i]; every lock is initialised by
 * mptcp_join_cookie_init().
 */
static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp;
static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
{
static u32 mptcp_join_hash_secret __read_mostly;
struct tcphdr *th = tcp_hdr(skb);
u32 seq, i;
net_get_random_once(&mptcp_join_hash_secret,
sizeof(mptcp_join_hash_secret));
if (th->syn)
seq = TCP_SKB_CB(skb)->seq;
else
seq = TCP_SKB_CB(skb)->seq - 1;
i = jhash_3words(seq, net_hash_mix(net),
(__force __u32)th->source << 16 | (__force __u32)th->dest,
mptcp_join_hash_secret);
return i % ARRAY_SIZE(join_entries);
}
static void mptcp_join_store_state(struct join_entry *entry,
const struct mptcp_subflow_request_sock *subflow_req)
{
entry->token = subflow_req->token;
entry->remote_nonce = subflow_req->remote_nonce;
entry->local_nonce = subflow_req->local_nonce;
entry->backup = subflow_req->backup;
entry->join_id = subflow_req->remote_id;
entry->local_id = subflow_req->local_id;
entry->valid = 1;
}
void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req,
struct sk_buff *skb)
{
struct net *net = read_pnet(&subflow_req->sk.req.ireq_net);
u32 i = mptcp_join_entry_hash(skb, net);
/* No use in waiting if other cpu is already using this slot --
* would overwrite the data that got stored.
*/
spin_lock_bh(&join_entry_locks[i]);
mptcp_join_store_state(&join_entries[i], subflow_req);
spin_unlock_bh(&join_entry_locks[i]);
}
/* Called for a cookie-ack with MP_JOIN option present.
* Look up the saved state based on skb hash & check token matches msk
* in same netns.
*
* Caller will check msk can still accept another subflow. The hmac
* present in the cookie ACK mptcp option space will be checked later.
*/
bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req,
struct sk_buff *skb)
{
struct net *net = read_pnet(&subflow_req->sk.req.ireq_net);
u32 i = mptcp_join_entry_hash(skb, net);
struct mptcp_sock *msk;
struct join_entry *e;
e = &join_entries[i];
spin_lock_bh(&join_entry_locks[i]);
if (e->valid == 0) {
spin_unlock_bh(&join_entry_locks[i]);
return false;
}
e->valid = 0;
msk = mptcp_token_get_sock(e->token);
if (!msk) {
spin_unlock_bh(&join_entry_locks[i]);
return false;
}
/* If this fails, the token got re-used in the mean time by another
* mptcp socket in a different netns, i.e. entry is outdated.
*/
if (!net_eq(sock_net((struct sock *)msk), net))
goto err_put;
subflow_req->remote_nonce = e->remote_nonce;
subflow_req->local_nonce = e->local_nonce;
subflow_req->backup = e->backup;
subflow_req->remote_id = e->join_id;
subflow_req->token = e->token;
subflow_req->msk = msk;
spin_unlock_bh(&join_entry_locks[i]);
return true;
err_put:
spin_unlock_bh(&join_entry_locks[i]);
sock_put((struct sock *)msk);
return false;
}
/* Boot-time setup: initialise the spinlock of every slot in
 * join_entry_locks[].
 */
void __init mptcp_join_cookie_init(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(join_entry_locks); i++)
		spin_lock_init(&join_entry_locks[i]);
}