From 4b7ddc58e61ada82be1f47f82d5139f0ab2ba478 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 22 Aug 2020 18:07:27 -0700 Subject: [PATCH 1/8] netfilter: delete repeated words Drop duplicated words in net/netfilter/ and net/ipv4/netfilter/. Signed-off-by: Randy Dunlap Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_pptp.c | 2 +- net/netfilter/nf_conntrack_pptp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/xt_recent.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 7afde8828b4c..3f248a19faa3 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -3,7 +3,7 @@ * nf_nat_pptp.c * * NAT support for PPTP (Point to Point Tunneling Protocol). - * PPTP is a a protocol for creating virtual private networks. + * PPTP is a protocol for creating virtual private networks. * It is a specification defined by Microsoft and some vendors * working with Microsoft. PPTP is built on top of a modified * version of the Internet Generic Routing Encapsulation Protocol. diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 1f44d523b512..5105d4250012 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Connection tracking support for PPTP (Point to Point Tunneling Protocol). - * PPTP is a a protocol for creating virtual private networks. + * PPTP is a protocol for creating virtual private networks. * It is a specification defined by Microsoft and some vendors * working with Microsoft. PPTP is built on top of a modified * version of the Internet Generic Routing Encapsulation Protocol. diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 6892e497781c..e8c86ee4c1c4 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1152,7 +1152,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { - /* Set ASSURED if we see see valid ack in ESTABLISHED + /* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 19bef176145e..606411869698 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -640,7 +640,7 @@ static void __net_exit recent_proc_net_exit(struct net *net) struct recent_table *t; /* recent_net_exit() is called before recent_mt_destroy(). Make sure - * that the parent xt_recent proc entry is is empty before trying to + * that the parent xt_recent proc entry is empty before trying to * remove it. */ spin_lock_bh(&recent_lock); From ee921183557af39c1a0475f982d43b0fcac25e2e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 23 Aug 2020 13:55:36 +0200 Subject: [PATCH 2/8] netfilter: nfnetlink: nfnetlink_unicast() reports EAGAIN instead of ENOBUFS Frontend callback reports EAGAIN to nfnetlink to retry a command, this is used to signal that module autoloading is required. Unfortunately, nlmsg_unicast() reports EAGAIN in case the receiver socket buffer gets full, so it enters a busy-loop. This patch updates nfnetlink_unicast() to turn EAGAIN into ENOBUFS and to use nlmsg_unicast(). Remove the flags field in nfnetlink_unicast() since this is always MSG_DONTWAIT in the existing code which is exactly what nlmsg_unicast() passes to netlink_unicast() as parameter. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 3 +- net/netfilter/nf_tables_api.c | 61 ++++++++++++++--------------- net/netfilter/nfnetlink.c | 11 ++++-- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 2 +- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 851425c3178f..89016d08f6a2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -43,8 +43,7 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, - int flags); +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid); static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 71e501c5ad21..b7dc1cbf40ea 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -815,11 +815,11 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, family, table); if (err < 0) - goto err; + goto err_fill_table_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_table_info: kfree_skb(skb2); return err; } @@ -1563,11 +1563,11 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, family, table, chain); if (err < 0) - goto err; + goto err_fill_chain_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_chain_info: kfree_skb(skb2); return err; } @@ -3008,11 +3008,11 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule, NULL); if (err < 0) - goto err; + goto err_fill_rule_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_rule_info: kfree_skb(skb2); return err; } @@ -3968,11 +3968,11 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0); if (err < 0) - goto err; + goto err_fill_set_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); -err: +err_fill_set_info: kfree_skb(skb2); return err; } @@ -4860,24 +4860,18 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -ENOMEM; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb == NULL) - goto err1; + return err; err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, NFT_MSG_NEWSETELEM, 0, set, &elem); if (err < 0) - goto err2; + goto err_fill_setelem; - err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT); - /* This avoids a loop in nfnetlink. */ - if (err < 0) - goto err1; + return nfnetlink_unicast(skb, ctx->net, ctx->portid); - return 0; -err2: +err_fill_setelem: kfree_skb(skb); -err1: - /* this avoids a loop in nfnetlink. */ - return err == -EAGAIN ? -ENOBUFS : err; + return err; } /* called with rcu_read_lock held */ @@ -6182,10 +6176,11 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, family, table, obj, reset); if (err < 0) - goto err; + goto err_fill_obj_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); -err: + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + +err_fill_obj_info: kfree_skb(skb2); return err; } @@ -7045,10 +7040,11 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, NFT_MSG_NEWFLOWTABLE, 0, family, flowtable, &flowtable->hook_list); if (err < 0) - goto err; + goto err_fill_flowtable_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); -err: + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + +err_fill_flowtable_info: kfree_skb(skb2); return err; } @@ -7234,10 +7230,11 @@ static int nf_tables_getgen(struct net *net, struct sock *nlsk, err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq); if (err < 0) - goto err; + goto err_fill_gen_info; - return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); -err: + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + +err_fill_gen_info: kfree_skb(skb2); return err; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 5f24edf95830..3a2e64e13b22 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -149,10 +149,15 @@ int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, - int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid) { - return netlink_unicast(net->nfnl, skb, portid, flags); + int err; + + err = nlmsg_unicast(net->nfnl, skb, portid); + if (err == -EAGAIN) + err = -ENOBUFS; + + return err; } EXPORT_SYMBOL_GPL(nfnetlink_unicast); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f02992419850..b35e8d9a5b37 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -356,8 +356,7 @@ __nfulnl_send(struct nfulnl_instance *inst) goto out; } } - nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid); out: inst->qlen = 0; inst->skb = NULL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index dadfc06245a3..d1d8bca03b4f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -681,7 +681,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, net, queue->peer_portid); if (err < 0) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; From da2f849e89ed621f3e0688ec5ba92725ed9f0f92 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 23 Aug 2020 20:15:37 +0200 Subject: [PATCH 3/8] selftests: netfilter: fix header example nft_flowtable.sh is made for bash not sh. Also give values which not return "RTNETLINK answers: Invalid argument" Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_flowtable.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index a47d1d832210..28e32fddf9b2 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -11,7 +11,7 @@ # result in fragmentation and/or PMTU discovery. # # You can check with different Orgininator/Link/Responder MTU eg: -# sh nft_flowtable.sh -o1000 -l500 -r100 +# nft_flowtable.sh -o8000 -l1500 -r2000 # From a7bf670ebe192120cbe0a0ab6448baad6fbf7983 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 23 Aug 2020 20:15:59 +0200 Subject: [PATCH 4/8] selftests: netfilter: exit on invalid parameters exit script with comments when parameters are wrong during address addition. No need for a message when trying to change MTU with lower values: output is self-explanatory. Use short testing sequence to avoid shellcheck warnings (suggested by Stefano Brivio). Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- .../testing/selftests/netfilter/nft_flowtable.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 28e32fddf9b2..dc05c9940597 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -96,10 +96,16 @@ do esac done -ip -net nsr1 link set veth0 mtu $omtu +if ! ip -net nsr1 link set veth0 mtu $omtu; then + exit 1 +fi + ip -net ns1 link set eth0 mtu $omtu -ip -net nsr2 link set veth1 mtu $rmtu +if ! ip -net nsr2 link set veth1 mtu $rmtu; then + exit 1 +fi + ip -net ns2 link set eth0 mtu $rmtu # transfer-net between nsr1 and nsr2. @@ -120,7 +126,10 @@ for i in 1 2; do ip -net ns$i route add default via 10.0.$i.1 ip -net ns$i addr add dead:$i::99/64 dev eth0 ip -net ns$i route add default via dead:$i::1 - ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null + if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then + echo "ERROR: Check Originator/Responder values (problem during address addition)" + exit 1 + fi # don't set ip DF bit for first two tests ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null From d721b68654d0fcf8930fd0d2edfff78df82fb8c4 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 23 Aug 2020 20:17:07 +0200 Subject: [PATCH 5/8] selftests: netfilter: remove unused variable in make_file() 'who' variable was not used in make_file() Problem found using Shellcheck Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_flowtable.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index dc05c9940597..1058952d1b36 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -212,7 +212,6 @@ ns2out=$(mktemp) make_file() { name=$1 - who=$2 SIZE=$((RANDOM % (1024 * 8))) TSIZE=$((SIZE * 1024)) @@ -304,8 +303,8 @@ test_tcp_forwarding_nat() return $lret } -make_file "$ns1in" "ns1" -make_file "$ns2in" "ns2" +make_file "$ns1in" +make_file "$ns2in" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. From 2f4bba4ef77cf01ae805554e8f1d98e57b28f25f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 23 Aug 2020 20:17:39 +0200 Subject: [PATCH 6/8] selftests: netfilter: simplify command testing Fix some shellcheck SC2181 warnings: "Check exit code directly with e.g. 'if mycmd;', not indirectly with $?." as suggested by Stefano Brivio. Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_flowtable.sh | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 1058952d1b36..44a879826236 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -27,8 +27,7 @@ ns2out="" log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) checktool (){ - $1 > /dev/null 2>&1 - if [ $? -ne 0 ];then + if ! $1 > /dev/null 2>&1; then echo "SKIP: Could not $2" exit $ksft_skip fi @@ -187,15 +186,13 @@ if [ $? -ne 0 ]; then fi # test basic connectivity -ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null -if [ $? -ne 0 ];then +if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: ns1 cannot reach ns2" 1>&2 bash exit 1 fi -ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null -if [ $? -ne 0 ];then +if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then echo "ERROR: ns2 cannot reach ns1" 1>&2 exit 1 fi @@ -230,8 +227,7 @@ check_transfer() out=$2 what=$3 - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then + if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 ls -l "$in" ls -l "$out" @@ -268,13 +264,11 @@ test_tcp_forwarding_ip() wait - check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" - if [ $? -ne 0 ];then + if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then lret=1 fi - check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" - if [ $? -ne 0 ];then + if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then lret=1 fi @@ -308,8 +302,7 @@ make_file "$ns2in" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. -test_tcp_forwarding ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 @@ -340,9 +333,7 @@ table ip nat { } EOF -test_tcp_forwarding_nat ns1 ns2 - -if [ $? -eq 0 ] ;then +if test_tcp_forwarding_nat ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT" else echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 @@ -354,8 +345,7 @@ fi # Same as second test, but with PMTU discovery enabled. handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) -ip netns exec nsr1 nft delete rule inet filter forward $handle -if [ $? -ne 0 ] ;then +if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then echo "FAIL: Could not delete large-packet accept rule" exit 1 fi @@ -363,8 +353,7 @@ fi ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -test_tcp_forwarding_nat ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding_nat ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" else echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 @@ -410,8 +399,7 @@ ip -net ns2 route del 192.168.10.1 via 10.0.2.1 ip -net ns2 route add default via 10.0.2.1 ip -net ns2 route add default via dead:2::1 -test_tcp_forwarding ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding ns1 ns2; then echo "PASS: ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" From 67afbda69645a89adb365f1dfa35181e09cd7e80 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 23 Aug 2020 20:18:06 +0200 Subject: [PATCH 7/8] selftests: netfilter: add command usage Avoid bad command arguments. Based on tools/power/cpupower/bench/cpufreq-bench_plot.sh Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_flowtable.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 44a879826236..431296c0f91c 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -86,12 +86,23 @@ omtu=9000 lmtu=1500 rmtu=2000 +usage(){ + echo "nft_flowtable.sh [OPTIONS]" + echo + echo "MTU options" + echo " -o originator" + echo " -l link" + echo " -r responder" + exit 1 +} + while getopts "o:l:r:" o do case $o in o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; + *) usage;; esac done From c46172147ebbeb70094db48d76ab7945d96c638b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:07:18 +0200 Subject: [PATCH 8/8] netfilter: conntrack: do not auto-delete clash entries on reply Its possible that we have more than one packet with the same ct tuple simultaneously, e.g. when an application emits n packets on same UDP socket from multiple threads. NAT rules might be applied to those packets. With the right set of rules, n packets will be mapped to m destinations, where at least two packets end up with the same destination. When this happens, the existing clash resolution may merge the skb that is processed after the first has been received with the identical tuple already in hash table. However, its possible that this identical tuple is a NAT_CLASH tuple. In that case the second skb will be sent, but no reply can be received since the reply that is processed first removes the NAT_CLASH tuple. Do not auto-delete, this gives a 1 second window for replies to be passed back to originator. Packets that are coming later (udp stream case) will not be affected: they match the original ct entry, not a NAT_CLASH one. Also prevent NAT_CLASH entries from getting offloaded. Fixes: 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_udp.c | 26 ++++++++++---------------- net/netfilter/nft_flow_offload.c | 2 +- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 760ca2422816..af402f458ee0 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -81,18 +81,6 @@ static bool udp_error(struct sk_buff *skb, return false; } -static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct, - struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - u32 extra_jiffies) -{ - if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY && - ct->status & IPS_NAT_CLASH)) - nf_ct_kill(ct); - else - nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies); -} - /* Returns verdict for packet, and may modify conntracktype */ int nf_conntrack_udp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -124,12 +112,15 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, extra); + /* never set ASSURED for IPS_NAT_CLASH, they time out soon */ + if (unlikely((ct->status & IPS_NAT_CLASH))) + return NF_ACCEPT; + /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { - nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, - timeouts[UDP_CT_UNREPLIED]); + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } @@ -206,12 +197,15 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct, if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_REPLIED]); + + if (unlikely((ct->status & IPS_NAT_CLASH))) + return NF_ACCEPT; + /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { - nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, - timeouts[UDP_CT_UNREPLIED]); + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 3b9b97aa4b32..3a6c84fb2c90 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -102,7 +102,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, } if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || - ct->status & IPS_SEQ_ADJUST) + ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH)) goto out; if (!nf_ct_is_confirmed(ct))