From 8ea204c2b658eaef55b4716fde469fb66c589a3d Mon Sep 17 00:00:00 2001 From: Ferenc Fejes Date: Sat, 30 May 2020 23:09:00 +0200 Subject: [PATCH 1/3] net: Make locking in sock_bindtoindex optional The sock_bindtoindex intended for kernel wide usage however it will lock the socket regardless of the context. This modification relax this behavior optionally: locking the socket will be optional by calling the sock_bindtoindex with lock_sk = true. The modification applied to all users of the sock_bindtoindex. Signed-off-by: Ferenc Fejes Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/bee6355da40d9e991b2f2d12b67d55ebb5f5b207.1590871065.git.fejes@inf.elte.hu --- include/net/sock.h | 2 +- net/core/sock.c | 10 ++++++---- net/ipv4/udp_tunnel.c | 2 +- net/ipv6/ip6_udp_tunnel.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 6e9f713a7860..c53cc42b5ab9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2690,7 +2690,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) void sock_def_readable(struct sock *sk); -int sock_bindtoindex(struct sock *sk, int ifindex); +int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); void sock_enable_timestamps(struct sock *sk); void sock_no_linger(struct sock *sk); void sock_set_keepalive(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 61ec573221a6..6c4acf1f0220 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -594,13 +594,15 @@ static int sock_bindtoindex_locked(struct sock *sk, int ifindex) return ret; } -int sock_bindtoindex(struct sock *sk, int ifindex) +int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk) { int ret; - lock_sock(sk); + if (lock_sk) + lock_sock(sk); ret = sock_bindtoindex_locked(sk, ifindex); - release_sock(sk); + if (lock_sk) + release_sock(sk); return ret; } @@ -646,7 +648,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, goto out; } - return sock_bindtoindex(sk, index); + return sock_bindtoindex(sk, index, true); out: #endif diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 2158e8bddf41..3eecba0874aa 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -22,7 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->bind_ifindex) { - err = sock_bindtoindex(sock->sk, cfg->bind_ifindex); + err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true); if (err < 0) goto error; } diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 2e0ad1bc84a8..cdc4d4ee2420 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -30,7 +30,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; } if (cfg->bind_ifindex) { - err = sock_bindtoindex(sock->sk, cfg->bind_ifindex); + err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true); if (err < 0) goto error; } From 70c58997c1e864c96dfdf072572047303db8f42a Mon Sep 17 00:00:00 2001 From: Ferenc Fejes Date: Sat, 30 May 2020 23:09:01 +0200 Subject: [PATCH 2/3] bpf: Allow SO_BINDTODEVICE opt in bpf_setsockopt Extending the supported sockopts in bpf_setsockopt with SO_BINDTODEVICE. We call sock_bindtoindex with parameter lock_sk = false in this context because we already owning the socket. Signed-off-by: Ferenc Fejes Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/4149e304867b8d5a606a305bc59e29b063e51f49.1590871065.git.fejes@inf.elte.hu --- net/core/filter.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 85ff827aab73..ae82bcb03124 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4248,6 +4248,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { static int _bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen, u32 flags) { + char devname[IFNAMSIZ]; + struct net *net; + int ifindex; int ret = 0; int val; @@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, sock_owned_by_me(sk); if (level == SOL_SOCKET) { - if (optlen != sizeof(int)) + if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) return -EINVAL; val = *((int *)optval); @@ -4298,6 +4301,29 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, sk_dst_reset(sk); } break; + case SO_BINDTODEVICE: + ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + optlen = min_t(long, optlen, IFNAMSIZ - 1); + strncpy(devname, optval, optlen); + devname[optlen] = 0; + + ifindex = 0; + if (devname[0] != '\0') { + struct net_device *dev; + + ret = -ENODEV; + + net = sock_net(sk); + dev = dev_get_by_name(net, devname); + if (!dev) + break; + ifindex = dev->ifindex; + dev_put(dev); + } + ret = sock_bindtoindex(sk, ifindex, false); +#endif + break; default: ret = -EINVAL; } From 9c441fe4c06a553ad770b6f21616327a3badf793 Mon Sep 17 00:00:00 2001 From: Ferenc Fejes Date: Sat, 30 May 2020 23:09:02 +0200 Subject: [PATCH 3/3] selftests/bpf: Add test for SO_BINDTODEVICE opt of bpf_setsockopt This test intended to verify if SO_BINDTODEVICE option works in bpf_setsockopt. Because we already in the SOL_SOCKET level in this connect bpf prog its safe to verify the sanity in the beginning of the connect_v4_prog by calling the bind_to_device test helper. The testing environment already created by the test_sock_addr.sh script so this test assume that two netdevices already existing in the system: veth pair with names test_sock_addr1 and test_sock_addr2. The test will try to bind the socket to those devices first. Then the test assume there are no netdevice with "nonexistent_dev" name so the bpf_setsockopt will give use ENODEV error. At the end the test remove the device binding from the socket by binding it to an empty name. Signed-off-by: Ferenc Fejes Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/3f055b8e45c65639c5c73d0b4b6c589e60b86f15.1590871065.git.fejes@inf.elte.hu --- .../selftests/bpf/progs/connect4_prog.c | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index c2c85c31cffd..1ab2c5eba86c 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -21,6 +23,10 @@ #define TCP_CA_NAME_MAX 16 #endif +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif + int _version SEC("version") = 1; __attribute__ ((noinline)) @@ -75,6 +81,29 @@ static __inline int set_cc(struct bpf_sock_addr *ctx) return 0; } +static __inline int bind_to_device(struct bpf_sock_addr *ctx) +{ + char veth1[IFNAMSIZ] = "test_sock_addr1"; + char veth2[IFNAMSIZ] = "test_sock_addr2"; + char missing[IFNAMSIZ] = "nonexistent_dev"; + char del_bind[IFNAMSIZ] = ""; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth1, sizeof(veth1))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &veth2, sizeof(veth2))) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -88,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); + /* Bind to device and unbind it. */ + if (bind_to_device(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM)