From 642e450b6b5955f2059d0ae372183f7c6323f951 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 16 Sep 2020 14:00:25 +0200 Subject: [PATCH 1/7] xsk: Do not discard packet when NETDEV_TX_BUSY In the skb Tx path, transmission of a packet is performed with dev_direct_xmit(). When NETDEV_TX_BUSY is set in the drivers, it signifies that it was not possible to send the packet right now, please try later. Unfortunately, the xsk transmit code discarded the packet and returned EBUSY to the application. Fix this unnecessary packet loss, by not discarding the packet in the Tx ring and return EAGAIN. As EAGAIN is returned to the application, it can then retry the send operation later and the packet will then likely be sent as the driver will then likely have space/resources to send the packet. In summary, EAGAIN tells the application that the packet was not discarded from the Tx ring and that it needs to call send() again. EBUSY, on the other hand, signifies that the packet was not sent and discarded from the Tx ring. The application needs to put the packet on the Tx ring again if it wants it to be sent. Fixes: 35fcde7f8deb ("xsk: support for Tx") Reported-by: Arkadiusz Zema Suggested-by: Arkadiusz Zema Suggested-by: Daniel Borkmann Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/bpf/1600257625-2353-1-git-send-email-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c3231620d210..6c5e09e7440a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -377,15 +377,30 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; + /* Hinder dev_direct_xmit from freeing the packet and + * therefore completing it in the destructor + */ + refcount_inc(&skb->users); err = dev_direct_xmit(skb, xs->queue_id); + if (err == NETDEV_TX_BUSY) { + /* Tell user-space to retry the send */ + skb->destructor = sock_wfree; + /* Free skb without triggering the perf drop trace */ + consume_skb(skb); + err = -EAGAIN; + goto out; + } + xskq_cons_release(xs->tx); /* Ignore NET_XMIT_CN as packet might have been sent */ - if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { + if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ + kfree_skb(skb); err = -EBUSY; goto out; } + consume_skb(skb); sent_frame = true; } From ba2fd563b7408b8dd9bbd72fcae8bcb3c14e8ec8 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Thu, 17 Sep 2020 04:58:33 -0700 Subject: [PATCH 2/7] tools/bpftool: Support passing BPFTOOL_VERSION to make This change facilitates out-of-tree builds, packaging, and versioning for test and debug purposes. Defining BPFTOOL_VERSION allows self-contained builds within the tools tree, since it avoids use of the 'kernelversion' target in the top-level makefile, which would otherwise pull in several other includes from outside the tools tree. Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: Quentin Monnet Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200917115833.1235518-1-Tony.Ambardar@gmail.com --- tools/bpf/bpftool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8462690a039b..4828913703b6 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -25,7 +25,7 @@ endif LIBBPF = $(LIBBPF_PATH)libbpf.a -BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) $(LIBBPF): FORCE $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT)) From e23bb04b0c938588eae41b7f4712b722290ed2b8 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 19 Sep 2020 22:01:33 -0700 Subject: [PATCH 3/7] bpf: Fix sysfs export of empty BTF section If BTF data is missing or removed from the ELF section it is still exported via sysfs as a zero-length file: root@OpenWrt:/# ls -l /sys/kernel/btf/vmlinux -r--r--r-- 1 root root 0 Jul 18 02:59 /sys/kernel/btf/vmlinux Moreover, reads from this file succeed and leak kernel data: root@OpenWrt:/# hexdump -C /sys/kernel/btf/vmlinux|head -10 000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 000cc0 00 00 00 00 00 00 00 00 00 00 00 00 80 83 b0 80 |................| 000cd0 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 000ce0 00 00 00 00 00 00 00 00 00 00 00 00 57 ac 6e 9d |............W.n.| 000cf0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 002650 00 00 00 00 00 00 00 10 00 00 00 01 00 00 00 01 |................| 002660 80 82 9a c4 80 85 97 80 81 a9 51 68 00 00 00 02 |..........Qh....| 002670 80 25 44 dc 80 85 97 80 81 a9 50 24 81 ab c4 60 |.%D.......P$...`| This situation was first observed with kernel 5.4.x, cross-compiled for a MIPS target system. Fix by adding a sanity-check for export of zero-length data sections. Fixes: 341dfcf8d78e ("btf: expose BTF info through sysfs") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/b38db205a66238f70823039a8c531535864eaac5.1600417359.git.Tony.Ambardar@gmail.com --- kernel/bpf/sysfs_btf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 3b495773de5a..11b3380887fa 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -30,15 +30,15 @@ static struct kobject *btf_kobj; static int __init btf_vmlinux_init(void) { - if (!__start_BTF) + bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; + + if (!__start_BTF || bin_attr_btf_vmlinux.size == 0) return 0; btf_kobj = kobject_create_and_add("btf", kernel_kobj); if (!btf_kobj) return -ENOMEM; - bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; - return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); } From 65c204398928f9c79f1a29912b410439f7052635 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 19 Sep 2020 22:01:34 -0700 Subject: [PATCH 4/7] bpf: Prevent .BTF section elimination Systems with memory or disk constraints often reduce the kernel footprint by configuring LD_DEAD_CODE_DATA_ELIMINATION. However, this can result in removal of any BTF information. Use the KEEP() macro to preserve the BTF data as done with other important sections, while still allowing for smaller kernels. Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/a635b5d3e2da044e7b51ec1315e8910fbce0083f.1600417359.git.Tony.Ambardar@gmail.com --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5430febd34be..7636bc71c71f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -661,7 +661,7 @@ #define BTF \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ __start_BTF = .; \ - *(.BTF) \ + KEEP(*(.BTF)) \ __stop_BTF = .; \ } \ . = ALIGN(4); \ From 1245008122d7311683d70c05b2eea167a314fb5f Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 19 Sep 2020 22:01:35 -0700 Subject: [PATCH 5/7] libbpf: Fix native endian assumption when parsing BTF Code in btf__parse_raw() fails to detect raw BTF of non-native endianness and assumes it must be ELF data, which then fails to parse as ELF and yields a misleading error message: root:/# bpftool btf dump file /sys/kernel/btf/vmlinux libbpf: failed to get EHDR from /sys/kernel/btf/vmlinux For example, this could occur after cross-compiling a BTF-enabled kernel for a target with non-native endianness, which is currently unsupported. Check for correct endianness and emit a clearer error message: root:/# bpftool btf dump file /sys/kernel/btf/vmlinux libbpf: non-native BTF endianness is not supported Fixes: 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/90f81508ecc57bc0da318e0fe0f45cfe49b17ea7.1600417359.git.Tony.Ambardar@gmail.com --- tools/lib/bpf/btf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7dfca7016aaa..6bdbc389b493 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -659,6 +659,12 @@ struct btf *btf__parse_raw(const char *path) err = -EIO; goto err_out; } + if (magic == __bswap_16(BTF_MAGIC)) { + /* non-native endian raw BTF */ + pr_warn("non-native BTF endianness is not supported\n"); + err = -LIBBPF_ERRNO__ENDIAN; + goto err_out; + } if (magic != BTF_MAGIC) { /* definitely not a raw BTF */ err = -EPROTO; From 87f92ac4c12758c4da3bbe4393f1d884b610b8a6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 24 Sep 2020 10:17:05 -0700 Subject: [PATCH 6/7] libbpf: Fix XDP program load regression for old kernels Fix regression in libbpf, introduced by XDP link change, which causes XDP programs to fail to be loaded into kernel due to specified BPF_XDP expected_attach_type. While kernel doesn't enforce expected_attach_type for BPF_PROG_TYPE_XDP, some old kernels already support XDP program, but they don't yet recognize expected_attach_type field in bpf_attr, so setting it to non-zero value causes program load to fail. Luckily, libbpf already has a mechanism to deal with such cases, so just make expected_attach_type optional for XDP programs. Fixes: dc8698cac7aa ("libbpf: Add support for BPF XDP link") Reported-by: Nikita Shirokov Reported-by: Udip Pant Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200924171705.3803628-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7253b833576c..e493d6048143 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6925,7 +6925,7 @@ static const struct bpf_sec_def section_defs[] = { BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, BPF_XDP_CPUMAP), - BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP, + BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, BPF_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), From 9cf51446e68607136e42a4e531a30c888c472463 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Mon, 28 Sep 2020 17:00:23 +0800 Subject: [PATCH 7/7] bpf, powerpc: Fix misuse of fallthrough in bpf_jit_comp() The user defined label following "fallthrough" is not considered by GCC and causes build failure. kernel-source/include/linux/compiler_attributes.h:208:41: error: attribute 'fallthrough' not preceding a case label or default label [-Werror] 208 define fallthrough _attribute((fallthrough_)) ^~~~~~~~~~~~~ Fixes: df561f6688fe ("treewide: Use fallthrough pseudo-keyword") Signed-off-by: He Zhe Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. Silva Link: https://lore.kernel.org/bpf/20200928090023.38117-1-zhe.he@windriver.com --- arch/powerpc/net/bpf_jit_comp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 78d61f97371e..e809cb5a1631 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -475,7 +475,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; - fallthrough; cond_branch: /* same targets, can avoid doing the test :) */ if (filter[i].jt == filter[i].jf) {