diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt new file mode 100644 index 000000000000..cefef855dea4 --- /dev/null +++ b/Documentation/bpf/bpf_devel_QA.txt @@ -0,0 +1,519 @@ +This document provides information for the BPF subsystem about various +workflows related to reporting bugs, submitting patches, and queueing +patches for stable kernels. + +For general information about submitting patches, please refer to +Documentation/process/. This document only describes additional specifics +related to BPF. + +Reporting bugs: +--------------- + +Q: How do I report bugs for BPF kernel code? + +A: Since all BPF kernel development as well as bpftool and iproute2 BPF + loader development happens through the netdev kernel mailing list, + please report any found issues around BPF to the following mailing + list: + + netdev@vger.kernel.org + + This may also include issues related to XDP, BPF tracing, etc. + + Given netdev has a high volume of traffic, please also add the BPF + maintainers to Cc (from kernel MAINTAINERS file): + + Alexei Starovoitov + Daniel Borkmann + + In case a buggy commit has already been identified, make sure to keep + the actual commit authors in Cc as well for the report. They can + typically be identified through the kernel's git tree. + + Please do *not* report BPF issues to bugzilla.kernel.org since it + is a guarantee that the reported issue will be overlooked. + +Submitting patches: +------------------- + +Q: To which mailing list do I need to submit my BPF patches? + +A: Please submit your BPF patches to the netdev kernel mailing list: + + netdev@vger.kernel.org + + Historically, BPF came out of networking and has always been maintained + by the kernel networking community. Although these days BPF touches + many other subsystems as well, the patches are still routed mainly + through the networking community. + + In case your patch has changes in various different subsystems (e.g. + tracing, security, etc), make sure to Cc the related kernel mailing + lists and maintainers from there as well, so they are able to review + the changes and provide their Acked-by's to the patches. + +Q: Where can I find patches currently under discussion for BPF subsystem? + +A: All patches that are Cc'ed to netdev are queued for review under netdev + patchwork project: + + http://patchwork.ozlabs.org/project/netdev/list/ + + Those patches which target BPF, are assigned to a 'bpf' delegate for + further processing from BPF maintainers. The current queue with + patches under review can be found at: + + https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147 + + Once the patches have been reviewed by the BPF community as a whole + and approved by the BPF maintainers, their status in patchwork will be + changed to 'Accepted' and the submitter will be notified by mail. This + means that the patches look good from a BPF perspective and have been + applied to one of the two BPF kernel trees. + + In case feedback from the community requires a respin of the patches, + their status in patchwork will be set to 'Changes Requested', and purged + from the current review queue. Likewise for cases where patches would + get rejected or are not applicable to the BPF trees (but assigned to + the 'bpf' delegate). + +Q: How do the changes make their way into Linux? + +A: There are two BPF kernel trees (git repositories). Once patches have + been accepted by the BPF maintainers, they will be applied to one + of the two BPF trees: + + https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/ + https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/ + + The bpf tree itself is for fixes only, whereas bpf-next for features, + cleanups or other kind of improvements ("next-like" content). This is + analogous to net and net-next trees for networking. Both bpf and + bpf-next will only have a master branch in order to simplify against + which branch patches should get rebased to. + + Accumulated BPF patches in the bpf tree will regularly get pulled + into the net kernel tree. Likewise, accumulated BPF patches accepted + into the bpf-next tree will make their way into net-next tree. net and + net-next are both run by David S. Miller. From there, they will go + into the kernel mainline tree run by Linus Torvalds. To read up on the + process of net and net-next being merged into the mainline tree, see + the netdev FAQ under: + + Documentation/networking/netdev-FAQ.txt + + Occasionally, to prevent merge conflicts, we might send pull requests + to other trees (e.g. tracing) with a small subset of the patches, but + net and net-next are always the main trees targeted for integration. + + The pull requests will contain a high-level summary of the accumulated + patches and can be searched on netdev kernel mailing list through the + following subject lines (yyyy-mm-dd is the date of the pull request): + + pull-request: bpf yyyy-mm-dd + pull-request: bpf-next yyyy-mm-dd + +Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be + applied to? + +A: The process is the very same as described in the netdev FAQ, so + please read up on it. The subject line must indicate whether the + patch is a fix or rather "next-like" content in order to let the + maintainers know whether it is targeted at bpf or bpf-next. + + For fixes eventually landing in bpf -> net tree, the subject must + look like: + + git format-patch --subject-prefix='PATCH bpf' start..finish + + For features/improvements/etc that should eventually land in + bpf-next -> net-next, the subject must look like: + + git format-patch --subject-prefix='PATCH bpf-next' start..finish + + If unsure whether the patch or patch series should go into bpf + or net directly, or bpf-next or net-next directly, it is not a + problem either if the subject line says net or net-next as target. + It is eventually up to the maintainers to do the delegation of + the patches. + + If it is clear that patches should go into bpf or bpf-next tree, + please make sure to rebase the patches against those trees in + order to reduce potential conflicts. + + In case the patch or patch series has to be reworked and sent out + again in a second or later revision, it is also required to add a + version number (v2, v3, ...) into the subject prefix: + + git format-patch --subject-prefix='PATCH net-next v2' start..finish + + When changes have been requested to the patch series, always send the + whole patch series again with the feedback incorporated (never send + individual diffs on top of the old series). + +Q: What does it mean when a patch gets applied to bpf or bpf-next tree? + +A: It means that the patch looks good for mainline inclusion from + a BPF point of view. + + Be aware that this is not a final verdict that the patch will + automatically get accepted into net or net-next trees eventually: + + On the netdev kernel mailing list reviews can come in at any point + in time. If discussions around a patch conclude that they cannot + get included as-is, we will either apply a follow-up fix or drop + them from the trees entirely. Therefore, we also reserve to rebase + the trees when deemed necessary. After all, the purpose of the tree + is to i) accumulate and stage BPF patches for integration into trees + like net and net-next, and ii) run extensive BPF test suite and + workloads on the patches before they make their way any further. + + Once the BPF pull request was accepted by David S. Miller, then + the patches end up in net or net-next tree, respectively, and + make their way from there further into mainline. Again, see the + netdev FAQ for additional information e.g. on how often they are + merged to mainline. + +Q: How long do I need to wait for feedback on my BPF patches? + +A: We try to keep the latency low. The usual time to feedback will + be around 2 or 3 business days. It may vary depending on the + complexity of changes and current patch load. + +Q: How often do you send pull requests to major kernel trees like + net or net-next? + +A: Pull requests will be sent out rather often in order to not + accumulate too many patches in bpf or bpf-next. + + As a rule of thumb, expect pull requests for each tree regularly + at the end of the week. In some cases pull requests could additionally + come also in the middle of the week depending on the current patch + load or urgency. + +Q: Are patches applied to bpf-next when the merge window is open? + +A: For the time when the merge window is open, bpf-next will not be + processed. This is roughly analogous to net-next patch processing, + so feel free to read up on the netdev FAQ about further details. + + During those two weeks of merge window, we might ask you to resend + your patch series once bpf-next is open again. Once Linus released + a v*-rc1 after the merge window, we continue processing of bpf-next. + + For non-subscribers to kernel mailing lists, there is also a status + page run by David S. Miller on net-next that provides guidance: + + http://vger.kernel.org/~davem/net-next.html + +Q: I made a BPF verifier change, do I need to add test cases for + BPF kernel selftests? + +A: If the patch has changes to the behavior of the verifier, then yes, + it is absolutely necessary to add test cases to the BPF kernel + selftests suite. If they are not present and we think they are + needed, then we might ask for them before accepting any changes. + + In particular, test_verifier.c is tracking a high number of BPF test + cases, including a lot of corner cases that LLVM BPF back end may + generate out of the restricted C code. Thus, adding test cases is + absolutely crucial to make sure future changes do not accidentally + affect prior use-cases. Thus, treat those test cases as: verifier + behavior that is not tracked in test_verifier.c could potentially + be subject to change. + +Q: When should I add code to samples/bpf/ and when to BPF kernel + selftests? + +A: In general, we prefer additions to BPF kernel selftests rather than + samples/bpf/. The rationale is very simple: kernel selftests are + regularly run by various bots to test for kernel regressions. + + The more test cases we add to BPF selftests, the better the coverage + and the less likely it is that those could accidentally break. It is + not that BPF kernel selftests cannot demo how a specific feature can + be used. + + That said, samples/bpf/ may be a good place for people to get started, + so it might be advisable that simple demos of features could go into + samples/bpf/, but advanced functional and corner-case testing rather + into kernel selftests. + + If your sample looks like a test case, then go for BPF kernel selftests + instead! + +Q: When should I add code to the bpftool? + +A: The main purpose of bpftool (under tools/bpf/bpftool/) is to provide + a central user space tool for debugging and introspection of BPF programs + and maps that are active in the kernel. If UAPI changes related to BPF + enable for dumping additional information of programs or maps, then + bpftool should be extended as well to support dumping them. + +Q: When should I add code to iproute2's BPF loader? + +A: For UAPI changes related to the XDP or tc layer (e.g. cls_bpf), the + convention is that those control-path related changes are added to + iproute2's BPF loader as well from user space side. This is not only + useful to have UAPI changes properly designed to be usable, but also + to make those changes available to a wider user base of major + downstream distributions. + +Q: Do you accept patches as well for iproute2's BPF loader? + +A: Patches for the iproute2's BPF loader have to be sent to: + + netdev@vger.kernel.org + + While those patches are not processed by the BPF kernel maintainers, + please keep them in Cc as well, so they can be reviewed. + + The official git repository for iproute2 is run by Stephen Hemminger + and can be found at: + + https://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git/ + + The patches need to have a subject prefix of '[PATCH iproute2 master]' + or '[PATCH iproute2 net-next]'. 'master' or 'net-next' describes the + target branch where the patch should be applied to. Meaning, if kernel + changes went into the net-next kernel tree, then the related iproute2 + changes need to go into the iproute2 net-next branch, otherwise they + can be targeted at master branch. The iproute2 net-next branch will get + merged into the master branch after the current iproute2 version from + master has been released. + + Like BPF, the patches end up in patchwork under the netdev project and + are delegated to 'shemminger' for further processing: + + http://patchwork.ozlabs.org/project/netdev/list/?delegate=389 + +Q: What is the minimum requirement before I submit my BPF patches? + +A: When submitting patches, always take the time and properly test your + patches *prior* to submission. Never rush them! If maintainers find + that your patches have not been properly tested, it is a good way to + get them grumpy. Testing patch submissions is a hard requirement! + + Note, fixes that go to bpf tree *must* have a Fixes: tag included. The + same applies to fixes that target bpf-next, where the affected commit + is in net-next (or in some cases bpf-next). The Fixes: tag is crucial + in order to identify follow-up commits and tremendously helps for people + having to do backporting, so it is a must have! + + We also don't accept patches with an empty commit message. Take your + time and properly write up a high quality commit message, it is + essential! + + Think about it this way: other developers looking at your code a month + from now need to understand *why* a certain change has been done that + way, and whether there have been flaws in the analysis or assumptions + that the original author did. Thus providing a proper rationale and + describing the use-case for the changes is a must. + + Patch submissions with >1 patch must have a cover letter which includes + a high level description of the series. This high level summary will + then be placed into the merge commit by the BPF maintainers such that + it is also accessible from the git log for future reference. + +Q: What do I need to consider when adding a new instruction or feature + that would require BPF JIT and/or LLVM integration as well? + +A: We try hard to keep all BPF JITs up to date such that the same user + experience can be guaranteed when running BPF programs on different + architectures without having the program punt to the less efficient + interpreter in case the in-kernel BPF JIT is enabled. + + If you are unable to implement or test the required JIT changes for + certain architectures, please work together with the related BPF JIT + developers in order to get the feature implemented in a timely manner. + Please refer to the git log (arch/*/net/) to locate the necessary + people for helping out. + + Also always make sure to add BPF test cases (e.g. test_bpf.c and + test_verifier.c) for new instructions, so that they can receive + broad test coverage and help run-time testing the various BPF JITs. + + In case of new BPF instructions, once the changes have been accepted + into the Linux kernel, please implement support into LLVM's BPF back + end. See LLVM section below for further information. + +Stable submission: +------------------ + +Q: I need a specific BPF commit in stable kernels. What should I do? + +A: In case you need a specific fix in stable kernels, first check whether + the commit has already been applied in the related linux-*.y branches: + + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/ + + If not the case, then drop an email to the BPF maintainers with the + netdev kernel mailing list in Cc and ask for the fix to be queued up: + + netdev@vger.kernel.org + + The process in general is the same as on netdev itself, see also the + netdev FAQ document. + +Q: Do you also backport to kernels not currently maintained as stable? + +A: No. If you need a specific BPF commit in kernels that are currently not + maintained by the stable maintainers, then you are on your own. + + The current stable and longterm stable kernels are all listed here: + + https://www.kernel.org/ + +Q: The BPF patch I am about to submit needs to go to stable as well. What + should I do? + +A: The same rules apply as with netdev patch submissions in general, see + netdev FAQ under: + + Documentation/networking/netdev-FAQ.txt + + Never add "Cc: stable@vger.kernel.org" to the patch description, but + ask the BPF maintainers to queue the patches instead. This can be done + with a note, for example, under the "---" part of the patch which does + not go into the git log. Alternatively, this can be done as a simple + request by mail instead. + +Q: Where do I find currently queued BPF patches that will be submitted + to stable? + +A: Once patches that fix critical bugs got applied into the bpf tree, they + are queued up for stable submission under: + + http://patchwork.ozlabs.org/bundle/bpf/stable/?state=* + + They will be on hold there at minimum until the related commit made its + way into the mainline kernel tree. + + After having been under broader exposure, the queued patches will be + submitted by the BPF maintainers to the stable maintainers. + +Testing patches: +---------------- + +Q: Which BPF kernel selftests version should I run my kernel against? + +A: If you run a kernel xyz, then always run the BPF kernel selftests from + that kernel xyz as well. Do not expect that the BPF selftest from the + latest mainline tree will pass all the time. + + In particular, test_bpf.c and test_verifier.c have a large number of + test cases and are constantly updated with new BPF test sequences, or + existing ones are adapted to verifier changes e.g. due to verifier + becoming smarter and being able to better track certain things. + +LLVM: +----- + +Q: Where do I find LLVM with BPF support? + +A: The BPF back end for LLVM is upstream in LLVM since version 3.7.1. + + All major distributions these days ship LLVM with BPF back end enabled, + so for the majority of use-cases it is not required to compile LLVM by + hand anymore, just install the distribution provided package. + + LLVM's static compiler lists the supported targets through 'llc --version', + make sure BPF targets are listed. Example: + + $ llc --version + LLVM (http://llvm.org/): + LLVM version 6.0.0svn + Optimized build. + Default target: x86_64-unknown-linux-gnu + Host CPU: skylake + + Registered Targets: + bpf - BPF (host endian) + bpfeb - BPF (big endian) + bpfel - BPF (little endian) + x86 - 32-bit X86: Pentium-Pro and above + x86-64 - 64-bit X86: EM64T and AMD64 + + For developers in order to utilize the latest features added to LLVM's + BPF back end, it is advisable to run the latest LLVM releases. Support + for new BPF kernel features such as additions to the BPF instruction + set are often developed together. + + All LLVM releases can be found at: http://releases.llvm.org/ + +Q: Got it, so how do I build LLVM manually anyway? + +A: You need cmake and gcc-c++ as build requisites for LLVM. Once you have + that set up, proceed with building the latest LLVM and clang version + from the git repositories: + + $ git clone http://llvm.org/git/llvm.git + $ cd llvm/tools + $ git clone --depth 1 http://llvm.org/git/clang.git + $ cd ..; mkdir build; cd build + $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_BUILD_RUNTIME=OFF + $ make -j $(getconf _NPROCESSORS_ONLN) + + The built binaries can then be found in the build/bin/ directory, where + you can point the PATH variable to. + +Q: Should I notify BPF kernel maintainers about issues in LLVM's BPF code + generation back end or about LLVM generated code that the verifier + refuses to accept? + +A: Yes, please do! LLVM's BPF back end is a key piece of the whole BPF + infrastructure and it ties deeply into verification of programs from the + kernel side. Therefore, any issues on either side need to be investigated + and fixed whenever necessary. + + Therefore, please make sure to bring them up at netdev kernel mailing + list and Cc BPF maintainers for LLVM and kernel bits: + + Yonghong Song + Alexei Starovoitov + Daniel Borkmann + + LLVM also has an issue tracker where BPF related bugs can be found: + + https://bugs.llvm.org/buglist.cgi?quicksearch=bpf + + However, it is better to reach out through mailing lists with having + maintainers in Cc. + +Q: I have added a new BPF instruction to the kernel, how can I integrate + it into LLVM? + +A: LLVM has a -mcpu selector for the BPF back end in order to allow the + selection of BPF instruction set extensions. By default the 'generic' + processor target is used, which is the base instruction set (v1) of BPF. + + LLVM has an option to select -mcpu=probe where it will probe the host + kernel for supported BPF instruction set extensions and selects the + optimal set automatically. + + For cross-compilation, a specific version can be select manually as well. + + $ llc -march bpf -mcpu=help + Available CPUs for this target: + + generic - Select the generic processor. + probe - Select the probe processor. + v1 - Select the v1 processor. + v2 - Select the v2 processor. + [...] + + Newly added BPF instructions to the Linux kernel need to follow the same + scheme, bump the instruction set version and implement probing for the + extensions such that -mcpu=probe users can benefit from the optimization + transparently when upgrading their kernels. + + If you are unable to implement support for the newly added BPF instruction + please reach out to BPF developers for help. + + By the way, the BPF kernel selftests run with -mcpu=probe for better + test coverage. + +Happy BPF hacking! diff --git a/MAINTAINERS b/MAINTAINERS index 4007fa2e193e..77ad4bbeb669 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2724,12 +2724,16 @@ M: Alexei Starovoitov M: Daniel Borkmann L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git S: Supported F: arch/x86/net/bpf_jit* F: Documentation/networking/filter.txt F: Documentation/bpf/ F: include/linux/bpf* F: include/linux/filter.h +F: include/trace/events/bpf.h +F: include/trace/events/xdp.h F: include/uapi/linux/bpf* F: include/uapi/linux/filter.h F: kernel/bpf/ diff --git a/include/linux/filter.h b/include/linux/filter.h index 80b5b482cb46..0062302e1285 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -985,6 +985,7 @@ struct bpf_sock_ops_kern { u32 reply; u32 replylong[4]; }; + u32 is_fullsock; }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6998707e81f3..6cc205ccccc1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2011,10 +2011,12 @@ static inline int tcp_call_bpf(struct sock *sk, int op) struct bpf_sock_ops_kern sock_ops; int ret; - if (sk_fullsock(sk)) - sock_owned_by_me(sk); - memset(&sock_ops, 0, sizeof(sock_ops)); + if (sk_fullsock(sk)) { + sock_ops.is_fullsock = 1; + sock_owned_by_me(sk); + } + sock_ops.sk = sk; sock_ops.op = op; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c223ab30293..80d62e88590c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -941,6 +941,12 @@ struct bpf_sock_ops { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ }; /* List of known BPF sock_ops operators. diff --git a/net/core/filter.c b/net/core/filter.c index 8ec5a504eb28..754abe1041b7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3013,6 +3013,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; if (flags & BPF_F_DONT_FRAGMENT) info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; + if (flags & BPF_F_ZERO_CSUM_TX) + info->key.tun_flags &= ~TUNNEL_CSUM; info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; @@ -3028,9 +3030,6 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); } - if (flags & BPF_F_ZERO_CSUM_TX) - info->key.tun_flags &= ~TUNNEL_CSUM; - return 0; } @@ -4438,6 +4437,42 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, offsetof(struct sock_common, skc_num)); break; + + case offsetof(struct bpf_sock_ops, is_fullsock): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, + is_fullsock), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + is_fullsock)); + break; + +/* Helper macro for adding read access to tcp_sock fields. */ +#define SOCK_OPS_GET_TCP32(FIELD_NAME) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \ + offsetof(struct tcp_sock, FIELD_NAME)); \ + } while (0) + + case offsetof(struct bpf_sock_ops, snd_cwnd): + SOCK_OPS_GET_TCP32(snd_cwnd); + break; + + case offsetof(struct bpf_sock_ops, srtt_us): + SOCK_OPS_GET_TCP32(srtt_us); + break; } return insn - insn_buf; }