linux_dsm_epyc7002/include/uapi/linux/if_xdp.h
Magnus Karlsson 77cd0d7b3f xsk: add support for need_wakeup flag in AF_XDP rings
This commit adds support for a new flag called need_wakeup in the
AF_XDP Tx and fill rings. When this flag is set, it means that the
application has to explicitly wake up the kernel Rx (for the bit in
the fill ring) or kernel Tx (for bit in the Tx ring) processing by
issuing a syscall. Poll() can wake up both depending on the flags
submitted and sendto() will wake up tx processing only.

The main reason for introducing this new flag is to be able to
efficiently support the case when application and driver is executing
on the same core. Previously, the driver was just busy-spinning on the
fill ring if it ran out of buffers in the HW and there were none on
the fill ring. This approach works when the application is running on
another core as it can replenish the fill ring while the driver is
busy-spinning. Though, this is a lousy approach if both of them are
running on the same core as the probability of the fill ring getting
more entries when the driver is busy-spinning is zero. With this new
feature the driver now sets the need_wakeup flag and returns to the
application. The application can then replenish the fill queue and
then explicitly wake up the Rx processing in the kernel using the
syscall poll(). For Tx, the flag is only set to one if the driver has
no outstanding Tx completion interrupts. If it has some, the flag is
zero as it will be woken up by a completion interrupt anyway.

As a nice side effect, this new flag also improves the performance of
the case where application and driver are running on two different
cores as it reduces the number of syscalls to the kernel. The kernel
tells user space if it needs to be woken up by a syscall, and this
eliminates many of the syscalls.

This flag needs some simple driver support. If the driver does not
support this, the Rx flag is always zero and the Tx flag is always
one. This makes any application relying on this feature default to the
old behaviour of not requiring any syscalls in the Rx path and always
having to call sendto() in the Tx path.

For backwards compatibility reasons, this feature has to be explicitly
turned on using a new bind flag (XDP_USE_NEED_WAKEUP). I recommend
that you always turn it on as it so far always have had a positive
performance impact.

The name and inspiration of the flag has been taken from io_uring by
Jens Axboe. Details about this feature in io_uring can be found in
http://kernel.dk/io_uring.pdf, section 8.3.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-08-17 23:07:32 +02:00

100 lines
2.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* if_xdp: XDP socket user-space interface
* Copyright(c) 2018 Intel Corporation.
*
* Author(s): Björn Töpel <bjorn.topel@intel.com>
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
#ifndef _LINUX_IF_XDP_H
#define _LINUX_IF_XDP_H
#include <linux/types.h>
/* Options for the sxdp_flags field */
#define XDP_SHARED_UMEM (1 << 0)
#define XDP_COPY (1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
/* If this option is set, the driver might go sleep and in that case
* the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
* set. If it is set, the application need to explicitly wake up the
* driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
* running the driver and the application on the same core, you should
* use this option so that the kernel will yield to the user space
* application.
*/
#define XDP_USE_NEED_WAKEUP (1 << 3)
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
__u32 sxdp_ifindex;
__u32 sxdp_queue_id;
__u32 sxdp_shared_umem_fd;
};
/* XDP_RING flags */
#define XDP_RING_NEED_WAKEUP (1 << 0)
struct xdp_ring_offset {
__u64 producer;
__u64 consumer;
__u64 desc;
__u64 flags;
};
struct xdp_mmap_offsets {
struct xdp_ring_offset rx;
struct xdp_ring_offset tx;
struct xdp_ring_offset fr; /* Fill */
struct xdp_ring_offset cr; /* Completion */
};
/* XDP socket options */
#define XDP_MMAP_OFFSETS 1
#define XDP_RX_RING 2
#define XDP_TX_RING 3
#define XDP_UMEM_REG 4
#define XDP_UMEM_FILL_RING 5
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
#define XDP_OPTIONS 8
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
};
struct xdp_statistics {
__u64 rx_dropped; /* Dropped for reasons other than invalid desc */
__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
};
struct xdp_options {
__u32 flags;
};
/* Flags for the flags field of struct xdp_options */
#define XDP_OPTIONS_ZEROCOPY (1 << 0)
/* Pgoff for mmaping the rings */
#define XDP_PGOFF_RX_RING 0
#define XDP_PGOFF_TX_RING 0x80000000
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
/* Rx/Tx descriptor */
struct xdp_desc {
__u64 addr;
__u32 len;
__u32 options;
};
/* UMEM descriptor is __u64 */
#endif /* _LINUX_IF_XDP_H */