2018-05-18 19:00:21 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
|
|
|
/*
|
2018-05-02 18:01:23 +07:00
|
|
|
* if_xdp: XDP socket user-space interface
|
|
|
|
* Copyright(c) 2018 Intel Corporation.
|
|
|
|
*
|
|
|
|
* Author(s): Björn Töpel <bjorn.topel@intel.com>
|
|
|
|
* Magnus Karlsson <magnus.karlsson@intel.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _LINUX_IF_XDP_H
|
|
|
|
#define _LINUX_IF_XDP_H
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
|
2018-05-02 18:01:26 +07:00
|
|
|
/* Options for the sxdp_flags field of struct sockaddr_xdp (bit flags) */
#define XDP_SHARED_UMEM	(1 << 0)
#define XDP_COPY	(1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY	(1 << 2) /* Force zero-copy mode */
|
2018-05-02 18:01:26 +07:00
|
|
|
|
|
|
|
/*
 * Socket address for XDP sockets.
 *
 * Field order and widths are part of the user-space ABI and must not change.
 */
struct sockaddr_xdp {
	__u16 sxdp_family;		/* address family; presumably AF_XDP - confirm */
	__u16 sxdp_flags;		/* bitmask of the sxdp_flags option bits (XDP_*) */
	__u32 sxdp_ifindex;		/* NOTE(review): looks like a netdev interface index */
	__u32 sxdp_queue_id;		/* NOTE(review): looks like a device queue id */
	__u32 sxdp_shared_umem_fd;	/* presumably only used with XDP_SHARED_UMEM - confirm */
};
|
|
|
|
|
xsk: remove explicit ring structure from uapi
In this commit we remove the explicit ring structure from the
uapi. It is tricky for an uapi to depend on a certain L1 cache line
size, since it can differ for variants of the same architecture. Now,
we let the user application determine the offsets of the producer,
consumer and descriptors by asking the socket via getsockopt.
A typical flow would be (Rx ring):
struct xdp_mmap_offsets off;
struct xdp_desc *ring;
u32 *prod, *cons;
void *map;
...
getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
map = mmap(NULL, off.rx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_RX_RING);
prod = map + off.rx.producer;
cons = map + off.rx.consumer;
ring = map + off.rx.desc;
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-22 14:34:59 +07:00
|
|
|
/*
 * Offsets of one ring's parts within its mmap()ed area.
 *
 * User space adds each offset to the address returned by mmap() to locate
 * the producer index, the consumer index and the descriptor array.
 */
struct xdp_ring_offset {
	__u64 producer;	/* offset of the producer index */
	__u64 consumer;	/* offset of the consumer index */
	__u64 desc;	/* offset of the first descriptor */
};
|
|
|
|
|
|
|
|
struct xdp_mmap_offsets {
|
|
|
|
struct xdp_ring_offset rx;
|
|
|
|
struct xdp_ring_offset tx;
|
|
|
|
struct xdp_ring_offset fr; /* Fill */
|
|
|
|
struct xdp_ring_offset cr; /* Completion */
|
|
|
|
};
|
|
|
|
|
2018-05-02 18:01:23 +07:00
|
|
|
/* XDP socket options */
|
xsk: remove explicit ring structure from uapi
In this commit we remove the explicit ring structure from the
uapi. It is tricky for an uapi to depend on a certain L1 cache line
size, since it can differ for variants of the same architecture. Now,
we let the user application determine the offsets of the producer,
consumer and descriptors by asking the socket via getsockopt.
A typical flow would be (Rx ring):
struct xdp_mmap_offsets off;
struct xdp_desc *ring;
u32 *prod, *cons;
void *map;
...
getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
map = mmap(NULL, off.rx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_RX_RING);
prod = map + off.rx.producer;
cons = map + off.rx.consumer;
ring = map + off.rx.desc;
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-22 14:34:59 +07:00
|
|
|
#define XDP_MMAP_OFFSETS		1 /* getsockopt(): fills struct xdp_mmap_offsets */
#define XDP_RX_RING			2
#define XDP_TX_RING			3
#define XDP_UMEM_REG			4
#define XDP_UMEM_FILL_RING		5
#define XDP_UMEM_COMPLETION_RING	6
#define XDP_STATISTICS			7
|
2018-05-02 18:01:23 +07:00
|
|
|
|
|
|
|
/*
 * Description of a UMEM packet data area.
 *
 * Field order and widths are part of the user-space ABI and must not change.
 */
struct xdp_umem_reg {
	__u64 addr; /* Start of packet data area */
	__u64 len; /* Length of packet data area */
	__u32 chunk_size; /* NOTE(review): presumably the size of one UMEM chunk - confirm */
	__u32 headroom; /* NOTE(review): presumably space reserved before packet data - confirm */
};
|
|
|
|
|
2018-05-02 18:01:35 +07:00
|
|
|
/* Drop counters for an XDP socket; all fields are 64-bit. */
struct xdp_statistics {
	__u64 rx_dropped; /* Dropped for reasons other than invalid desc */
	__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
};
|
|
|
|
|
2018-05-02 18:01:24 +07:00
|
|
|
/* Page offsets (pgoff) passed to mmap() to select which ring to map */
#define XDP_PGOFF_RX_RING			  0
#define XDP_PGOFF_TX_RING		 0x80000000
#define XDP_UMEM_PGOFF_FILL_RING	0x100000000ULL
#define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000ULL
|
2018-05-02 18:01:24 +07:00
|
|
|
|
xsk: remove explicit ring structure from uapi
In this commit we remove the explicit ring structure from the
uapi. It is tricky for an uapi to depend on a certain L1 cache line
size, since it can differ for variants of the same architecture. Now,
we let the user application determine the offsets of the producer,
consumer and descriptors by asking the socket via getsockopt.
A typical flow would be (Rx ring):
struct xdp_mmap_offsets off;
struct xdp_desc *ring;
u32 *prod, *cons;
void *map;
...
getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
map = mmap(NULL, off.rx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_RX_RING);
prod = map + off.rx.producer;
cons = map + off.rx.consumer;
ring = map + off.rx.desc;
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-05-22 14:34:59 +07:00
|
|
|
/*
 * Rx/Tx descriptor.
 *
 * Field order and widths are part of the user-space ABI and must not change.
 */
struct xdp_desc {
	__u64 addr;	/* address relative to the start of the UMEM */
	__u32 len;	/* NOTE(review): presumably length of the packet data in bytes - confirm */
	__u32 options;
};
|
|
|
|
|
xsk: new descriptor addressing scheme
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offsetting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings is done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. To convert an
idx-based address to the new addr is simply: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note, that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model passing one single chunk
into the fill-ring, would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-06-04 18:57:13 +07:00
|
|
|
/* UMEM descriptor is __u64 */
|
2018-05-02 18:01:24 +07:00
|
|
|
|
2018-05-02 18:01:23 +07:00
|
|
|
#endif /* _LINUX_IF_XDP_H */
|