2017-11-01 21:08:43 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
2013-10-15 02:04:13 +07:00
|
|
|
#ifndef _UAPI_ASM_SOCKET_H
|
|
|
|
#define _UAPI_ASM_SOCKET_H
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2019-03-11 22:38:17 +07:00
|
|
|
#include <linux/posix_types.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/sockios.h>
|
|
|
|
|
|
|
|
/* For setsockopt(2) */
|
|
|
|
#define SOL_SOCKET 0xffff
|
|
|
|
|
|
|
|
#define SO_DEBUG 0x0001
|
|
|
|
#define SO_REUSEADDR 0x0004
|
|
|
|
#define SO_KEEPALIVE 0x0008
|
|
|
|
#define SO_DONTROUTE 0x0010
|
|
|
|
#define SO_BROADCAST 0x0020
|
|
|
|
#define SO_LINGER 0x0080
|
|
|
|
#define SO_OOBINLINE 0x0100
|
2013-01-22 16:49:50 +07:00
|
|
|
#define SO_REUSEPORT 0x0200
|
2005-04-17 05:20:36 +07:00
|
|
|
#define SO_SNDBUF 0x1001
|
|
|
|
#define SO_RCVBUF 0x1002
|
2005-08-10 09:30:51 +07:00
|
|
|
#define SO_SNDBUFFORCE 0x100a
|
|
|
|
#define SO_RCVBUFFORCE 0x100b
|
2005-04-17 05:20:36 +07:00
|
|
|
#define SO_SNDLOWAT 0x1003
|
|
|
|
#define SO_RCVLOWAT 0x1004
|
2019-02-02 22:34:53 +07:00
|
|
|
#define SO_SNDTIMEO_OLD 0x1005
|
|
|
|
#define SO_RCVTIMEO_OLD 0x1006
|
2005-04-17 05:20:36 +07:00
|
|
|
#define SO_ERROR 0x1007
|
|
|
|
#define SO_TYPE 0x1008
|
2009-08-04 14:28:28 +07:00
|
|
|
#define SO_PROTOCOL 0x1028
|
2009-08-04 14:28:29 +07:00
|
|
|
#define SO_DOMAIN 0x1029
|
2005-04-17 05:20:36 +07:00
|
|
|
#define SO_PEERNAME 0x2000
|
|
|
|
|
|
|
|
#define SO_NO_CHECK 0x400b
|
|
|
|
#define SO_PRIORITY 0x400c
|
|
|
|
#define SO_BSDCOMPAT 0x400e
|
|
|
|
#define SO_PASSCRED 0x4010
|
|
|
|
#define SO_PEERCRED 0x4011
|
|
|
|
|
|
|
|
/* Security levels - as per NRL IPv6 - don't actually do anything */
|
|
|
|
#define SO_SECURITY_AUTHENTICATION 0x4016
|
|
|
|
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x4017
|
|
|
|
#define SO_SECURITY_ENCRYPTION_NETWORK 0x4018
|
|
|
|
|
|
|
|
#define SO_BINDTODEVICE 0x4019
|
|
|
|
|
|
|
|
/* Socket filtering */
|
|
|
|
#define SO_ATTACH_FILTER 0x401a
|
|
|
|
#define SO_DETACH_FILTER 0x401b
|
sk-filter: Add ability to get socket filter program (v2)
The SO_ATTACH_FILTER option is set only. I propose to add the get
ability by using SO_ATTACH_FILTER in getsockopt. To be less
irritating to eyes the SO_GET_FILTER alias to it is declared. This
ability is required by checkpoint-restore project to be able to
save full state of a socket.
There are two issues with getting filter back.
First, kernel modifies the sock_filter->code on filter load, thus in
order to return the filter element back to user we have to decode it
into user-visible constants. Fortunately the modification in question
is interconvertible.
Second, the BPF_S_ALU_DIV_K code modifies the command argument k to
speed up the run-time division by doing kernel_k = reciprocal(user_k).
Bad news is that different user_k may result in same kernel_k, so we
can't get the original user_k back. Good news is that we don't have
to do it. What we need to do is calculate a user2_k such that
reciprocal(user2_k) == reciprocal(user_k) == kernel_k
i.e. if it's re-loaded back the compiled again value will be exactly
the same as it was. That said, the user2_k can be calculated like this
user2_k = reciprocal(kernel_k)
with an exception, that if kernel_k == 0, then user2_k == 1.
The optlen argument is treated like this -- when zero, kernel returns
the amount of sock_fprog elements in filter, otherwise it should be
large enough for the sock_fprog array.
changes since v1:
* Declared SO_GET_FILTER in all arch headers
* Added decode of vlan-tag codes
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-01 09:01:48 +07:00
|
|
|
/* Alias of SO_ATTACH_FILTER, intended for getsockopt() to read the attached filter back. */
#define SO_GET_FILTER SO_ATTACH_FILTER
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#define SO_ACCEPTCONN 0x401c
|
|
|
|
|
|
|
|
#define SO_PEERSEC 0x401d
|
[AF_UNIX]: Datagram getpeersec
This patch implements an API whereby an application can determine the
label of its peer's Unix datagram sockets via the auxiliary data mechanism of
recvmsg.
Patch purpose:
This patch enables a security-aware application to retrieve the
security context of the peer of a Unix datagram socket. The application
can then use this security context to determine the security context for
processing on behalf of the peer who sent the packet.
Patch design and implementation:
The design and implementation is very similar to the UDP case for INET
sockets. Basically we build upon the existing Unix domain socket API for
retrieving user credentials. Linux offers the API for obtaining user
credentials via ancillary messages (i.e., out of band/control messages
that are bundled together with a normal message). To retrieve the security
context, the application first indicates to the kernel such desire by
setting the SO_PASSSEC option via setsockopt. Then the application
retrieves the security context using the auxiliary data mechanism.
An example server application for Unix datagram socket should look like this:
toggle = 1;
toggle_len = sizeof(toggle);
setsockopt(sockfd, SOL_SOCKET, SO_PASSSEC, &toggle, toggle_len);
recvmsg(sockfd, &msg_hdr, 0);
if (msg_hdr.msg_controllen > sizeof(struct cmsghdr)) {
cmsg_hdr = CMSG_FIRSTHDR(&msg_hdr);
if (cmsg_hdr->cmsg_len <= CMSG_LEN(sizeof(scontext)) &&
cmsg_hdr->cmsg_level == SOL_SOCKET &&
cmsg_hdr->cmsg_type == SCM_SECURITY) {
memcpy(&scontext, CMSG_DATA(cmsg_hdr), sizeof(scontext));
}
}
sock_setsockopt is enhanced with a new socket option SOCK_PASSSEC to allow
a server socket to receive security context of the peer.
Testing:
We have tested the patch by setting up Unix datagram client and server
applications. We verified that the server can retrieve the security context
using the auxiliary data mechanism of recvmsg.
Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-30 02:27:47 +07:00
|
|
|
/* When set, the peer's security context is delivered as ancillary data (SCM_SECURITY) on recvmsg(). */
#define SO_PASSSEC 0x401e
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-01-31 10:08:16 +07:00
|
|
|
#define SO_MARK 0x401f
|
|
|
|
|
net: Generalize socket rx gap / receive queue overflow cmsg
Create a new socket level option to report number of queue overflows
Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames. This value was
exported via a SOL_PACKET level cmsg. After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option. As such I've created this patch. It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames. It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count). Tested
successfully by me.
Notes:
1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.
2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me. This also saves us having
to code in a per-protocol opt in mechanism.
3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-10-13 03:26:31 +07:00
|
|
|
/*
 * When enabled, a SOL_SOCKET-level cmsg reports the socket's receive-queue
 * drop count. The value is a running total; deltas must be computed in
 * user space.
 */
#define SO_RXQ_OVFL 0x4021
|
|
|
|
|
2011-11-09 16:15:42 +07:00
|
|
|
#define SO_WIFI_STATUS 0x4022
|
|
|
|
#define SCM_WIFI_STATUS SO_WIFI_STATUS
|
2012-02-21 14:31:34 +07:00
|
|
|
#define SO_PEEK_OFF 0x4023
|
2011-11-09 16:15:42 +07:00
|
|
|
|
2012-02-11 22:39:30 +07:00
|
|
|
/* Instruct lower device to use last 4-bytes of skb data as FCS */
|
|
|
|
#define SO_NOFCS 0x4024
|
|
|
|
|
2013-01-17 04:55:49 +07:00
|
|
|
#define SO_LOCK_FILTER 0x4025
|
2012-02-11 22:39:30 +07:00
|
|
|
|
2013-03-28 18:19:25 +07:00
|
|
|
#define SO_SELECT_ERR_QUEUE 0x4026
|
|
|
|
|
2013-07-10 21:13:36 +07:00
|
|
|
#define SO_BUSY_POLL 0x4027
|
2013-06-14 20:33:57 +07:00
|
|
|
|
2014-01-17 02:15:12 +07:00
|
|
|
#define SO_MAX_PACING_RATE 0x4028
|
2013-09-24 22:20:52 +07:00
|
|
|
|
2014-01-17 23:09:45 +07:00
|
|
|
#define SO_BPF_EXTENSIONS 0x4029
|
|
|
|
|
net: introduce SO_INCOMING_CPU
Alternative to RPS/RFS is to use hardware support for multiple
queues.
Then split a set of millions of sockets into worker threads, each
one using epoll() to manage events on its own socket pool.
Ideally, we want one thread per RX/TX queue/cpu, but we have no way to
know after accept() or connect() on which queue/cpu a socket is managed.
We normally use one cpu per RX queue (IRQ smp_affinity being properly
set), so remembering on socket structure which cpu delivered last packet
is enough to solve the problem.
After accept(), connect(), or even file descriptor passing around
processes, applications can use :
int cpu;
socklen_t len = sizeof(cpu);
getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len);
And use this information to put the socket into the right silo
for optimal performance, as all networking stack should run
on the appropriate cpu, without need to send IPI (RPS/RFS).
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-11-11 20:54:28 +07:00
|
|
|
/* getsockopt(): reports (as an int) the cpu that delivered the last packet for this socket. */
#define SO_INCOMING_CPU 0x402A
|
|
|
|
|
2014-12-02 06:06:35 +07:00
|
|
|
#define SO_ATTACH_BPF 0x402B
|
|
|
|
/* Shares its value with SO_DETACH_FILTER; there is no separate detach number for BPF. */
#define SO_DETACH_BPF SO_DETACH_FILTER
|
|
|
|
|
2016-01-05 05:41:47 +07:00
|
|
|
#define SO_ATTACH_REUSEPORT_CBPF 0x402C
|
|
|
|
#define SO_ATTACH_REUSEPORT_EBPF 0x402D
|
|
|
|
|
2016-02-25 01:02:52 +07:00
|
|
|
#define SO_CNX_ADVICE 0x402E
|
|
|
|
|
2016-11-28 14:07:18 +07:00
|
|
|
#define SCM_TIMESTAMPING_OPT_STATS 0x402F
|
|
|
|
|
2017-03-21 02:22:03 +07:00
|
|
|
#define SO_MEMINFO 0x4030
|
|
|
|
|
2017-03-25 00:08:36 +07:00
|
|
|
#define SO_INCOMING_NAPI_ID 0x4031
|
|
|
|
|
2017-04-06 09:00:55 +07:00
|
|
|
#define SO_COOKIE 0x4032
|
|
|
|
|
2017-05-22 21:26:24 +07:00
|
|
|
#define SCM_TIMESTAMPING_PKTINFO 0x4033
|
2017-05-22 10:13:37 +07:00
|
|
|
|
net: introduce SO_PEERGROUPS getsockopt
This adds the new getsockopt(2) option SO_PEERGROUPS on SOL_SOCKET to
retrieve the auxiliary groups of the remote peer. It is designed to
naturally extend SO_PEERCRED. That is, the underlying data is from the
same credentials. Regarding its syntax, it is based on SO_PEERSEC. That
is, if the provided buffer is too small, ERANGE is returned and @optlen
is updated. Otherwise, the information is copied, @optlen is set to the
actual size, and 0 is returned.
While SO_PEERCRED (and thus `struct ucred') already returns the primary
group, it lacks the auxiliary group vector. However, nearly all access
controls (including kernel side VFS and SYSVIPC, but also user-space
polkit, DBus, ...) consider the entire set of groups, rather than just
the primary group. But this is currently not possible with pure
SO_PEERCRED. Instead, user-space has to work around this and query the
system database for the auxiliary groups of a UID retrieved via
SO_PEERCRED.
Unfortunately, there is no race-free way to query the auxiliary groups
of the PID/UID retrieved via SO_PEERCRED. Hence, the current user-space
solution is to use getgrouplist(3p), which itself falls back to NSS and
whatever is configured in nsswitch.conf(3). This effectively checks
which groups we *would* assign to the user if it logged in *now*. On
normal systems it is as easy as reading /etc/group, but with NSS it can
resort to querying network databases (e.g., LDAP), using IPC or network
communication.
Long story short: Whenever we want to use auxiliary groups for access
checks on IPC, we need further IPC to talk to the user/group databases,
rather than just relying on SO_PEERCRED and the incoming socket. This
is unfortunate, and might even result in dead-locks if the database
query uses the same IPC as the original request.
So far, those recursions / dead-locks have been avoided by using
primitive IPC for all crucial NSS modules. However, we want to avoid
re-inventing the wheel for each NSS module that might be involved in
user/group queries. Hence, we would preferably make DBus (and other IPC
that supports access-management based on groups) work without resorting
to the user/group database. This new SO_PEERGROUPS option would allow us
to make dbus-daemon work without ever calling into NSS.
Cc: Michal Sekletar <msekleta@redhat.com>
Cc: Simon McVittie <simon.mcvittie@collabora.co.uk>
Reviewed-by: Tom Gundersen <teg@jklm.no>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-21 15:47:15 +07:00
|
|
|
/*
 * getsockopt(): retrieves the auxiliary groups of the remote peer.
 * If the buffer is too small, ERANGE is returned and optlen is updated.
 */
#define SO_PEERGROUPS 0x4034
|
|
|
|
|
2017-08-04 03:29:40 +07:00
|
|
|
#define SO_ZEROCOPY 0x4035
|
|
|
|
|
2018-07-04 05:42:48 +07:00
|
|
|
#define SO_TXTIME 0x4036
|
|
|
|
#define SCM_TXTIME SO_TXTIME
|
|
|
|
|
net: introduce SO_BINDTOIFINDEX sockopt
This introduces a new generic SOL_SOCKET-level socket option called
SO_BINDTOIFINDEX. It behaves similar to SO_BINDTODEVICE, but takes a
network interface index as argument, rather than the network interface
name.
User-space often refers to network-interfaces via their index, but has
to temporarily resolve it to a name for a call into SO_BINDTODEVICE.
This might pose problems when the network-device is renamed
asynchronously by other parts of the system. When this happens, the
SO_BINDTODEVICE might either fail, or worse, it might bind to the wrong
device.
In most cases user-space only ever operates on devices which they
either manage themselves, or otherwise have a guarantee that the device
name will not change (e.g., devices that are UP cannot be renamed).
However, particularly in libraries this guarantee is non-obvious and it
would be nice if that race-condition would simply not exist. It would
make it easier for those libraries to operate even in situations where
the device-name might change under the hood.
A real use-case that we recently hit is trying to start the network
stack early in the initrd but make it survive into the real system.
Existing distributions rename network-interfaces during the transition
from initrd into the real system. This, obviously, cannot affect
devices that are up and running (unless you also consider moving them
between network-namespaces). However, the network manager now has to
make sure its management engine for dormant devices will not run in
parallel to these renames. Particularly, when you offload operations
like DHCP into separate processes, these might setup their sockets
early, and thus have to resolve the device-name possibly running into
this race-condition.
By avoiding a call to resolve the device-name, we no longer depend on
the name and can run network setup of dormant devices in parallel to
the transition off the initrd. The SO_BINDTOIFINDEX sockopt plugs this
race.
Reviewed-by: Tom Gundersen <teg@jklm.no>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-01-15 20:42:14 +07:00
|
|
|
/* Like SO_BINDTODEVICE, but takes a network interface index rather than an interface name. */
#define SO_BINDTOIFINDEX 0x4037
|
|
|
|
|
2019-02-02 22:34:46 +07:00
|
|
|
#define SO_TIMESTAMP_OLD 0x4012
|
|
|
|
#define SO_TIMESTAMPNS_OLD 0x4013
|
|
|
|
#define SO_TIMESTAMPING_OLD 0x4020
|
|
|
|
|
2019-02-02 22:34:50 +07:00
|
|
|
#define SO_TIMESTAMP_NEW 0x4038
|
|
|
|
#define SO_TIMESTAMPNS_NEW 0x4039
|
2019-02-02 22:34:51 +07:00
|
|
|
#define SO_TIMESTAMPING_NEW 0x403A
|
2019-02-02 22:34:50 +07:00
|
|
|
|
2019-02-02 22:34:54 +07:00
|
|
|
#define SO_RCVTIMEO_NEW 0x4040
|
|
|
|
#define SO_SNDTIMEO_NEW 0x4041
|
|
|
|
|
2019-06-14 05:00:01 +07:00
|
|
|
#define SO_DETACH_REUSEPORT_BPF 0x4042
|
|
|
|
|
2019-02-02 22:34:46 +07:00
|
|
|
/*
 * Only when __KERNEL__ is not defined: provide the unsuffixed
 * SO_TIMESTAMP*, SO_RCVTIMEO and SO_SNDTIMEO names (and their SCM_
 * aliases) by mapping them onto the _OLD or _NEW option numbers.
 */
#if !defined(__KERNEL__)
|
|
|
|
|
2019-02-02 22:34:50 +07:00
|
|
|
/* With a 64-bit long, the unsuffixed names always map to the _OLD option numbers. */
#if __BITS_PER_LONG == 64
|
|
|
|
#define SO_TIMESTAMP SO_TIMESTAMP_OLD
|
|
|
|
#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD
|
2019-02-02 22:34:51 +07:00
|
|
|
#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD
|
2019-02-02 22:34:54 +07:00
|
|
|
#define SO_RCVTIMEO SO_RCVTIMEO_OLD
|
|
|
|
#define SO_SNDTIMEO SO_SNDTIMEO_OLD
|
2019-02-02 22:34:50 +07:00
|
|
|
#else
/*
 * Otherwise pick per use: _OLD when time_t has the same size as
 * __kernel_long_t, _NEW when it does not. These expand to sizeof
 * expressions, so they cannot be evaluated in #if directives.
 */
|
|
|
|
#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
|
|
|
|
#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
|
2019-02-02 22:34:51 +07:00
|
|
|
#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
|
2019-02-02 22:34:54 +07:00
|
|
|
|
|
|
|
#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
|
|
|
|
#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
|
2019-02-02 22:34:50 +07:00
|
|
|
#endif /* __BITS_PER_LONG == 64 */
|
|
|
|
|
2019-02-02 22:34:46 +07:00
|
|
|
#define SCM_TIMESTAMP SO_TIMESTAMP
|
|
|
|
#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
|
|
|
|
#define SCM_TIMESTAMPING SO_TIMESTAMPING
|
|
|
|
|
|
|
|
#endif /* !defined(__KERNEL__) */
|
|
|
|
|
2013-10-15 02:04:13 +07:00
|
|
|
#endif /* _UAPI_ASM_SOCKET_H */
|