2005-04-17 05:20:36 +07:00
|
|
|
#ifndef _LINUX_SOCKET_H
|
|
|
|
#define _LINUX_SOCKET_H
|
|
|
|
|
|
|
|
|
|
|
|
#include <asm/socket.h> /* arch-dependent defines */
|
|
|
|
#include <linux/sockios.h> /* the SIOCxxx I/O controls */
|
|
|
|
#include <linux/uio.h> /* iovec support */
|
|
|
|
#include <linux/types.h> /* pid_t */
|
|
|
|
#include <linux/compiler.h> /* __user */
|
2012-10-13 16:46:48 +07:00
|
|
|
#include <uapi/linux/socket.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-06-13 10:28:59 +07:00
|
|
|
struct pid;
|
|
|
|
struct cred;
|
|
|
|
|
2009-10-29 16:59:18 +07:00
|
|
|
#define __sockaddr_check_size(size) \
|
|
|
|
BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
|
|
|
|
|
2011-01-07 02:41:42 +07:00
|
|
|
#ifdef CONFIG_PROC_FS
|
2005-08-16 12:18:02 +07:00
|
|
|
struct seq_file;
|
|
|
|
extern void socket_seq_show(struct seq_file *seq);
|
2011-01-07 02:41:42 +07:00
|
|
|
#endif
|
2005-08-16 12:18:02 +07:00
|
|
|
|
2011-08-08 12:48:07 +07:00
|
|
|
typedef __kernel_sa_family_t sa_family_t;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* 1003.1g requires sa_family_t and that sa_data is char.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct sockaddr {
|
|
|
|
sa_family_t sa_family; /* address family, AF_xxx */
|
|
|
|
char sa_data[14]; /* 14 bytes of protocol address */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct linger {
|
|
|
|
int l_onoff; /* Linger active */
|
|
|
|
int l_linger; /* How long to linger for */
|
|
|
|
};
|
|
|
|
|
|
|
|
#define sockaddr_storage __kernel_sockaddr_storage
|
|
|
|
|
|
|
|
/*
|
|
|
|
* As we do 4.4BSD message passing we use a 4.4BSD message passing
|
|
|
|
* system, not 4.3. Thus msg_accrights(len) are now missing. They
|
|
|
|
* belong in an obscure libc emulation or the bin.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct msghdr {
|
2014-01-22 11:39:46 +07:00
|
|
|
void *msg_name; /* ptr to socket address structure */
|
|
|
|
int msg_namelen; /* size of socket address structure */
|
2014-11-24 22:42:55 +07:00
|
|
|
struct iov_iter msg_iter; /* data */
|
2014-01-22 11:39:46 +07:00
|
|
|
void *msg_control; /* ancillary data */
|
|
|
|
__kernel_size_t msg_controllen; /* ancillary data buffer length */
|
|
|
|
unsigned int msg_flags; /* flags on received message */
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
separate kernel- and userland-side msghdr
Kernel-side struct msghdr is (currently) using the same layout as
userland one, but it's not a one-to-one copy - even without considering
32bit compat issues, we have msg_iov, msg_name and msg_control copied
to kernel[1]. It's fairly localized, so we get away with a few functions
where that knowledge is needed (and we could shrink that set even
more). Pretty much everything deals with the kernel-side variant and
the few places that want userland one just use a bunch of force-casts
to paper over the differences.
The thing is, kernel-side definition of struct msghdr is *not* exposed
in include/uapi - libc doesn't see it, etc. So we can add struct user_msghdr,
with proper annotations and let the few places that ever deal with those
beasts use it for userland pointers. Saner typechecking aside, that will
allow to change the layout of kernel-side msghdr - e.g. replace
msg_iov/msg_iovlen there with struct iov_iter, getting rid of the need
to modify the iovec as we copy data to/from it, etc.
We could introduce kernel_msghdr instead, but that would create much more
noise - the absolute majority of the instances would need to have the
type switched to kernel_msghdr and definition of struct msghdr in
include/linux/socket.h is not going to be seen by userland anyway.
This commit just introduces user_msghdr and switches the few places that
are dealing with userland-side msghdr to it.
[1] actually, it's even trickier than that - we copy msg_control for
sendmsg, but keep the userland address on recvmsg.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-04-07 01:03:05 +07:00
|
|
|
|
|
|
|
struct user_msghdr {
|
|
|
|
void __user *msg_name; /* ptr to socket address structure */
|
|
|
|
int msg_namelen; /* size of socket address structure */
|
|
|
|
struct iovec __user *msg_iov; /* scatter/gather array */
|
|
|
|
__kernel_size_t msg_iovlen; /* # elements in msg_iov */
|
|
|
|
void __user *msg_control; /* ancillary data */
|
|
|
|
__kernel_size_t msg_controllen; /* ancillary data buffer length */
|
|
|
|
unsigned int msg_flags; /* flags on received message */
|
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-10-13 13:40:10 +07:00
|
|
|
/* For recvmmsg/sendmmsg */
|
|
|
|
struct mmsghdr {
|
separate kernel- and userland-side msghdr
Kernel-side struct msghdr is (currently) using the same layout as
userland one, but it's not a one-to-one copy - even without considering
32bit compat issues, we have msg_iov, msg_name and msg_control copied
to kernel[1]. It's fairly localized, so we get away with a few functions
where that knowledge is needed (and we could shrink that set even
more). Pretty much everything deals with the kernel-side variant and
the few places that want userland one just use a bunch of force-casts
to paper over the differences.
The thing is, kernel-side definition of struct msghdr is *not* exposed
in include/uapi - libc doesn't see it, etc. So we can add struct user_msghdr,
with proper annotations and let the few places that ever deal with those
beasts use it for userland pointers. Saner typechecking aside, that will
allow to change the layout of kernel-side msghdr - e.g. replace
msg_iov/msg_iovlen there with struct iov_iter, getting rid of the need
to modify the iovec as we copy data to/from it, etc.
We could introduce kernel_msghdr instead, but that would create much more
noise - the absolute majority of the instances would need to have the
type switched to kernel_msghdr and definition of struct msghdr in
include/linux/socket.h is not going to be seen by userland anyway.
This commit just introduces user_msghdr and switches the few places that
are dealing with userland-side msghdr to it.
[1] actually, it's even trickier than that - we copy msg_control for
sendmsg, but keep the userland address on recvmsg.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-04-07 01:03:05 +07:00
|
|
|
struct user_msghdr msg_hdr;
|
2012-04-15 12:58:06 +07:00
|
|
|
unsigned int msg_len;
|
2009-10-13 13:40:10 +07:00
|
|
|
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* POSIX 1003.1g - ancillary data object information
|
|
|
|
* Ancillary data consits of a sequence of pairs of
|
|
|
|
* (cmsghdr, cmsg_data[])
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct cmsghdr {
|
|
|
|
__kernel_size_t cmsg_len; /* data byte count, including hdr */
|
|
|
|
int cmsg_level; /* originating protocol */
|
|
|
|
int cmsg_type; /* protocol-specific type */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
2011-03-31 08:57:33 +07:00
|
|
|
* Ancillary data object information MACROS
|
2005-04-17 05:20:36 +07:00
|
|
|
* Table 5-14 of POSIX 1003.1g
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg))
|
|
|
|
#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg))
|
|
|
|
|
|
|
|
#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
|
|
|
|
|
|
|
|
#define CMSG_DATA(cmsg) ((void *)((char *)(cmsg) + CMSG_ALIGN(sizeof(struct cmsghdr))))
|
|
|
|
#define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len))
|
|
|
|
#define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len))
|
|
|
|
|
|
|
|
#define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \
|
|
|
|
(struct cmsghdr *)(ctl) : \
|
|
|
|
(struct cmsghdr *)NULL)
|
|
|
|
#define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen)
|
|
|
|
#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \
|
|
|
|
(cmsg)->cmsg_len <= (unsigned long) \
|
|
|
|
((mhdr)->msg_controllen - \
|
|
|
|
((char *)(cmsg) - (char *)(mhdr)->msg_control)))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the next cmsg header
|
|
|
|
*
|
|
|
|
* PLEASE, do not touch this function. If you think, that it is
|
|
|
|
* incorrect, grep kernel sources and think about consequences
|
|
|
|
* before trying to improve it.
|
|
|
|
*
|
|
|
|
* Now it always returns valid, not truncated ancillary object
|
|
|
|
* HEADER. But caller still MUST check, that cmsg->cmsg_len is
|
|
|
|
* inside range, given by msg->msg_controllen before using
|
|
|
|
* ancillary object DATA. --ANK (980731)
|
|
|
|
*/
|
|
|
|
|
2009-10-05 14:24:36 +07:00
|
|
|
static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size,
|
2005-04-17 05:20:36 +07:00
|
|
|
struct cmsghdr *__cmsg)
|
|
|
|
{
|
|
|
|
struct cmsghdr * __ptr;
|
|
|
|
|
|
|
|
__ptr = (struct cmsghdr*)(((unsigned char *) __cmsg) + CMSG_ALIGN(__cmsg->cmsg_len));
|
|
|
|
if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size)
|
|
|
|
return (struct cmsghdr *)0;
|
|
|
|
|
|
|
|
return __ptr;
|
|
|
|
}
|
|
|
|
|
2009-10-05 14:24:36 +07:00
|
|
|
static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* "Socket"-level control message types: */
|
|
|
|
|
|
|
|
#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
|
|
|
|
#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
|
[SECURITY]: TCP/UDP getpeersec
This patch implements an application of the LSM-IPSec networking
controls whereby an application can determine the label of the
security association its TCP or UDP sockets are currently connected to
via getsockopt and the auxiliary data mechanism of recvmsg.
Patch purpose:
This patch enables a security-aware application to retrieve the
security context of an IPSec security association a particular TCP or
UDP socket is using. The application can then use this security
context to determine the security context for processing on behalf of
the peer at the other end of this connection. In the case of UDP, the
security context is for each individual packet. An example
application is the inetd daemon, which could be modified to start
daemons running at security contexts dependent on the remote client.
Patch design approach:
- Design for TCP
The patch enables the SELinux LSM to set the peer security context for
a socket based on the security context of the IPSec security
association. The application may retrieve this context using
getsockopt. When called, the kernel determines if the socket is a
connected (TCP_ESTABLISHED) TCP socket and, if so, uses the dst_entry
cache on the socket to retrieve the security associations. If a
security association has a security context, the context string is
returned, as for UNIX domain sockets.
- Design for UDP
Unlike TCP, UDP is connectionless. This requires a somewhat different
API to retrieve the peer security context. With TCP, the peer
security context stays the same throughout the connection, thus it can
be retrieved at any time between when the connection is established
and when it is torn down. With UDP, each read/write can have
different peer and thus the security context might change every time.
As a result the security context retrieval must be done TOGETHER with
the packet retrieval.
The solution is to build upon the existing Unix domain socket API for
retrieving user credentials. Linux offers the API for obtaining user
credentials via ancillary messages (i.e., out of band/control messages
that are bundled together with a normal message).
Patch implementation details:
- Implementation for TCP
The security context can be retrieved by applications using getsockopt
with the existing SO_PEERSEC flag. As an example (ignoring error
checking):
getsockopt(sockfd, SOL_SOCKET, SO_PEERSEC, optbuf, &optlen);
printf("Socket peer context is: %s\n", optbuf);
The SELinux function, selinux_socket_getpeersec, is extended to check
for labeled security associations for connected (TCP_ESTABLISHED ==
sk->sk_state) TCP sockets only. If so, the socket has a dst_cache of
struct dst_entry values that may refer to security associations. If
these have security associations with security contexts, the security
context is returned.
getsockopt returns a buffer that contains a security context string or
the buffer is unmodified.
- Implementation for UDP
To retrieve the security context, the application first indicates to
the kernel such desire by setting the IP_PASSSEC option via
getsockopt. Then the application retrieves the security context using
the auxiliary data mechanism.
An example server application for UDP should look like this:
toggle = 1;
toggle_len = sizeof(toggle);
setsockopt(sockfd, SOL_IP, IP_PASSSEC, &toggle, &toggle_len);
recvmsg(sockfd, &msg_hdr, 0);
if (msg_hdr.msg_controllen > sizeof(struct cmsghdr)) {
cmsg_hdr = CMSG_FIRSTHDR(&msg_hdr);
if (cmsg_hdr->cmsg_len <= CMSG_LEN(sizeof(scontext)) &&
cmsg_hdr->cmsg_level == SOL_IP &&
cmsg_hdr->cmsg_type == SCM_SECURITY) {
memcpy(&scontext, CMSG_DATA(cmsg_hdr), sizeof(scontext));
}
}
ip_setsockopt is enhanced with a new socket option IP_PASSSEC to allow
a server socket to receive security context of the peer. A new
ancillary message type SCM_SECURITY.
When the packet is received we get the security context from the
sec_path pointer which is contained in the sk_buff, and copy it to the
ancillary message space. An additional LSM hook,
selinux_socket_getpeersec_udp, is defined to retrieve the security
context from the SELinux space. The existing function,
selinux_socket_getpeersec does not suit our purpose, because the
security context is copied directly to user space, rather than to
kernel space.
Testing:
We have tested the patch by setting up TCP and UDP connections between
applications on two machines using the IPSec policies that result in
labeled security associations being built. For TCP, we can then
extract the peer security context using getsockopt on either end. For
UDP, the receiving end can retrieve the security context using the
auxiliary data mechanism of recvmsg.
Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-21 13:41:23 +07:00
|
|
|
#define SCM_SECURITY 0x03 /* rw: security label */
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
struct ucred {
|
|
|
|
__u32 pid;
|
|
|
|
__u32 uid;
|
|
|
|
__u32 gid;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Supported address families. */
|
|
|
|
#define AF_UNSPEC 0
|
|
|
|
#define AF_UNIX 1 /* Unix domain sockets */
|
|
|
|
#define AF_LOCAL 1 /* POSIX name for AF_UNIX */
|
|
|
|
#define AF_INET 2 /* Internet IP Protocol */
|
|
|
|
#define AF_AX25 3 /* Amateur Radio AX.25 */
|
|
|
|
#define AF_IPX 4 /* Novell IPX */
|
|
|
|
#define AF_APPLETALK 5 /* AppleTalk DDP */
|
|
|
|
#define AF_NETROM 6 /* Amateur Radio NET/ROM */
|
|
|
|
#define AF_BRIDGE 7 /* Multiprotocol bridge */
|
|
|
|
#define AF_ATMPVC 8 /* ATM PVCs */
|
|
|
|
#define AF_X25 9 /* Reserved for X.25 project */
|
|
|
|
#define AF_INET6 10 /* IP version 6 */
|
|
|
|
#define AF_ROSE 11 /* Amateur Radio X.25 PLP */
|
|
|
|
#define AF_DECnet 12 /* Reserved for DECnet project */
|
|
|
|
#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/
|
|
|
|
#define AF_SECURITY 14 /* Security callback pseudo AF */
|
|
|
|
#define AF_KEY 15 /* PF_KEY key management API */
|
|
|
|
#define AF_NETLINK 16
|
|
|
|
#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */
|
|
|
|
#define AF_PACKET 17 /* Packet family */
|
|
|
|
#define AF_ASH 18 /* Ash */
|
|
|
|
#define AF_ECONET 19 /* Acorn Econet */
|
|
|
|
#define AF_ATMSVC 20 /* ATM SVCs */
|
2009-02-27 14:41:38 +07:00
|
|
|
#define AF_RDS 21 /* RDS sockets */
|
2005-04-17 05:20:36 +07:00
|
|
|
#define AF_SNA 22 /* Linux SNA Project (nutters!) */
|
|
|
|
#define AF_IRDA 23 /* IRDA sockets */
|
|
|
|
#define AF_PPPOX 24 /* PPPoX sockets */
|
|
|
|
#define AF_WANPIPE 25 /* Wanpipe API Sockets */
|
|
|
|
#define AF_LLC 26 /* Linux LLC */
|
2013-05-30 00:09:07 +07:00
|
|
|
#define AF_IB 27 /* Native InfiniBand address */
|
2007-12-17 06:59:24 +07:00
|
|
|
#define AF_CAN 29 /* Controller Area Network */
|
2006-01-03 01:04:38 +07:00
|
|
|
#define AF_TIPC 30 /* TIPC sockets */
|
2005-04-17 05:20:36 +07:00
|
|
|
#define AF_BLUETOOTH 31 /* Bluetooth sockets */
|
2007-02-09 04:51:54 +07:00
|
|
|
#define AF_IUCV 32 /* IUCV sockets */
|
2007-04-27 05:48:28 +07:00
|
|
|
#define AF_RXRPC 33 /* RxRPC sockets */
|
2008-07-26 23:52:34 +07:00
|
|
|
#define AF_ISDN 34 /* mISDN sockets */
|
2008-09-23 09:51:15 +07:00
|
|
|
#define AF_PHONET 35 /* Phonet sockets */
|
2009-06-08 19:18:47 +07:00
|
|
|
#define AF_IEEE802154 36 /* IEEE802154 sockets */
|
2010-03-30 20:56:19 +07:00
|
|
|
#define AF_CAIF 37 /* CAIF sockets */
|
2010-10-19 20:04:42 +07:00
|
|
|
#define AF_ALG 38 /* Algorithm sockets */
|
2011-07-02 05:31:35 +07:00
|
|
|
#define AF_NFC 39 /* NFC sockets */
|
VSOCK: Introduce VM Sockets
VM Sockets allows communication between virtual machines and the hypervisor.
User level applications both in a virtual machine and on the host can use the
VM Sockets API, which facilitates fast and efficient communication between
guest virtual machines and their host. A socket address family, designed to be
compatible with UDP and TCP at the interface level, is provided.
Today, VM Sockets is used by various VMware Tools components inside the guest
for zero-config, network-less access to VMware host services. In addition to
this, VMware's users are using VM Sockets for various applications, where
network access of the virtual machine is restricted or non-existent. Examples
of this are VMs communicating with device proxies for proprietary hardware
running as host applications and automated testing of applications running
within virtual machines.
The VMware VM Sockets are similar to other socket types, like Berkeley UNIX
socket interface. The VM Sockets module supports both connection-oriented
stream sockets like TCP, and connectionless datagram sockets like UDP. The VM
Sockets protocol family is defined as "AF_VSOCK" and the socket operations
split for SOCK_DGRAM and SOCK_STREAM.
For additional information about the use of VM Sockets, please refer to the
VM Sockets Programming Guide available at:
https://www.vmware.com/support/developer/vmci-sdk/
Signed-off-by: George Zhang <georgezhang@vmware.com>
Signed-off-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Andy king <acking@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-02-06 21:23:56 +07:00
|
|
|
#define AF_VSOCK 40 /* vSockets */
|
|
|
|
#define AF_MAX 41 /* For now.. */
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Protocol families, same as address families. */
|
|
|
|
#define PF_UNSPEC AF_UNSPEC
|
|
|
|
#define PF_UNIX AF_UNIX
|
|
|
|
#define PF_LOCAL AF_LOCAL
|
|
|
|
#define PF_INET AF_INET
|
|
|
|
#define PF_AX25 AF_AX25
|
|
|
|
#define PF_IPX AF_IPX
|
|
|
|
#define PF_APPLETALK AF_APPLETALK
|
|
|
|
#define PF_NETROM AF_NETROM
|
|
|
|
#define PF_BRIDGE AF_BRIDGE
|
|
|
|
#define PF_ATMPVC AF_ATMPVC
|
|
|
|
#define PF_X25 AF_X25
|
|
|
|
#define PF_INET6 AF_INET6
|
|
|
|
#define PF_ROSE AF_ROSE
|
|
|
|
#define PF_DECnet AF_DECnet
|
|
|
|
#define PF_NETBEUI AF_NETBEUI
|
|
|
|
#define PF_SECURITY AF_SECURITY
|
|
|
|
#define PF_KEY AF_KEY
|
|
|
|
#define PF_NETLINK AF_NETLINK
|
|
|
|
#define PF_ROUTE AF_ROUTE
|
|
|
|
#define PF_PACKET AF_PACKET
|
|
|
|
#define PF_ASH AF_ASH
|
|
|
|
#define PF_ECONET AF_ECONET
|
|
|
|
#define PF_ATMSVC AF_ATMSVC
|
2009-02-27 14:41:38 +07:00
|
|
|
#define PF_RDS AF_RDS
|
2005-04-17 05:20:36 +07:00
|
|
|
#define PF_SNA AF_SNA
|
|
|
|
#define PF_IRDA AF_IRDA
|
|
|
|
#define PF_PPPOX AF_PPPOX
|
|
|
|
#define PF_WANPIPE AF_WANPIPE
|
|
|
|
#define PF_LLC AF_LLC
|
2013-05-30 00:09:07 +07:00
|
|
|
#define PF_IB AF_IB
|
2007-12-17 06:59:24 +07:00
|
|
|
#define PF_CAN AF_CAN
|
2006-01-03 01:04:38 +07:00
|
|
|
#define PF_TIPC AF_TIPC
|
2005-04-17 05:20:36 +07:00
|
|
|
#define PF_BLUETOOTH AF_BLUETOOTH
|
2007-02-09 04:51:54 +07:00
|
|
|
#define PF_IUCV AF_IUCV
|
2007-04-27 05:48:28 +07:00
|
|
|
#define PF_RXRPC AF_RXRPC
|
2008-07-26 23:52:34 +07:00
|
|
|
#define PF_ISDN AF_ISDN
|
2008-09-23 09:51:15 +07:00
|
|
|
#define PF_PHONET AF_PHONET
|
2009-06-08 19:18:47 +07:00
|
|
|
#define PF_IEEE802154 AF_IEEE802154
|
2010-03-30 20:56:19 +07:00
|
|
|
#define PF_CAIF AF_CAIF
|
2010-10-19 20:04:42 +07:00
|
|
|
#define PF_ALG AF_ALG
|
2011-07-02 05:31:35 +07:00
|
|
|
#define PF_NFC AF_NFC
|
VSOCK: Introduce VM Sockets
VM Sockets allows communication between virtual machines and the hypervisor.
User level applications both in a virtual machine and on the host can use the
VM Sockets API, which facilitates fast and efficient communication between
guest virtual machines and their host. A socket address family, designed to be
compatible with UDP and TCP at the interface level, is provided.
Today, VM Sockets is used by various VMware Tools components inside the guest
for zero-config, network-less access to VMware host services. In addition to
this, VMware's users are using VM Sockets for various applications, where
network access of the virtual machine is restricted or non-existent. Examples
of this are VMs communicating with device proxies for proprietary hardware
running as host applications and automated testing of applications running
within virtual machines.
The VMware VM Sockets are similar to other socket types, like Berkeley UNIX
socket interface. The VM Sockets module supports both connection-oriented
stream sockets like TCP, and connectionless datagram sockets like UDP. The VM
Sockets protocol family is defined as "AF_VSOCK" and the socket operations
split for SOCK_DGRAM and SOCK_STREAM.
For additional information about the use of VM Sockets, please refer to the
VM Sockets Programming Guide available at:
https://www.vmware.com/support/developer/vmci-sdk/
Signed-off-by: George Zhang <georgezhang@vmware.com>
Signed-off-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Andy king <acking@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2013-02-06 21:23:56 +07:00
|
|
|
#define PF_VSOCK AF_VSOCK
|
2005-04-17 05:20:36 +07:00
|
|
|
#define PF_MAX AF_MAX
|
|
|
|
|
|
|
|
/* Maximum queue length specifiable by listen. */
|
|
|
|
#define SOMAXCONN 128
|
|
|
|
|
|
|
|
/* Flags we can use with send/ and recv.
|
|
|
|
Added those for 1003.1g not all are supported yet
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define MSG_OOB 1
|
|
|
|
#define MSG_PEEK 2
|
|
|
|
#define MSG_DONTROUTE 4
|
|
|
|
#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */
|
|
|
|
#define MSG_CTRUNC 8
|
|
|
|
#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */
|
|
|
|
#define MSG_TRUNC 0x20
|
|
|
|
#define MSG_DONTWAIT 0x40 /* Nonblocking io */
|
|
|
|
#define MSG_EOR 0x80 /* End of record */
|
|
|
|
#define MSG_WAITALL 0x100 /* Wait for a full request */
|
|
|
|
#define MSG_FIN 0x200
|
|
|
|
#define MSG_SYN 0x400
|
|
|
|
#define MSG_CONFIRM 0x800 /* Confirm path validity */
|
|
|
|
#define MSG_RST 0x1000
|
|
|
|
#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */
|
|
|
|
#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
|
|
|
|
#define MSG_MORE 0x8000 /* Sender will send more */
|
2010-03-26 23:18:03 +07:00
|
|
|
#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */
|
2012-04-05 10:05:35 +07:00
|
|
|
#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
|
2005-04-17 05:20:36 +07:00
|
|
|
#define MSG_EOF MSG_FIN
|
|
|
|
|
2012-07-19 13:43:09 +07:00
|
|
|
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
|
2014-11-06 03:44:27 +07:00
|
|
|
#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
|
O_CLOEXEC for SCM_RIGHTS
Part two in the O_CLOEXEC saga: adding support for file descriptors received
through Unix domain sockets.
The patch is once again pretty minimal, it introduces a new flag for recvmsg
and passes it just like the existing MSG_CMSG_COMPAT flag. I think this bit
is not used otherwise but the networking people will know better.
This new flag is not recognized by recvfrom and recv. These functions cannot
be used for that purpose and the asymmetry this introduces is not worse than
the already existing MSG_CMSG_COMPAT situations.
The patch must be applied on the patch which introduced O_CLOEXEC. It has to
remove static from the new get_unused_fd_flags function but since scm.c cannot
live in a module the function still hasn't to be exported.
Here's a test program to make sure the code works. It's so much longer than
the actual patch...
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
#endif
#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC 0x40000000
#endif
int
main (int argc, char *argv[])
{
if (argc > 1)
{
int fd = atol (argv[1]);
printf ("child: fd = %d\n", fd);
if (fcntl (fd, F_GETFD) == 0 || errno != EBADF)
{
puts ("file descriptor valid in child");
return 1;
}
return 0;
}
struct sockaddr_un sun;
strcpy (sun.sun_path, "./testsocket");
sun.sun_family = AF_UNIX;
char databuf[] = "hello";
struct iovec iov[1];
iov[0].iov_base = databuf;
iov[0].iov_len = sizeof (databuf);
union
{
struct cmsghdr hdr;
char bytes[CMSG_SPACE (sizeof (int))];
} buf;
struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
.msg_control = buf.bytes,
.msg_controllen = sizeof (buf) };
struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN (sizeof (int));
msg.msg_controllen = cmsg->cmsg_len;
pid_t child = fork ();
if (child == -1)
error (1, errno, "fork");
if (child == 0)
{
int sock = socket (PF_UNIX, SOCK_STREAM, 0);
if (sock < 0)
error (1, errno, "socket");
if (bind (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0)
error (1, errno, "bind");
if (listen (sock, SOMAXCONN) < 0)
error (1, errno, "listen");
int conn = accept (sock, NULL, NULL);
if (conn == -1)
error (1, errno, "accept");
*(int *) CMSG_DATA (cmsg) = sock;
if (sendmsg (conn, &msg, MSG_NOSIGNAL) < 0)
error (1, errno, "sendmsg");
return 0;
}
/* For a test suite this should be more robust like a
barrier in shared memory. */
sleep (1);
int sock = socket (PF_UNIX, SOCK_STREAM, 0);
if (sock < 0)
error (1, errno, "socket");
if (connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0)
error (1, errno, "connect");
unlink (sun.sun_path);
*(int *) CMSG_DATA (cmsg) = -1;
if (recvmsg (sock, &msg, MSG_CMSG_CLOEXEC) < 0)
error (1, errno, "recvmsg");
int fd = *(int *) CMSG_DATA (cmsg);
if (fd == -1)
error (1, 0, "no descriptor received");
char fdname[20];
snprintf (fdname, sizeof (fdname), "%d", fd);
execl ("/proc/self/exe", argv[0], fdname, NULL);
puts ("execl failed");
return 1;
}
[akpm@linux-foundation.org: Fix fastcall inconsistency noted by Michael Buesch]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 13:40:34 +07:00
|
|
|
descriptor received through
|
|
|
|
SCM_RIGHTS */
|
2005-04-17 05:20:36 +07:00
|
|
|
#if defined(CONFIG_COMPAT)
|
|
|
|
#define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */
|
|
|
|
#else
|
|
|
|
#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
|
|
|
|
#define SOL_IP 0
|
|
|
|
/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */
|
|
|
|
#define SOL_TCP 6
|
|
|
|
#define SOL_UDP 17
|
|
|
|
#define SOL_IPV6 41
|
|
|
|
#define SOL_ICMPV6 58
|
|
|
|
#define SOL_SCTP 132
|
2006-11-28 02:10:57 +07:00
|
|
|
#define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */
|
2005-04-17 05:20:36 +07:00
|
|
|
#define SOL_RAW 255
|
|
|
|
#define SOL_IPX 256
|
|
|
|
#define SOL_AX25 257
|
|
|
|
#define SOL_ATALK 258
|
|
|
|
#define SOL_NETROM 259
|
|
|
|
#define SOL_ROSE 260
|
|
|
|
#define SOL_DECNET 261
|
|
|
|
#define SOL_X25 262
|
|
|
|
#define SOL_PACKET 263
|
|
|
|
#define SOL_ATM 264 /* ATM layer (cell level) */
|
|
|
|
#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */
|
|
|
|
#define SOL_IRDA 266
|
|
|
|
#define SOL_NETBEUI 267
|
|
|
|
#define SOL_LLC 268
|
2005-08-10 10:14:34 +07:00
|
|
|
#define SOL_DCCP 269
|
2005-08-16 02:32:15 +07:00
|
|
|
#define SOL_NETLINK 270
|
2006-01-03 01:04:38 +07:00
|
|
|
#define SOL_TIPC 271
|
2007-04-27 05:48:28 +07:00
|
|
|
#define SOL_RXRPC 272
|
2007-06-28 05:43:43 +07:00
|
|
|
#define SOL_PPPOL2TP 273
|
2007-10-20 20:28:08 +07:00
|
|
|
#define SOL_BLUETOOTH 274
|
2008-10-06 01:16:16 +07:00
|
|
|
#define SOL_PNPIPE 275
|
2009-02-27 14:41:38 +07:00
|
|
|
#define SOL_RDS 276
|
2009-04-22 06:26:22 +07:00
|
|
|
#define SOL_IUCV 277
|
2010-03-30 20:56:19 +07:00
|
|
|
#define SOL_CAIF 278
|
2010-10-19 20:04:42 +07:00
|
|
|
#define SOL_ALG 279
|
2013-02-22 16:53:25 +07:00
|
|
|
#define SOL_NFC 280
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* IPX options */
|
|
|
|
#define IPX_TYPE 1
|
|
|
|
|
|
|
|
extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
|
|
|
|
struct iovec *iov,
|
|
|
|
int offset,
|
2006-11-15 12:36:14 +07:00
|
|
|
unsigned int len, __wsum *csump);
|
2013-08-06 16:45:03 +07:00
|
|
|
extern unsigned long iov_pages(const struct iovec *iov, int offset,
|
|
|
|
unsigned long nr_segs);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2012-03-11 19:51:50 +07:00
|
|
|
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
|
2005-04-17 05:20:36 +07:00
|
|
|
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
|
|
|
|
|
2009-10-13 13:40:10 +07:00
|
|
|
struct timespec;
|
|
|
|
|
2013-06-06 02:38:26 +07:00
|
|
|
/* The __sys_...msg variants allow MSG_CMSG_COMPAT */
|
separate kernel- and userland-side msghdr
Kernel-side struct msghdr is (currently) using the same layout as
userland one, but it's not a one-to-one copy - even without considering
32bit compat issues, we have msg_iov, msg_name and msg_control copied
to kernel[1]. It's fairly localized, so we get away with a few functions
where that knowledge is needed (and we could shrink that set even
more). Pretty much everything deals with the kernel-side variant and
the few places that want userland one just use a bunch of force-casts
to paper over the differences.
The thing is, kernel-side definition of struct msghdr is *not* exposed
in include/uapi - libc doesn't see it, etc. So we can add struct user_msghdr,
with proper annotations and let the few places that ever deal with those
beasts use it for userland pointers. Saner typechecking aside, that will
allow to change the layout of kernel-side msghdr - e.g. replace
msg_iov/msg_iovlen there with struct iov_iter, getting rid of the need
to modify the iovec as we copy data to/from it, etc.
We could introduce kernel_msghdr instead, but that would create much more
noise - the absolute majority of the instances would need to have the
type switched to kernel_msghdr and definition of struct msghdr in
include/linux/socket.h is not going to be seen by userland anyway.
This commit just introduces user_msghdr and switches the few places that
are dealing with userland-side msghdr to it.
[1] actually, it's even trickier than that - we copy msg_control for
sendmsg, but keep the userland address on recvmsg.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-04-07 01:03:05 +07:00
|
|
|
extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags);
|
|
|
|
extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags);
|
2009-10-13 13:40:10 +07:00
|
|
|
extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
|
|
|
|
unsigned int flags, struct timespec *timeout);
|
2011-05-03 03:21:35 +07:00
|
|
|
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
|
|
|
|
unsigned int vlen, unsigned int flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif /* _LINUX_SOCKET_H */
|