mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 10:40:53 +07:00
mptcp: Handle MP_CAPABLE options for outgoing connections
Add hooks to tcp_output.c to add MP_CAPABLE to an outgoing SYN request, to capture the MP_CAPABLE in the received SYN-ACK, and to add MP_CAPABLE to the final ACK of the three-way handshake. Use the .sk_rx_dst_set() handler in the subflow proto to capture when the responding SYN-ACK is received and notify the MPTCP connection layer.
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
2303f994b3
commit
cec37a6e41
@ -137,6 +137,9 @@ struct tcp_request_sock {
|
||||
const struct tcp_request_sock_ops *af_specific;
|
||||
u64 snt_synack; /* first SYNACK sent time */
|
||||
bool tfo_listener;
|
||||
#if IS_ENABLED(CONFIG_MPTCP)
|
||||
bool is_mptcp;
|
||||
#endif
|
||||
u32 txhash;
|
||||
u32 rcv_isn;
|
||||
u32 snt_isn;
|
||||
|
@ -39,8 +39,27 @@ struct mptcp_out_options {
|
||||
|
||||
void mptcp_init(void);
|
||||
|
||||
static inline bool sk_is_mptcp(const struct sock *sk)
|
||||
{
|
||||
return tcp_sk(sk)->is_mptcp;
|
||||
}
|
||||
|
||||
static inline bool rsk_is_mptcp(const struct request_sock *req)
|
||||
{
|
||||
return tcp_rsk(req)->is_mptcp;
|
||||
}
|
||||
|
||||
void mptcp_parse_option(const unsigned char *ptr, int opsize,
|
||||
struct tcp_options_received *opt_rx);
|
||||
bool mptcp_syn_options(struct sock *sk, unsigned int *size,
|
||||
struct mptcp_out_options *opts);
|
||||
void mptcp_rcv_synsent(struct sock *sk);
|
||||
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
|
||||
struct mptcp_out_options *opts);
|
||||
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
|
||||
unsigned int *size, unsigned int remaining,
|
||||
struct mptcp_out_options *opts);
|
||||
|
||||
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
|
||||
|
||||
/* move the skb extension ownership, with the assumption that 'to' is
|
||||
@ -89,11 +108,47 @@ static inline void mptcp_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_MPTCP: no socket is ever an MPTCP one. */
static inline bool sk_is_mptcp(const struct sock *sk)
{
	return false;
}
|
||||
|
||||
/* Stub for builds without CONFIG_MPTCP: no request sock is an MPTCP one. */
static inline bool rsk_is_mptcp(const struct request_sock *req)
{
	return false;
}
|
||||
|
||||
/* Stub for builds without CONFIG_MPTCP: the option is ignored, @opt_rx is
 * left untouched.
 */
static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
				      struct tcp_options_received *opt_rx)
{
}
|
||||
|
||||
/* Stub for builds without CONFIG_MPTCP: never adds an option to a SYN;
 * @size and @opts are not written.
 */
static inline bool mptcp_syn_options(struct sock *sk, unsigned int *size,
				     struct mptcp_out_options *opts)
{
	return false;
}
|
||||
|
||||
/* Stub for builds without CONFIG_MPTCP: nothing to do. */
static inline void mptcp_rcv_synsent(struct sock *sk)
{
}
|
||||
|
||||
/* Stub for builds without CONFIG_MPTCP: never adds an option to a SYN-ACK;
 * @size and @opts are not written.
 */
static inline bool mptcp_synack_options(const struct request_sock *req,
					unsigned int *size,
					struct mptcp_out_options *opts)
{
	return false;
}
|
||||
|
||||
/* Stub for builds without CONFIG_MPTCP: never adds an option to an
 * established-state segment; @size and @opts are not written.
 */
static inline bool mptcp_established_options(struct sock *sk,
					     struct sk_buff *skb,
					     unsigned int *size,
					     unsigned int remaining,
					     struct mptcp_out_options *opts)
{
	return false;
}
|
||||
|
||||
static inline void mptcp_skb_ext_move(struct sk_buff *to,
|
||||
const struct sk_buff *from)
|
||||
{
|
||||
@ -107,6 +162,8 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
|
||||
|
||||
#endif /* CONFIG_MPTCP */
|
||||
|
||||
void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped);
|
||||
|
||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||
int mptcpv6_init(void);
|
||||
#elif IS_ENABLED(CONFIG_IPV6)
|
||||
|
@ -5978,6 +5978,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
|
||||
tcp_initialize_rcv_mss(sk);
|
||||
|
||||
if (sk_is_mptcp(sk))
|
||||
mptcp_rcv_synsent(sk);
|
||||
|
||||
/* Remember, tcp_poll() does not lock socket!
|
||||
* Change state from SYN-SENT only after copied_seq
|
||||
* is initialized. */
|
||||
@ -6600,6 +6603,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
|
||||
|
||||
tcp_rsk(req)->af_specific = af_ops;
|
||||
tcp_rsk(req)->ts_off = 0;
|
||||
#if IS_ENABLED(CONFIG_MPTCP)
|
||||
tcp_rsk(req)->is_mptcp = 0;
|
||||
#endif
|
||||
|
||||
tcp_clear_options(&tmp_opt);
|
||||
tmp_opt.mss_clamp = af_ops->mss_clamp;
|
||||
|
@ -597,6 +597,22 @@ static void smc_set_option_cond(const struct tcp_sock *tp,
|
||||
#endif
|
||||
}
|
||||
|
||||
static void mptcp_set_option_cond(const struct request_sock *req,
|
||||
struct tcp_out_options *opts,
|
||||
unsigned int *remaining)
|
||||
{
|
||||
if (rsk_is_mptcp(req)) {
|
||||
unsigned int size;
|
||||
|
||||
if (mptcp_synack_options(req, &size, &opts->mptcp)) {
|
||||
if (*remaining >= size) {
|
||||
opts->options |= OPTION_MPTCP;
|
||||
*remaining -= size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute TCP options for SYN packets. This is not the final
|
||||
* network wire format yet.
|
||||
*/
|
||||
@ -666,6 +682,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
|
||||
|
||||
smc_set_option(tp, opts, &remaining);
|
||||
|
||||
if (sk_is_mptcp(sk)) {
|
||||
unsigned int size;
|
||||
|
||||
if (mptcp_syn_options(sk, &size, &opts->mptcp)) {
|
||||
opts->options |= OPTION_MPTCP;
|
||||
remaining -= size;
|
||||
}
|
||||
}
|
||||
|
||||
return MAX_TCP_OPTION_SPACE - remaining;
|
||||
}
|
||||
|
||||
@ -727,6 +752,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
|
||||
}
|
||||
}
|
||||
|
||||
mptcp_set_option_cond(req, opts, &remaining);
|
||||
|
||||
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
|
||||
|
||||
return MAX_TCP_OPTION_SPACE - remaining;
|
||||
@ -764,6 +791,23 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
|
||||
size += TCPOLEN_TSTAMP_ALIGNED;
|
||||
}
|
||||
|
||||
/* MPTCP options have precedence over SACK for the limited TCP
|
||||
* option space because a MPTCP connection would be forced to
|
||||
* fall back to regular TCP if a required multipath option is
|
||||
* missing. SACK still gets a chance to use whatever space is
|
||||
* left.
|
||||
*/
|
||||
if (sk_is_mptcp(sk)) {
|
||||
unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
|
||||
unsigned int opt_size = 0;
|
||||
|
||||
if (mptcp_established_options(sk, skb, &opt_size, remaining,
|
||||
&opts->mptcp)) {
|
||||
opts->options |= OPTION_MPTCP;
|
||||
size += opt_size;
|
||||
}
|
||||
}
|
||||
|
||||
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
|
||||
if (unlikely(eff_sacks)) {
|
||||
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
|
||||
|
@ -238,6 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
|
||||
|
||||
icsk->icsk_af_ops = &ipv6_mapped;
|
||||
if (sk_is_mptcp(sk))
|
||||
mptcp_handle_ipv6_mapped(sk, true);
|
||||
sk->sk_backlog_rcv = tcp_v4_do_rcv;
|
||||
#ifdef CONFIG_TCP_MD5SIG
|
||||
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
||||
@ -248,6 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
if (err) {
|
||||
icsk->icsk_ext_hdr_len = exthdrlen;
|
||||
icsk->icsk_af_ops = &ipv6_specific;
|
||||
if (sk_is_mptcp(sk))
|
||||
mptcp_handle_ipv6_mapped(sk, false);
|
||||
sk->sk_backlog_rcv = tcp_v6_do_rcv;
|
||||
#ifdef CONFIG_TCP_MD5SIG
|
||||
tp->af_specific = &tcp_sock_ipv6_specific;
|
||||
@ -1203,6 +1207,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
||||
newnp->saddr = newsk->sk_v6_rcv_saddr;
|
||||
|
||||
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
|
||||
if (sk_is_mptcp(newsk))
|
||||
mptcp_handle_ipv6_mapped(newsk, true);
|
||||
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
|
||||
#ifdef CONFIG_TCP_MD5SIG
|
||||
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
||||
|
@ -72,14 +72,114 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
|
||||
}
|
||||
}
|
||||
|
||||
void mptcp_get_options(const struct sk_buff *skb,
|
||||
struct tcp_options_received *opt_rx)
|
||||
{
|
||||
const unsigned char *ptr;
|
||||
const struct tcphdr *th = tcp_hdr(skb);
|
||||
int length = (th->doff * 4) - sizeof(struct tcphdr);
|
||||
|
||||
ptr = (const unsigned char *)(th + 1);
|
||||
|
||||
while (length > 0) {
|
||||
int opcode = *ptr++;
|
||||
int opsize;
|
||||
|
||||
switch (opcode) {
|
||||
case TCPOPT_EOL:
|
||||
return;
|
||||
case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
|
||||
length--;
|
||||
continue;
|
||||
default:
|
||||
opsize = *ptr++;
|
||||
if (opsize < 2) /* "silly options" */
|
||||
return;
|
||||
if (opsize > length)
|
||||
return; /* don't parse partial options */
|
||||
if (opcode == TCPOPT_MPTCP)
|
||||
mptcp_parse_option(ptr, opsize, opt_rx);
|
||||
ptr += opsize - 2;
|
||||
length -= opsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool mptcp_syn_options(struct sock *sk, unsigned int *size,
|
||||
struct mptcp_out_options *opts)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
|
||||
if (subflow->request_mptcp) {
|
||||
pr_debug("local_key=%llu", subflow->local_key);
|
||||
opts->suboptions = OPTION_MPTCP_MPC_SYN;
|
||||
opts->sndr_key = subflow->local_key;
|
||||
*size = TCPOLEN_MPTCP_MPC_SYN;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void mptcp_rcv_synsent(struct sock *sk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
pr_debug("subflow=%p", subflow);
|
||||
if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
|
||||
subflow->mp_capable = 1;
|
||||
subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
|
||||
} else {
|
||||
tcp_sk(sk)->is_mptcp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
|
||||
unsigned int *size, unsigned int remaining,
|
||||
struct mptcp_out_options *opts)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
|
||||
if (subflow->mp_capable && !subflow->fourth_ack) {
|
||||
opts->suboptions = OPTION_MPTCP_MPC_ACK;
|
||||
opts->sndr_key = subflow->local_key;
|
||||
opts->rcvr_key = subflow->remote_key;
|
||||
*size = TCPOLEN_MPTCP_MPC_ACK;
|
||||
subflow->fourth_ack = 1;
|
||||
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
|
||||
subflow, subflow->local_key, subflow->remote_key);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
|
||||
struct mptcp_out_options *opts)
|
||||
{
|
||||
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
|
||||
|
||||
if (subflow_req->mp_capable) {
|
||||
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
|
||||
opts->sndr_key = subflow_req->local_key;
|
||||
*size = TCPOLEN_MPTCP_MPC_SYNACK;
|
||||
pr_debug("subflow_req=%p, local_key=%llu",
|
||||
subflow_req, subflow_req->local_key);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
|
||||
{
|
||||
if ((OPTION_MPTCP_MPC_SYN |
|
||||
OPTION_MPTCP_MPC_SYNACK |
|
||||
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
|
||||
u8 len;
|
||||
|
||||
if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
|
||||
len = TCPOLEN_MPTCP_MPC_SYN;
|
||||
else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
|
||||
len = TCPOLEN_MPTCP_MPC_SYNACK;
|
||||
else
|
||||
len = TCPOLEN_MPTCP_MPC_ACK;
|
||||
|
||||
|
@ -25,12 +25,28 @@
|
||||
*/
|
||||
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
|
||||
{
|
||||
if (!msk->subflow)
|
||||
if (!msk->subflow || mptcp_subflow_ctx(msk->subflow->sk)->fourth_ack)
|
||||
return NULL;
|
||||
|
||||
return msk->subflow;
|
||||
}
|
||||
|
||||
/* if msk has a single subflow, and the mp_capable handshake is failed,
|
||||
* return it.
|
||||
* Otherwise returns NULL
|
||||
*/
|
||||
static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
|
||||
{
|
||||
struct socket *ssock = __mptcp_nmpc_socket(msk);
|
||||
|
||||
sock_owned_by_me((const struct sock *)msk);
|
||||
|
||||
if (!ssock || sk_is_mptcp(ssock->sk))
|
||||
return NULL;
|
||||
|
||||
return ssock;
|
||||
}
|
||||
|
||||
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
|
||||
{
|
||||
return ((struct sock *)msk)->sk_state == TCP_CLOSE;
|
||||
@ -56,6 +72,7 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
|
||||
|
||||
msk->subflow = ssock;
|
||||
subflow = mptcp_subflow_ctx(ssock->sk);
|
||||
list_add(&subflow->node, &msk->conn_list);
|
||||
subflow->request_mptcp = 1;
|
||||
|
||||
set_state:
|
||||
@ -64,66 +81,169 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
|
||||
return ssock;
|
||||
}
|
||||
|
||||
static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
|
||||
sock_owned_by_me((const struct sock *)msk);
|
||||
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
return mptcp_subflow_tcp_sock(subflow);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct socket *subflow = msk->subflow;
|
||||
struct socket *ssock;
|
||||
struct sock *ssk;
|
||||
int ret;
|
||||
|
||||
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return sock_sendmsg(subflow, msg);
|
||||
lock_sock(sk);
|
||||
ssock = __mptcp_tcp_fallback(msk);
|
||||
if (ssock) {
|
||||
pr_debug("fallback passthrough");
|
||||
ret = sock_sendmsg(ssock, msg);
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssk = mptcp_subflow_get(msk);
|
||||
if (!ssk) {
|
||||
release_sock(sk);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
ret = sock_sendmsg(ssk->sk_socket, msg);
|
||||
|
||||
release_sock(sk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
||||
int nonblock, int flags, int *addr_len)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct socket *subflow = msk->subflow;
|
||||
struct socket *ssock;
|
||||
struct sock *ssk;
|
||||
int copied = 0;
|
||||
|
||||
if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return sock_recvmsg(subflow, msg, flags);
|
||||
lock_sock(sk);
|
||||
ssock = __mptcp_tcp_fallback(msk);
|
||||
if (ssock) {
|
||||
pr_debug("fallback-read subflow=%p",
|
||||
mptcp_subflow_ctx(ssock->sk));
|
||||
copied = sock_recvmsg(ssock, msg, flags);
|
||||
release_sock(sk);
|
||||
return copied;
|
||||
}
|
||||
|
||||
ssk = mptcp_subflow_get(msk);
|
||||
if (!ssk) {
|
||||
release_sock(sk);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
|
||||
copied = sock_recvmsg(ssk->sk_socket, msg, flags);
|
||||
|
||||
release_sock(sk);
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
/* subflow sockets can be either outgoing (connect) or incoming
|
||||
* (accept).
|
||||
*
|
||||
* Outgoing subflows use in-kernel sockets.
|
||||
* Incoming subflows do not have their own 'struct socket' allocated,
|
||||
* so we need to use tcp_close() after detaching them from the mptcp
|
||||
* parent socket.
|
||||
*/
|
||||
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
|
||||
struct mptcp_subflow_context *subflow,
|
||||
long timeout)
|
||||
{
|
||||
struct socket *sock = READ_ONCE(ssk->sk_socket);
|
||||
|
||||
list_del(&subflow->node);
|
||||
|
||||
if (sock && sock != sk->sk_socket) {
|
||||
/* outgoing subflow */
|
||||
sock_release(sock);
|
||||
} else {
|
||||
/* incoming subflow */
|
||||
tcp_close(ssk, timeout);
|
||||
}
|
||||
}
|
||||
|
||||
static int mptcp_init_sock(struct sock *sk)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
||||
INIT_LIST_HEAD(&msk->conn_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mptcp_close(struct sock *sk, long timeout)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow, *tmp;
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct socket *ssock;
|
||||
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
if (ssock) {
|
||||
pr_debug("subflow=%p", mptcp_subflow_ctx(ssock->sk));
|
||||
sock_release(ssock);
|
||||
lock_sock(sk);
|
||||
|
||||
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
__mptcp_close_ssk(sk, ssk, subflow, timeout);
|
||||
}
|
||||
|
||||
sock_orphan(sk);
|
||||
sock_put(sk);
|
||||
release_sock(sk);
|
||||
sk_common_release(sk);
|
||||
}
|
||||
|
||||
static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
|
||||
static int mptcp_get_port(struct sock *sk, unsigned short snum)
|
||||
{
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
int err;
|
||||
struct socket *ssock;
|
||||
|
||||
saddr->sa_family = AF_INET;
|
||||
ssock = __mptcp_nmpc_socket(msk);
|
||||
pr_debug("msk=%p, subflow=%p", msk, ssock);
|
||||
if (WARN_ON_ONCE(!ssock))
|
||||
return -EINVAL;
|
||||
|
||||
pr_debug("msk=%p, subflow=%p", msk,
|
||||
mptcp_subflow_ctx(msk->subflow->sk));
|
||||
return inet_csk_get_port(ssock->sk, snum);
|
||||
}
|
||||
|
||||
err = kernel_connect(msk->subflow, saddr, len, 0);
|
||||
void mptcp_finish_connect(struct sock *ssk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct mptcp_sock *msk;
|
||||
struct sock *sk;
|
||||
|
||||
sk->sk_state = TCP_ESTABLISHED;
|
||||
subflow = mptcp_subflow_ctx(ssk);
|
||||
|
||||
return err;
|
||||
if (!subflow->mp_capable)
|
||||
return;
|
||||
|
||||
sk = subflow->conn;
|
||||
msk = mptcp_sk(sk);
|
||||
|
||||
/* the socket is not connected yet, no msk/subflow ops can access/race
|
||||
* accessing the field below
|
||||
*/
|
||||
WRITE_ONCE(msk->remote_key, subflow->remote_key);
|
||||
WRITE_ONCE(msk->local_key, subflow->local_key);
|
||||
}
|
||||
|
||||
static struct proto mptcp_prot = {
|
||||
@ -132,13 +252,12 @@ static struct proto mptcp_prot = {
|
||||
.init = mptcp_init_sock,
|
||||
.close = mptcp_close,
|
||||
.accept = inet_csk_accept,
|
||||
.connect = mptcp_connect,
|
||||
.shutdown = tcp_shutdown,
|
||||
.sendmsg = mptcp_sendmsg,
|
||||
.recvmsg = mptcp_recvmsg,
|
||||
.hash = inet_hash,
|
||||
.unhash = inet_unhash,
|
||||
.get_port = inet_csk_get_port,
|
||||
.get_port = mptcp_get_port,
|
||||
.obj_size = sizeof(struct mptcp_sock),
|
||||
.no_autobind = true,
|
||||
};
|
||||
|
@ -40,19 +40,47 @@
|
||||
struct mptcp_sock {
|
||||
/* inet_connection_sock must be the first member */
|
||||
struct inet_connection_sock sk;
|
||||
u64 local_key;
|
||||
u64 remote_key;
|
||||
struct list_head conn_list;
|
||||
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
|
||||
};
|
||||
|
||||
/* Iterate over every subflow context linked on (__msk)->conn_list. */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
|
||||
|
||||
/* View @sk as the mptcp_sock that embeds it; a plain pointer cast that also
 * drops the const qualifier.
 */
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	const void *raw = sk;

	return (struct mptcp_sock *)raw;
}
|
||||
|
||||
struct mptcp_subflow_request_sock {
|
||||
struct tcp_request_sock sk;
|
||||
u8 mp_capable : 1,
|
||||
mp_join : 1,
|
||||
backup : 1;
|
||||
u64 local_key;
|
||||
u64 remote_key;
|
||||
};
|
||||
|
||||
/* View @rsk as the mptcp_subflow_request_sock that embeds it; a plain
 * pointer cast that also drops the const qualifier.
 */
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	const void *raw = rsk;

	return (struct mptcp_subflow_request_sock *)raw;
}
|
||||
|
||||
/* MPTCP subflow context */
|
||||
struct mptcp_subflow_context {
|
||||
u32 request_mptcp : 1; /* send MP_CAPABLE */
|
||||
struct list_head node;/* conn_list of subflows */
|
||||
u64 local_key;
|
||||
u64 remote_key;
|
||||
u32 request_mptcp : 1, /* send MP_CAPABLE */
|
||||
mp_capable : 1, /* remote is MPTCP capable */
|
||||
fourth_ack : 1, /* send initial DSS */
|
||||
conn_finished : 1;
|
||||
struct sock *tcp_sock; /* tcp sk backpointer */
|
||||
struct sock *conn; /* parent mptcp_sock */
|
||||
const struct inet_connection_sock_af_ops *icsk_af_ops;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
@ -74,4 +102,14 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
|
||||
void mptcp_subflow_init(void);
|
||||
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
|
||||
|
||||
extern const struct inet_connection_sock_af_ops ipv4_specific;
|
||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||
extern const struct inet_connection_sock_af_ops ipv6_specific;
|
||||
#endif
|
||||
|
||||
void mptcp_get_options(const struct sk_buff *skb,
|
||||
struct tcp_options_received *opt_rx);
|
||||
|
||||
void mptcp_finish_connect(struct sock *sk);
|
||||
|
||||
#endif /* __MPTCP_PROTOCOL_H */
|
||||
|
@ -12,9 +12,188 @@
|
||||
#include <net/inet_hashtables.h>
|
||||
#include <net/protocol.h>
|
||||
#include <net/tcp.h>
|
||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||
#include <net/ip6_route.h>
|
||||
#endif
|
||||
#include <net/mptcp.h>
|
||||
#include "protocol.h"
|
||||
|
||||
static void subflow_init_req(struct request_sock *req,
|
||||
const struct sock *sk_listener,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
|
||||
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
|
||||
struct tcp_options_received rx_opt;
|
||||
|
||||
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
|
||||
|
||||
memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
|
||||
mptcp_get_options(skb, &rx_opt);
|
||||
|
||||
subflow_req->mp_capable = 0;
|
||||
|
||||
#ifdef CONFIG_TCP_MD5SIG
|
||||
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
|
||||
* TCP option space.
|
||||
*/
|
||||
if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
|
||||
subflow_req->mp_capable = 1;
|
||||
subflow_req->remote_key = rx_opt.mptcp.sndr_key;
|
||||
}
|
||||
}
|
||||
|
||||
static void subflow_v4_init_req(struct request_sock *req,
|
||||
const struct sock *sk_listener,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
tcp_rsk(req)->is_mptcp = 1;
|
||||
|
||||
tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
|
||||
|
||||
subflow_init_req(req, sk_listener, skb);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||
static void subflow_v6_init_req(struct request_sock *req,
|
||||
const struct sock *sk_listener,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
tcp_rsk(req)->is_mptcp = 1;
|
||||
|
||||
tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);
|
||||
|
||||
subflow_init_req(req, sk_listener, skb);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
|
||||
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
|
||||
|
||||
if (subflow->conn && !subflow->conn_finished) {
|
||||
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
|
||||
subflow->remote_key);
|
||||
mptcp_finish_connect(sk);
|
||||
subflow->conn_finished = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static struct request_sock_ops subflow_request_sock_ops;
|
||||
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
|
||||
|
||||
static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
|
||||
pr_debug("subflow=%p", subflow);
|
||||
|
||||
/* Never answer to SYNs sent to broadcast or multicast */
|
||||
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
|
||||
goto drop;
|
||||
|
||||
return tcp_conn_request(&subflow_request_sock_ops,
|
||||
&subflow_request_sock_ipv4_ops,
|
||||
sk, skb);
|
||||
drop:
|
||||
tcp_listendrop(sk);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
|
||||
static struct inet_connection_sock_af_ops subflow_v6_specific;
|
||||
static struct inet_connection_sock_af_ops subflow_v6m_specific;
|
||||
|
||||
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||
|
||||
pr_debug("subflow=%p", subflow);
|
||||
|
||||
if (skb->protocol == htons(ETH_P_IP))
|
||||
return subflow_v4_conn_request(sk, skb);
|
||||
|
||||
if (!ipv6_unicast_destination(skb))
|
||||
goto drop;
|
||||
|
||||
return tcp_conn_request(&subflow_request_sock_ops,
|
||||
&subflow_request_sock_ipv6_ops, sk, skb);
|
||||
|
||||
drop:
|
||||
tcp_listendrop(sk);
|
||||
return 0; /* don't send reset */
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
|
||||
struct sk_buff *skb,
|
||||
struct request_sock *req,
|
||||
struct dst_entry *dst,
|
||||
struct request_sock *req_unhash,
|
||||
bool *own_req)
|
||||
{
|
||||
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
|
||||
struct sock *child;
|
||||
|
||||
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
|
||||
|
||||
/* if the sk is MP_CAPABLE, we already received the client key */
|
||||
|
||||
child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
|
||||
req_unhash, own_req);
|
||||
|
||||
if (child && *own_req) {
|
||||
if (!mptcp_subflow_ctx(child)) {
|
||||
pr_debug("Closing child socket");
|
||||
inet_sk_set_state(child, TCP_CLOSE);
|
||||
sock_set_flag(child, SOCK_DEAD);
|
||||
inet_csk_destroy_sock(child);
|
||||
child = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return child;
|
||||
}
|
||||
|
||||
static struct inet_connection_sock_af_ops subflow_specific;
|
||||
|
||||
static struct inet_connection_sock_af_ops *
|
||||
subflow_default_af_ops(struct sock *sk)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||
if (sk->sk_family == AF_INET6)
|
||||
return &subflow_v6_specific;
|
||||
#endif
|
||||
return &subflow_specific;
|
||||
}
|
||||
|
||||
/* Switch the af_ops of subflow socket @sk between the v4-mapped set
 * (@mapped true) and the family default set (@mapped false).
 *
 * When a switch is needed, the af_ops currently installed on the socket are
 * saved in the subflow context before the new ones are installed.
 * Does nothing when MPTCP IPv6 support is not built.
 */
void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	if (mapped)
		target = &subflow_v6m_specific;
	else
		target = subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	/* already using the wanted ops */
	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
#endif
}
|
||||
|
||||
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
@ -22,7 +201,8 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
|
||||
struct socket *sf;
|
||||
int err;
|
||||
|
||||
err = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sf);
|
||||
err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
|
||||
&sf);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -60,6 +240,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
|
||||
return NULL;
|
||||
|
||||
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
|
||||
INIT_LIST_HEAD(&ctx->node);
|
||||
|
||||
pr_debug("subflow=%p", ctx);
|
||||
|
||||
@ -70,6 +251,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
|
||||
|
||||
static int subflow_ulp_init(struct sock *sk)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
struct mptcp_subflow_context *ctx;
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
int err = 0;
|
||||
@ -91,6 +273,8 @@ static int subflow_ulp_init(struct sock *sk)
|
||||
pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
|
||||
|
||||
tp->is_mptcp = 1;
|
||||
ctx->icsk_af_ops = icsk->icsk_af_ops;
|
||||
icsk->icsk_af_ops = subflow_default_af_ops(sk);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
@ -105,15 +289,97 @@ static void subflow_ulp_release(struct sock *sk)
|
||||
kfree_rcu(ctx, rcu);
|
||||
}
|
||||
|
||||
static void subflow_ulp_fallback(struct sock *sk)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
icsk->icsk_ulp_ops = NULL;
|
||||
rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
|
||||
tcp_sk(sk)->is_mptcp = 0;
|
||||
}
|
||||
|
||||
static void subflow_ulp_clone(const struct request_sock *req,
|
||||
struct sock *newsk,
|
||||
const gfp_t priority)
|
||||
{
|
||||
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
|
||||
struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
|
||||
struct mptcp_subflow_context *new_ctx;
|
||||
|
||||
if (!subflow_req->mp_capable) {
|
||||
subflow_ulp_fallback(newsk);
|
||||
return;
|
||||
}
|
||||
|
||||
new_ctx = subflow_create_ctx(newsk, priority);
|
||||
if (new_ctx == NULL) {
|
||||
subflow_ulp_fallback(newsk);
|
||||
return;
|
||||
}
|
||||
|
||||
new_ctx->conn_finished = 1;
|
||||
new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
|
||||
new_ctx->mp_capable = 1;
|
||||
new_ctx->fourth_ack = 1;
|
||||
new_ctx->remote_key = subflow_req->remote_key;
|
||||
new_ctx->local_key = subflow_req->local_key;
|
||||
}
|
||||
|
||||
/* ULP descriptor registered under the name "mptcp" by mptcp_subflow_init() */
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};
|
||||
|
||||
static int subflow_ops_init(struct request_sock_ops *subflow_ops)
|
||||
{
|
||||
subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
|
||||
subflow_ops->slab_name = "request_sock_subflow";
|
||||
|
||||
subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
|
||||
subflow_ops->obj_size, 0,
|
||||
SLAB_ACCOUNT |
|
||||
SLAB_TYPESAFE_BY_RCU,
|
||||
NULL);
|
||||
if (!subflow_ops->slab)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mptcp_subflow_init(void)
|
||||
{
|
||||
subflow_request_sock_ops = tcp_request_sock_ops;
|
||||
if (subflow_ops_init(&subflow_request_sock_ops) != 0)
|
||||
panic("MPTCP: failed to init subflow request sock ops\n");
|
||||
|
||||
subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
|
||||
subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
|
||||
|
||||
subflow_specific = ipv4_specific;
|
||||
subflow_specific.conn_request = subflow_v4_conn_request;
|
||||
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
|
||||
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
|
||||
|
||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
|
||||
subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;
|
||||
|
||||
subflow_v6_specific = ipv6_specific;
|
||||
subflow_v6_specific.conn_request = subflow_v6_conn_request;
|
||||
subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
|
||||
subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
|
||||
|
||||
subflow_v6m_specific = subflow_v6_specific;
|
||||
subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
|
||||
subflow_v6m_specific.send_check = ipv4_specific.send_check;
|
||||
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
|
||||
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
|
||||
subflow_v6m_specific.net_frag_header_len = 0;
|
||||
#endif
|
||||
|
||||
if (tcp_register_ulp(&subflow_ulp_ops) != 0)
|
||||
panic("MPTCP: failed to register subflows to ULP\n");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user