mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-16 01:56:57 +07:00
Merge branch 'sctp-add-support-for-sk_reuseport'
Xin Long says: ==================== sctp: add support for sk_reuseport sctp sk_reuseport allows multiple socks to listen on the same port and addresses, as long as these socks have the same uid. This works pretty much as TCP/UDP does, the only difference is that sctp is multi-homing and all the bind_addrs in these socks will have to match completely, otherwise listen() will return an error. The below is when 5 sockets are listening on 172.16.254.254:6400 on a server, 26 sockets on a client connect to 172.16.254.254:6400 and each may be processed by a different socket on the server which is selected by hash(lport, pport, paddr) in reuseport_select_sock(): # ss --sctp -nn State Recv-Q Send-Q Local Address:Port Peer Address:Port LISTEN 0 10 172.16.254.254:6400 *:* `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.2.1:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.2.4:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.3.3:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.3.4:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.5.2:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.5.3:1234 LISTEN 0 10 172.16.254.254:6400 *:* `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.1.3:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.1.4:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.3.2:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.4.1:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.4.2:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.4.3:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.4.4:1234 LISTEN 0 10 172.16.254.254:6400 *:* `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.1.2:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.3.5:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.4.5:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.253.253:1234 LISTEN 0 10 172.16.254.254:6400 *:* `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.2.2:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.2.3:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.5.4:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 
172.16.5.5:1234 LISTEN 0 10 172.16.254.254:6400 *:* `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.1.1:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.1.5:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.2.5:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.3.1:1234 `- ESTAB 0 0 172.16.254.254%eth1:6400 172.16.5.1:1234 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
261501d94e
@ -152,7 +152,7 @@ int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc,
|
||||
*/
|
||||
int sctp_rcv(struct sk_buff *skb);
|
||||
int sctp_v4_err(struct sk_buff *skb, u32 info);
|
||||
void sctp_hash_endpoint(struct sctp_endpoint *);
|
||||
int sctp_hash_endpoint(struct sctp_endpoint *ep);
|
||||
void sctp_unhash_endpoint(struct sctp_endpoint *);
|
||||
struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
|
||||
struct sctphdr *, struct sctp_association **,
|
||||
|
@ -96,7 +96,9 @@ struct sctp_stream;
|
||||
|
||||
struct sctp_bind_bucket {
|
||||
unsigned short port;
|
||||
unsigned short fastreuse;
|
||||
signed char fastreuse;
|
||||
signed char fastreuseport;
|
||||
kuid_t fastuid;
|
||||
struct hlist_node node;
|
||||
struct hlist_head owner;
|
||||
struct net *net;
|
||||
@ -1190,6 +1192,8 @@ int sctp_bind_addr_conflict(struct sctp_bind_addr *, const union sctp_addr *,
|
||||
struct sctp_sock *, struct sctp_sock *);
|
||||
int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
|
||||
const union sctp_addr *addr);
|
||||
int sctp_bind_addrs_check(struct sctp_sock *sp,
|
||||
struct sctp_sock *sp2, int cnt2);
|
||||
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
|
||||
const union sctp_addr *addrs,
|
||||
int addrcnt,
|
||||
|
@ -187,6 +187,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
|
||||
call_rcu(&old_reuse->rcu, reuseport_free_rcu);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reuseport_add_sock);
|
||||
|
||||
void reuseport_detach_sock(struct sock *sk)
|
||||
{
|
||||
|
@ -337,6 +337,34 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
|
||||
return match;
|
||||
}
|
||||
|
||||
/* Compare the bound address lists of two SCTP sockets.
 *
 * Every entry in sp's bind address list is looked up in sp2's list; two
 * entries match when sp's address-family cmp_addr() reports them equal and
 * both entries are marked valid.
 *
 * Returns 0 when the number of sp entries that found a match equals cnt2,
 * -EEXIST when the counts differ but at least one pair matched, and 1 when
 * the counts differ and no pair matched before the walk stopped.
 *
 * NOTE(review): cnt2 is presumably the number of entries in sp2's list (the
 * caller in __sctp_hash_endpoint counts the new socket's address_list and
 * passes that socket as sp2) - confirm for any other callers.
 */
int sctp_bind_addrs_check(struct sctp_sock *sp,
|
||||
struct sctp_sock *sp2, int cnt2)
|
||||
{
|
||||
struct sctp_bind_addr *bp2 = &sp2->ep->base.bind_addr;
|
||||
struct sctp_bind_addr *bp = &sp->ep->base.bind_addr;
|
||||
struct sctp_sockaddr_entry *laddr, *laddr2;
|
||||
bool exist = false;
|
||||
int cnt = 0;
|
||||
|
||||
/* Both address lists are walked inside one RCU read-side section. */
rcu_read_lock();
|
||||
list_for_each_entry_rcu(laddr, &bp->address_list, list) {
|
||||
list_for_each_entry_rcu(laddr2, &bp2->address_list, list) {
|
||||
if (sp->pf->af->cmp_addr(&laddr->a, &laddr2->a) &&
|
||||
laddr->valid && laddr2->valid) {
|
||||
exist = true;
|
||||
/* Found a partner for laddr: skip the mismatch path below. */
goto next;
|
||||
}
|
||||
}
|
||||
/* Inner loop ended without a match: discard the tally and stop,
 * so the final comparison with cnt2 can only succeed if cnt2 is 0.
 */
cnt = 0;
|
||||
break;
|
||||
next:
|
||||
cnt++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/* exist distinguishes a partial overlap (-EEXIST) from no overlap (1). */
return (cnt == cnt2) ? 0 : (exist ? -EEXIST : 1);
|
||||
}
|
||||
|
||||
/* Does the address 'addr' conflict with any addresses in
|
||||
* the bp.
|
||||
*/
|
||||
|
129
net/sctp/input.c
129
net/sctp/input.c
@ -57,6 +57,7 @@
|
||||
#include <net/sctp/checksum.h>
|
||||
#include <net/net_namespace.h>
|
||||
#include <linux/rhashtable.h>
|
||||
#include <net/sock_reuseport.h>
|
||||
|
||||
/* Forward declarations for internal helpers. */
|
||||
static int sctp_rcv_ootb(struct sk_buff *);
|
||||
@ -65,8 +66,10 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net,
|
||||
const union sctp_addr *paddr,
|
||||
const union sctp_addr *laddr,
|
||||
struct sctp_transport **transportp);
|
||||
static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
|
||||
const union sctp_addr *laddr);
|
||||
static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
|
||||
struct net *net, struct sk_buff *skb,
|
||||
const union sctp_addr *laddr,
|
||||
const union sctp_addr *daddr);
|
||||
static struct sctp_association *__sctp_lookup_association(
|
||||
struct net *net,
|
||||
const union sctp_addr *local,
|
||||
@ -171,7 +174,7 @@ int sctp_rcv(struct sk_buff *skb)
|
||||
asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport);
|
||||
|
||||
if (!asoc)
|
||||
ep = __sctp_rcv_lookup_endpoint(net, &dest);
|
||||
ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src);
|
||||
|
||||
/* Retrieve the common input handling substructure. */
|
||||
rcvr = asoc ? &asoc->base : &ep->base;
|
||||
@ -721,43 +724,87 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
|
||||
}
|
||||
|
||||
/* Insert endpoint into the hash table. */
|
||||
static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
|
||||
static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
|
||||
{
|
||||
struct net *net = sock_net(ep->base.sk);
|
||||
struct sctp_ep_common *epb;
|
||||
struct sock *sk = ep->base.sk;
|
||||
struct net *net = sock_net(sk);
|
||||
struct sctp_hashbucket *head;
|
||||
struct sctp_ep_common *epb;
|
||||
|
||||
epb = &ep->base;
|
||||
|
||||
epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
|
||||
head = &sctp_ep_hashtable[epb->hashent];
|
||||
|
||||
if (sk->sk_reuseport) {
|
||||
bool any = sctp_is_ep_boundall(sk);
|
||||
struct sctp_ep_common *epb2;
|
||||
struct list_head *list;
|
||||
int cnt = 0, err = 1;
|
||||
|
||||
list_for_each(list, &ep->base.bind_addr.address_list)
|
||||
cnt++;
|
||||
|
||||
sctp_for_each_hentry(epb2, &head->chain) {
|
||||
struct sock *sk2 = epb2->sk;
|
||||
|
||||
if (!net_eq(sock_net(sk2), net) || sk2 == sk ||
|
||||
!uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) ||
|
||||
!sk2->sk_reuseport)
|
||||
continue;
|
||||
|
||||
err = sctp_bind_addrs_check(sctp_sk(sk2),
|
||||
sctp_sk(sk), cnt);
|
||||
if (!err) {
|
||||
err = reuseport_add_sock(sk, sk2, any);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
} else if (err < 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
if (err) {
|
||||
err = reuseport_alloc(sk, any);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
write_lock(&head->lock);
|
||||
hlist_add_head(&epb->node, &head->chain);
|
||||
write_unlock(&head->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Add an endpoint to the hash. Local BH-safe. */
|
||||
void sctp_hash_endpoint(struct sctp_endpoint *ep)
|
||||
int sctp_hash_endpoint(struct sctp_endpoint *ep)
|
||||
{
|
||||
int err;
|
||||
|
||||
local_bh_disable();
|
||||
__sctp_hash_endpoint(ep);
|
||||
err = __sctp_hash_endpoint(ep);
|
||||
local_bh_enable();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Remove endpoint from the hash table. */
|
||||
static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
|
||||
{
|
||||
struct net *net = sock_net(ep->base.sk);
|
||||
struct sock *sk = ep->base.sk;
|
||||
struct sctp_hashbucket *head;
|
||||
struct sctp_ep_common *epb;
|
||||
|
||||
epb = &ep->base;
|
||||
|
||||
epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
|
||||
epb->hashent = sctp_ep_hashfn(sock_net(sk), epb->bind_addr.port);
|
||||
|
||||
head = &sctp_ep_hashtable[epb->hashent];
|
||||
|
||||
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
||||
reuseport_detach_sock(sk);
|
||||
|
||||
write_lock(&head->lock);
|
||||
hlist_del_init(&epb->node);
|
||||
write_unlock(&head->lock);
|
||||
@ -771,16 +818,35 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep)
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
/* Hash a (net, local port, peer address, peer port) tuple.
 *
 * For an AF_INET6 peer the 16-byte address is first reduced to 32 bits with
 * jhash(); for any other family the IPv4 address word is used as-is. That
 * word is then mixed by jhash_3words() with the peer port (shifted into the
 * upper 16 bits) OR-ed with lport, and with net_hash_mix(net), using seed as
 * the initial value.
 */
static inline __u32 sctp_hashfn(const struct net *net, __be16 lport,
|
||||
const union sctp_addr *paddr, __u32 seed)
|
||||
{
|
||||
__u32 addr;
|
||||
|
||||
if (paddr->sa.sa_family == AF_INET6)
|
||||
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
|
||||
else
|
||||
addr = (__force __u32)paddr->v4.sin_addr.s_addr;
|
||||
|
||||
/* NOTE(review): the peer port is read through the v4 member even for
 * AF_INET6 peers; presumably the port field sits at the same offset in
 * both union members - confirm against the union sctp_addr definition.
 */
return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
|
||||
(__force __u32)lport, net_hash_mix(net), seed);
|
||||
}
|
||||
|
||||
/* Look up an endpoint. */
|
||||
static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
|
||||
const union sctp_addr *laddr)
|
||||
static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
|
||||
struct net *net, struct sk_buff *skb,
|
||||
const union sctp_addr *laddr,
|
||||
const union sctp_addr *paddr)
|
||||
{
|
||||
struct sctp_hashbucket *head;
|
||||
struct sctp_ep_common *epb;
|
||||
struct sctp_endpoint *ep;
|
||||
struct sock *sk;
|
||||
__be16 lport;
|
||||
int hash;
|
||||
|
||||
hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port));
|
||||
lport = laddr->v4.sin_port;
|
||||
hash = sctp_ep_hashfn(net, ntohs(lport));
|
||||
head = &sctp_ep_hashtable[hash];
|
||||
read_lock(&head->lock);
|
||||
sctp_for_each_hentry(epb, &head->chain) {
|
||||
@ -792,6 +858,15 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
|
||||
ep = sctp_sk(net->sctp.ctl_sock)->ep;
|
||||
|
||||
hit:
|
||||
sk = ep->base.sk;
|
||||
if (sk->sk_reuseport) {
|
||||
__u32 phash = sctp_hashfn(net, lport, paddr, 0);
|
||||
|
||||
sk = reuseport_select_sock(sk, phash, skb,
|
||||
sizeof(struct sctphdr));
|
||||
if (sk)
|
||||
ep = sctp_sk(sk)->ep;
|
||||
}
|
||||
sctp_endpoint_hold(ep);
|
||||
read_unlock(&head->lock);
|
||||
return ep;
|
||||
@ -830,35 +905,17 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
|
||||
static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
|
||||
{
|
||||
const struct sctp_transport *t = data;
|
||||
const union sctp_addr *paddr = &t->ipaddr;
|
||||
const struct net *net = sock_net(t->asoc->base.sk);
|
||||
__be16 lport = htons(t->asoc->base.bind_addr.port);
|
||||
__u32 addr;
|
||||
|
||||
if (paddr->sa.sa_family == AF_INET6)
|
||||
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
|
||||
else
|
||||
addr = (__force __u32)paddr->v4.sin_addr.s_addr;
|
||||
|
||||
return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
|
||||
(__force __u32)lport, net_hash_mix(net), seed);
|
||||
return sctp_hashfn(sock_net(t->asoc->base.sk),
|
||||
htons(t->asoc->base.bind_addr.port),
|
||||
&t->ipaddr, seed);
|
||||
}
|
||||
|
||||
static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
|
||||
{
|
||||
const struct sctp_hash_cmp_arg *x = data;
|
||||
const union sctp_addr *paddr = x->paddr;
|
||||
const struct net *net = x->net;
|
||||
__be16 lport = x->lport;
|
||||
__u32 addr;
|
||||
|
||||
if (paddr->sa.sa_family == AF_INET6)
|
||||
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
|
||||
else
|
||||
addr = (__force __u32)paddr->v4.sin_addr.s_addr;
|
||||
|
||||
return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
|
||||
(__force __u32)lport, net_hash_mix(net), seed);
|
||||
return sctp_hashfn(x->net, x->lport, x->paddr, seed);
|
||||
}
|
||||
|
||||
static const struct rhashtable_params sctp_hash_params = {
|
||||
|
@ -7644,8 +7644,10 @@ static struct sctp_bind_bucket *sctp_bucket_create(
|
||||
|
||||
static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
|
||||
{
|
||||
bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse);
|
||||
struct sctp_sock *sp = sctp_sk(sk);
|
||||
bool reuse = (sk->sk_reuse || sp->reuse);
|
||||
struct sctp_bind_hashbucket *head; /* hash list */
|
||||
kuid_t uid = sock_i_uid(sk);
|
||||
struct sctp_bind_bucket *pp;
|
||||
unsigned short snum;
|
||||
int ret;
|
||||
@ -7721,7 +7723,10 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
|
||||
|
||||
pr_debug("%s: found a possible match\n", __func__);
|
||||
|
||||
if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING)
|
||||
if ((pp->fastreuse && reuse &&
|
||||
sk->sk_state != SCTP_SS_LISTENING) ||
|
||||
(pp->fastreuseport && sk->sk_reuseport &&
|
||||
uid_eq(pp->fastuid, uid)))
|
||||
goto success;
|
||||
|
||||
/* Run through the list of sockets bound to the port
|
||||
@ -7735,16 +7740,18 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
|
||||
* in an endpoint.
|
||||
*/
|
||||
sk_for_each_bound(sk2, &pp->owner) {
|
||||
struct sctp_endpoint *ep2;
|
||||
ep2 = sctp_sk(sk2)->ep;
|
||||
struct sctp_sock *sp2 = sctp_sk(sk2);
|
||||
struct sctp_endpoint *ep2 = sp2->ep;
|
||||
|
||||
if (sk == sk2 ||
|
||||
(reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) &&
|
||||
sk2->sk_state != SCTP_SS_LISTENING))
|
||||
(reuse && (sk2->sk_reuse || sp2->reuse) &&
|
||||
sk2->sk_state != SCTP_SS_LISTENING) ||
|
||||
(sk->sk_reuseport && sk2->sk_reuseport &&
|
||||
uid_eq(uid, sock_i_uid(sk2))))
|
||||
continue;
|
||||
|
||||
if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr,
|
||||
sctp_sk(sk2), sctp_sk(sk))) {
|
||||
if (sctp_bind_addr_conflict(&ep2->base.bind_addr,
|
||||
addr, sp2, sp)) {
|
||||
ret = (long)sk2;
|
||||
goto fail_unlock;
|
||||
}
|
||||
@ -7767,19 +7774,32 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
|
||||
pp->fastreuse = 1;
|
||||
else
|
||||
pp->fastreuse = 0;
|
||||
} else if (pp->fastreuse &&
|
||||
|
||||
if (sk->sk_reuseport) {
|
||||
pp->fastreuseport = 1;
|
||||
pp->fastuid = uid;
|
||||
} else {
|
||||
pp->fastreuseport = 0;
|
||||
}
|
||||
} else {
|
||||
if (pp->fastreuse &&
|
||||
(!reuse || sk->sk_state == SCTP_SS_LISTENING))
|
||||
pp->fastreuse = 0;
|
||||
|
||||
if (pp->fastreuseport &&
|
||||
(!sk->sk_reuseport || !uid_eq(pp->fastuid, uid)))
|
||||
pp->fastreuseport = 0;
|
||||
}
|
||||
|
||||
/* We are set, so fill up all the data in the hash table
|
||||
* entry, tie the socket list information with the rest of the
|
||||
* sockets FIXME: Blurry, NPI (ipg).
|
||||
*/
|
||||
success:
|
||||
if (!sctp_sk(sk)->bind_hash) {
|
||||
if (!sp->bind_hash) {
|
||||
inet_sk(sk)->inet_num = snum;
|
||||
sk_add_bind_node(sk, &pp->owner);
|
||||
sctp_sk(sk)->bind_hash = pp;
|
||||
sp->bind_hash = pp;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
@ -7852,8 +7872,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
|
||||
}
|
||||
|
||||
sk->sk_max_ack_backlog = backlog;
|
||||
sctp_hash_endpoint(ep);
|
||||
return 0;
|
||||
return sctp_hash_endpoint(ep);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user