mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-18 13:49:36 +07:00
tcp/dccp: do not touch listener sk_refcnt under synflood
When a SYNFLOOD targets a non-SO_REUSEPORT listener, multiple CPUs contend on sk->sk_refcnt and sk->sk_wmem_alloc changes. By letting listeners use the SOCK_RCU_FREE infrastructure, we can relax TCP_LISTEN lookup rules and avoid touching sk_refcnt. Note that we still use SLAB_DESTROY_BY_RCU rules for other sockets; only listeners are impacted by this change. Peak performance under SYNFLOOD is increased by ~33%: on my test machine, I could process 3.2 Mpps instead of 2.4 Mpps. The most time-consuming functions are now skb_set_owner_w() and sock_wfree(), contending on sk->sk_wmem_alloc when cooking SYNACKs and freeing them. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
3a5d1c0e7c
commit
3b24d854cb
@ -66,13 +66,15 @@ static inline struct sock *__inet6_lookup(struct net *net,
|
|||||||
const __be16 sport,
|
const __be16 sport,
|
||||||
const struct in6_addr *daddr,
|
const struct in6_addr *daddr,
|
||||||
const u16 hnum,
|
const u16 hnum,
|
||||||
const int dif)
|
const int dif,
|
||||||
|
bool *refcounted)
|
||||||
{
|
{
|
||||||
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
|
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
|
||||||
sport, daddr, hnum, dif);
|
sport, daddr, hnum, dif);
|
||||||
|
*refcounted = true;
|
||||||
if (sk)
|
if (sk)
|
||||||
return sk;
|
return sk;
|
||||||
|
*refcounted = false;
|
||||||
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
|
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
|
||||||
daddr, hnum, dif);
|
daddr, hnum, dif);
|
||||||
}
|
}
|
||||||
@ -81,17 +83,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
|
|||||||
struct sk_buff *skb, int doff,
|
struct sk_buff *skb, int doff,
|
||||||
const __be16 sport,
|
const __be16 sport,
|
||||||
const __be16 dport,
|
const __be16 dport,
|
||||||
int iif)
|
int iif,
|
||||||
|
bool *refcounted)
|
||||||
{
|
{
|
||||||
struct sock *sk = skb_steal_sock(skb);
|
struct sock *sk = skb_steal_sock(skb);
|
||||||
|
|
||||||
|
*refcounted = true;
|
||||||
if (sk)
|
if (sk)
|
||||||
return sk;
|
return sk;
|
||||||
|
|
||||||
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
|
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
|
||||||
doff, &ipv6_hdr(skb)->saddr, sport,
|
doff, &ipv6_hdr(skb)->saddr, sport,
|
||||||
&ipv6_hdr(skb)->daddr, ntohs(dport),
|
&ipv6_hdr(skb)->daddr, ntohs(dport),
|
||||||
iif);
|
iif, refcounted);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
|
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
|
||||||
|
@ -100,14 +100,10 @@ struct inet_bind_hashbucket {
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Sockets can be hashed in established or listening table
|
* Sockets can be hashed in established or listening table
|
||||||
* We must use different 'nulls' end-of-chain value for listening
|
|
||||||
* hash table, or we might find a socket that was closed and
|
|
||||||
* reallocated/inserted into established hash table
|
|
||||||
*/
|
*/
|
||||||
#define LISTENING_NULLS_BASE (1U << 29)
|
|
||||||
struct inet_listen_hashbucket {
|
struct inet_listen_hashbucket {
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
struct hlist_nulls_head head;
|
struct hlist_head head;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* This is for listening sockets, thus all sockets which possess wildcards. */
|
/* This is for listening sockets, thus all sockets which possess wildcards. */
|
||||||
@ -304,13 +300,19 @@ static inline struct sock *__inet_lookup(struct net *net,
|
|||||||
struct sk_buff *skb, int doff,
|
struct sk_buff *skb, int doff,
|
||||||
const __be32 saddr, const __be16 sport,
|
const __be32 saddr, const __be16 sport,
|
||||||
const __be32 daddr, const __be16 dport,
|
const __be32 daddr, const __be16 dport,
|
||||||
const int dif)
|
const int dif,
|
||||||
|
bool *refcounted)
|
||||||
{
|
{
|
||||||
u16 hnum = ntohs(dport);
|
u16 hnum = ntohs(dport);
|
||||||
struct sock *sk = __inet_lookup_established(net, hashinfo,
|
struct sock *sk;
|
||||||
saddr, sport, daddr, hnum, dif);
|
|
||||||
|
|
||||||
return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
|
sk = __inet_lookup_established(net, hashinfo, saddr, sport,
|
||||||
|
daddr, hnum, dif);
|
||||||
|
*refcounted = true;
|
||||||
|
if (sk)
|
||||||
|
return sk;
|
||||||
|
*refcounted = false;
|
||||||
|
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
|
||||||
sport, daddr, hnum, dif);
|
sport, daddr, hnum, dif);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -322,10 +324,13 @@ static inline struct sock *inet_lookup(struct net *net,
|
|||||||
const int dif)
|
const int dif)
|
||||||
{
|
{
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
bool refcounted;
|
||||||
|
|
||||||
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
|
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
|
||||||
dport, dif);
|
dport, dif, &refcounted);
|
||||||
|
|
||||||
|
if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
|
||||||
|
sk = NULL;
|
||||||
return sk;
|
return sk;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -333,17 +338,20 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
|
|||||||
struct sk_buff *skb,
|
struct sk_buff *skb,
|
||||||
int doff,
|
int doff,
|
||||||
const __be16 sport,
|
const __be16 sport,
|
||||||
const __be16 dport)
|
const __be16 dport,
|
||||||
|
bool *refcounted)
|
||||||
{
|
{
|
||||||
struct sock *sk = skb_steal_sock(skb);
|
struct sock *sk = skb_steal_sock(skb);
|
||||||
const struct iphdr *iph = ip_hdr(skb);
|
const struct iphdr *iph = ip_hdr(skb);
|
||||||
|
|
||||||
|
*refcounted = true;
|
||||||
if (sk)
|
if (sk)
|
||||||
return sk;
|
return sk;
|
||||||
else
|
|
||||||
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
|
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
|
||||||
doff, iph->saddr, sport,
|
doff, iph->saddr, sport,
|
||||||
iph->daddr, dport, inet_iif(skb));
|
iph->daddr, dport, inet_iif(skb),
|
||||||
|
refcounted);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 sk_ehashfn(const struct sock *sk);
|
u32 sk_ehashfn(const struct sock *sk);
|
||||||
|
@ -764,6 +764,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
|
|||||||
{
|
{
|
||||||
const struct dccp_hdr *dh;
|
const struct dccp_hdr *dh;
|
||||||
const struct iphdr *iph;
|
const struct iphdr *iph;
|
||||||
|
bool refcounted;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
int min_cov;
|
int min_cov;
|
||||||
|
|
||||||
@ -801,7 +802,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
|
|||||||
|
|
||||||
lookup:
|
lookup:
|
||||||
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
|
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
|
||||||
dh->dccph_sport, dh->dccph_dport);
|
dh->dccph_sport, dh->dccph_dport, &refcounted);
|
||||||
if (!sk) {
|
if (!sk) {
|
||||||
dccp_pr_debug("failed to look up flow ID in table and "
|
dccp_pr_debug("failed to look up flow ID in table and "
|
||||||
"get corresponding socket\n");
|
"get corresponding socket\n");
|
||||||
@ -830,6 +831,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
|
|||||||
goto lookup;
|
goto lookup;
|
||||||
}
|
}
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
refcounted = true;
|
||||||
nsk = dccp_check_req(sk, skb, req);
|
nsk = dccp_check_req(sk, skb, req);
|
||||||
if (!nsk) {
|
if (!nsk) {
|
||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
@ -886,6 +888,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
discard_and_relse:
|
discard_and_relse:
|
||||||
|
if (refcounted)
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
goto discard_it;
|
goto discard_it;
|
||||||
}
|
}
|
||||||
|
@ -642,6 +642,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
|
|||||||
static int dccp_v6_rcv(struct sk_buff *skb)
|
static int dccp_v6_rcv(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
const struct dccp_hdr *dh;
|
const struct dccp_hdr *dh;
|
||||||
|
bool refcounted;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
int min_cov;
|
int min_cov;
|
||||||
|
|
||||||
@ -670,7 +671,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
|
|||||||
lookup:
|
lookup:
|
||||||
sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
|
sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
|
||||||
dh->dccph_sport, dh->dccph_dport,
|
dh->dccph_sport, dh->dccph_dport,
|
||||||
inet6_iif(skb));
|
inet6_iif(skb), &refcounted);
|
||||||
if (!sk) {
|
if (!sk) {
|
||||||
dccp_pr_debug("failed to look up flow ID in table and "
|
dccp_pr_debug("failed to look up flow ID in table and "
|
||||||
"get corresponding socket\n");
|
"get corresponding socket\n");
|
||||||
@ -699,6 +700,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
|
|||||||
goto lookup;
|
goto lookup;
|
||||||
}
|
}
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
refcounted = true;
|
||||||
nsk = dccp_check_req(sk, skb, req);
|
nsk = dccp_check_req(sk, skb, req);
|
||||||
if (!nsk) {
|
if (!nsk) {
|
||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
@ -752,6 +754,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
discard_and_relse:
|
discard_and_relse:
|
||||||
|
if (refcounted)
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
goto discard_it;
|
goto discard_it;
|
||||||
}
|
}
|
||||||
|
@ -775,13 +775,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
|
|||||||
|
|
||||||
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
|
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
|
||||||
struct inet_listen_hashbucket *ilb;
|
struct inet_listen_hashbucket *ilb;
|
||||||
struct hlist_nulls_node *node;
|
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
|
||||||
num = 0;
|
num = 0;
|
||||||
ilb = &hashinfo->listening_hash[i];
|
ilb = &hashinfo->listening_hash[i];
|
||||||
spin_lock_bh(&ilb->lock);
|
spin_lock_bh(&ilb->lock);
|
||||||
sk_nulls_for_each(sk, node, &ilb->head) {
|
sk_for_each(sk, &ilb->head) {
|
||||||
struct inet_sock *inet = inet_sk(sk);
|
struct inet_sock *inet = inet_sk(sk);
|
||||||
|
|
||||||
if (!net_eq(sock_net(sk), net))
|
if (!net_eq(sock_net(sk), net))
|
||||||
|
@ -198,13 +198,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't inline this cruft. Here are some nice properties to exploit here. The
|
* Here are some nice properties to exploit here. The BSD API
|
||||||
* BSD API does not allow a listening sock to specify the remote port nor the
|
* does not allow a listening sock to specify the remote port nor the
|
||||||
* remote address for the connection. So always assume those are both
|
* remote address for the connection. So always assume those are both
|
||||||
* wildcarded during the search since they can never be otherwise.
|
* wildcarded during the search since they can never be otherwise.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* called with rcu_read_lock() : No refcount taken on the socket */
|
||||||
struct sock *__inet_lookup_listener(struct net *net,
|
struct sock *__inet_lookup_listener(struct net *net,
|
||||||
struct inet_hashinfo *hashinfo,
|
struct inet_hashinfo *hashinfo,
|
||||||
struct sk_buff *skb, int doff,
|
struct sk_buff *skb, int doff,
|
||||||
@ -212,37 +212,27 @@ struct sock *__inet_lookup_listener(struct net *net,
|
|||||||
const __be32 daddr, const unsigned short hnum,
|
const __be32 daddr, const unsigned short hnum,
|
||||||
const int dif)
|
const int dif)
|
||||||
{
|
{
|
||||||
struct sock *sk, *result;
|
|
||||||
struct hlist_nulls_node *node;
|
|
||||||
unsigned int hash = inet_lhashfn(net, hnum);
|
unsigned int hash = inet_lhashfn(net, hnum);
|
||||||
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
|
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
|
||||||
int score, hiscore, matches = 0, reuseport = 0;
|
int score, hiscore = 0, matches = 0, reuseport = 0;
|
||||||
bool select_ok = true;
|
struct sock *sk, *result = NULL;
|
||||||
u32 phash = 0;
|
u32 phash = 0;
|
||||||
|
|
||||||
begin:
|
sk_for_each_rcu(sk, &ilb->head) {
|
||||||
result = NULL;
|
|
||||||
hiscore = 0;
|
|
||||||
sk_nulls_for_each_rcu(sk, node, &ilb->head) {
|
|
||||||
score = compute_score(sk, net, hnum, daddr, dif);
|
score = compute_score(sk, net, hnum, daddr, dif);
|
||||||
if (score > hiscore) {
|
if (score > hiscore) {
|
||||||
result = sk;
|
|
||||||
hiscore = score;
|
|
||||||
reuseport = sk->sk_reuseport;
|
reuseport = sk->sk_reuseport;
|
||||||
if (reuseport) {
|
if (reuseport) {
|
||||||
phash = inet_ehashfn(net, daddr, hnum,
|
phash = inet_ehashfn(net, daddr, hnum,
|
||||||
saddr, sport);
|
saddr, sport);
|
||||||
if (select_ok) {
|
result = reuseport_select_sock(sk, phash,
|
||||||
struct sock *sk2;
|
|
||||||
sk2 = reuseport_select_sock(sk, phash,
|
|
||||||
skb, doff);
|
skb, doff);
|
||||||
if (sk2) {
|
if (result)
|
||||||
result = sk2;
|
return result;
|
||||||
goto found;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
matches = 1;
|
matches = 1;
|
||||||
}
|
}
|
||||||
|
result = sk;
|
||||||
|
hiscore = score;
|
||||||
} else if (score == hiscore && reuseport) {
|
} else if (score == hiscore && reuseport) {
|
||||||
matches++;
|
matches++;
|
||||||
if (reciprocal_scale(phash, matches) == 0)
|
if (reciprocal_scale(phash, matches) == 0)
|
||||||
@ -250,24 +240,6 @@ struct sock *__inet_lookup_listener(struct net *net,
|
|||||||
phash = next_pseudo_random32(phash);
|
phash = next_pseudo_random32(phash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
* if the nulls value we got at the end of this lookup is
|
|
||||||
* not the expected one, we must restart lookup.
|
|
||||||
* We probably met an item that was moved to another chain.
|
|
||||||
*/
|
|
||||||
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
|
|
||||||
goto begin;
|
|
||||||
if (result) {
|
|
||||||
found:
|
|
||||||
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
|
|
||||||
result = NULL;
|
|
||||||
else if (unlikely(compute_score(result, net, hnum, daddr,
|
|
||||||
dif) < hiscore)) {
|
|
||||||
sock_put(result);
|
|
||||||
select_ok = false;
|
|
||||||
goto begin;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
|
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
|
||||||
@ -508,7 +480,8 @@ int __inet_hash(struct sock *sk, struct sock *osk,
|
|||||||
if (err)
|
if (err)
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
__sk_nulls_add_node_rcu(sk, &ilb->head);
|
hlist_add_head_rcu(&sk->sk_node, &ilb->head);
|
||||||
|
sock_set_flag(sk, SOCK_RCU_FREE);
|
||||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
||||||
unlock:
|
unlock:
|
||||||
spin_unlock(&ilb->lock);
|
spin_unlock(&ilb->lock);
|
||||||
@ -535,19 +508,24 @@ void inet_unhash(struct sock *sk)
|
|||||||
{
|
{
|
||||||
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
||||||
spinlock_t *lock;
|
spinlock_t *lock;
|
||||||
|
bool listener = false;
|
||||||
int done;
|
int done;
|
||||||
|
|
||||||
if (sk_unhashed(sk))
|
if (sk_unhashed(sk))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (sk->sk_state == TCP_LISTEN)
|
if (sk->sk_state == TCP_LISTEN) {
|
||||||
lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
|
lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
|
||||||
else
|
listener = true;
|
||||||
|
} else {
|
||||||
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
|
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
|
||||||
|
}
|
||||||
spin_lock_bh(lock);
|
spin_lock_bh(lock);
|
||||||
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
if (rcu_access_pointer(sk->sk_reuseport_cb))
|
||||||
reuseport_detach_sock(sk);
|
reuseport_detach_sock(sk);
|
||||||
|
if (listener)
|
||||||
|
done = __sk_del_node_init(sk);
|
||||||
|
else
|
||||||
done = __sk_nulls_del_node_init_rcu(sk);
|
done = __sk_nulls_del_node_init_rcu(sk);
|
||||||
if (done)
|
if (done)
|
||||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
||||||
@ -684,8 +662,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
|
|||||||
|
|
||||||
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
|
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
|
||||||
spin_lock_init(&h->listening_hash[i].lock);
|
spin_lock_init(&h->listening_hash[i].lock);
|
||||||
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
|
INIT_HLIST_HEAD(&h->listening_hash[i].head);
|
||||||
i + LISTENING_NULLS_BASE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
|
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
|
||||||
|
@ -628,6 +628,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|||||||
|
|
||||||
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
|
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
|
rcu_read_lock();
|
||||||
hash_location = tcp_parse_md5sig_option(th);
|
hash_location = tcp_parse_md5sig_option(th);
|
||||||
if (sk && sk_fullsock(sk)) {
|
if (sk && sk_fullsock(sk)) {
|
||||||
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
|
key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
|
||||||
@ -646,16 +647,18 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|||||||
ntohs(th->source), inet_iif(skb));
|
ntohs(th->source), inet_iif(skb));
|
||||||
/* don't send rst if it can't find key */
|
/* don't send rst if it can't find key */
|
||||||
if (!sk1)
|
if (!sk1)
|
||||||
return;
|
goto out;
|
||||||
rcu_read_lock();
|
|
||||||
key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
|
key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
|
||||||
&ip_hdr(skb)->saddr, AF_INET);
|
&ip_hdr(skb)->saddr, AF_INET);
|
||||||
if (!key)
|
if (!key)
|
||||||
goto release_sk1;
|
goto out;
|
||||||
|
|
||||||
|
|
||||||
genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
|
genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
|
||||||
if (genhash || memcmp(hash_location, newhash, 16) != 0)
|
if (genhash || memcmp(hash_location, newhash, 16) != 0)
|
||||||
goto release_sk1;
|
goto out;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (key) {
|
if (key) {
|
||||||
@ -698,11 +701,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|||||||
TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
|
TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
release_sk1:
|
out:
|
||||||
if (sk1) {
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
sock_put(sk1);
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1538,11 +1538,12 @@ EXPORT_SYMBOL(tcp_prequeue);
|
|||||||
|
|
||||||
int tcp_v4_rcv(struct sk_buff *skb)
|
int tcp_v4_rcv(struct sk_buff *skb)
|
||||||
{
|
{
|
||||||
|
struct net *net = dev_net(skb->dev);
|
||||||
const struct iphdr *iph;
|
const struct iphdr *iph;
|
||||||
const struct tcphdr *th;
|
const struct tcphdr *th;
|
||||||
|
bool refcounted;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
int ret;
|
int ret;
|
||||||
struct net *net = dev_net(skb->dev);
|
|
||||||
|
|
||||||
if (skb->pkt_type != PACKET_HOST)
|
if (skb->pkt_type != PACKET_HOST)
|
||||||
goto discard_it;
|
goto discard_it;
|
||||||
@ -1588,7 +1589,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||||||
|
|
||||||
lookup:
|
lookup:
|
||||||
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
|
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
|
||||||
th->dest);
|
th->dest, &refcounted);
|
||||||
if (!sk)
|
if (!sk)
|
||||||
goto no_tcp_socket;
|
goto no_tcp_socket;
|
||||||
|
|
||||||
@ -1609,7 +1610,11 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||||||
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
inet_csk_reqsk_queue_drop_and_put(sk, req);
|
||||||
goto lookup;
|
goto lookup;
|
||||||
}
|
}
|
||||||
|
/* We own a reference on the listener, increase it again
|
||||||
|
* as we might lose it too soon.
|
||||||
|
*/
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
refcounted = true;
|
||||||
nsk = tcp_check_req(sk, skb, req, false);
|
nsk = tcp_check_req(sk, skb, req, false);
|
||||||
if (!nsk) {
|
if (!nsk) {
|
||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
@ -1665,6 +1670,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
|
||||||
put_and_return:
|
put_and_return:
|
||||||
|
if (refcounted)
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -1688,6 +1694,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
discard_and_relse:
|
discard_and_relse:
|
||||||
|
if (refcounted)
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
goto discard_it;
|
goto discard_it;
|
||||||
|
|
||||||
@ -1712,6 +1719,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
|||||||
if (sk2) {
|
if (sk2) {
|
||||||
inet_twsk_deschedule_put(inet_twsk(sk));
|
inet_twsk_deschedule_put(inet_twsk(sk));
|
||||||
sk = sk2;
|
sk = sk2;
|
||||||
|
refcounted = false;
|
||||||
goto process;
|
goto process;
|
||||||
}
|
}
|
||||||
/* Fall through to ACK */
|
/* Fall through to ACK */
|
||||||
@ -1845,17 +1853,17 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
|
|||||||
*/
|
*/
|
||||||
static void *listening_get_next(struct seq_file *seq, void *cur)
|
static void *listening_get_next(struct seq_file *seq, void *cur)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk;
|
|
||||||
struct hlist_nulls_node *node;
|
|
||||||
struct sock *sk = cur;
|
|
||||||
struct inet_listen_hashbucket *ilb;
|
|
||||||
struct tcp_iter_state *st = seq->private;
|
struct tcp_iter_state *st = seq->private;
|
||||||
struct net *net = seq_file_net(seq);
|
struct net *net = seq_file_net(seq);
|
||||||
|
struct inet_listen_hashbucket *ilb;
|
||||||
|
struct inet_connection_sock *icsk;
|
||||||
|
struct sock *sk = cur;
|
||||||
|
|
||||||
if (!sk) {
|
if (!sk) {
|
||||||
|
get_head:
|
||||||
ilb = &tcp_hashinfo.listening_hash[st->bucket];
|
ilb = &tcp_hashinfo.listening_hash[st->bucket];
|
||||||
spin_lock_bh(&ilb->lock);
|
spin_lock_bh(&ilb->lock);
|
||||||
sk = sk_nulls_head(&ilb->head);
|
sk = sk_head(&ilb->head);
|
||||||
st->offset = 0;
|
st->offset = 0;
|
||||||
goto get_sk;
|
goto get_sk;
|
||||||
}
|
}
|
||||||
@ -1863,28 +1871,20 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
|
|||||||
++st->num;
|
++st->num;
|
||||||
++st->offset;
|
++st->offset;
|
||||||
|
|
||||||
sk = sk_nulls_next(sk);
|
sk = sk_next(sk);
|
||||||
get_sk:
|
get_sk:
|
||||||
sk_nulls_for_each_from(sk, node) {
|
sk_for_each_from(sk) {
|
||||||
if (!net_eq(sock_net(sk), net))
|
if (!net_eq(sock_net(sk), net))
|
||||||
continue;
|
continue;
|
||||||
if (sk->sk_family == st->family) {
|
if (sk->sk_family == st->family)
|
||||||
cur = sk;
|
return sk;
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
icsk = inet_csk(sk);
|
icsk = inet_csk(sk);
|
||||||
}
|
}
|
||||||
spin_unlock_bh(&ilb->lock);
|
spin_unlock_bh(&ilb->lock);
|
||||||
st->offset = 0;
|
st->offset = 0;
|
||||||
if (++st->bucket < INET_LHTABLE_SIZE) {
|
if (++st->bucket < INET_LHTABLE_SIZE)
|
||||||
ilb = &tcp_hashinfo.listening_hash[st->bucket];
|
goto get_head;
|
||||||
spin_lock_bh(&ilb->lock);
|
return NULL;
|
||||||
sk = sk_nulls_head(&ilb->head);
|
|
||||||
goto get_sk;
|
|
||||||
}
|
|
||||||
cur = NULL;
|
|
||||||
out:
|
|
||||||
return cur;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
|
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
|
||||||
|
@ -120,6 +120,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
|
|||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* called with rcu_read_lock() */
|
||||||
struct sock *inet6_lookup_listener(struct net *net,
|
struct sock *inet6_lookup_listener(struct net *net,
|
||||||
struct inet_hashinfo *hashinfo,
|
struct inet_hashinfo *hashinfo,
|
||||||
struct sk_buff *skb, int doff,
|
struct sk_buff *skb, int doff,
|
||||||
@ -127,38 +128,27 @@ struct sock *inet6_lookup_listener(struct net *net,
|
|||||||
const __be16 sport, const struct in6_addr *daddr,
|
const __be16 sport, const struct in6_addr *daddr,
|
||||||
const unsigned short hnum, const int dif)
|
const unsigned short hnum, const int dif)
|
||||||
{
|
{
|
||||||
struct sock *sk;
|
|
||||||
const struct hlist_nulls_node *node;
|
|
||||||
struct sock *result;
|
|
||||||
int score, hiscore, matches = 0, reuseport = 0;
|
|
||||||
bool select_ok = true;
|
|
||||||
u32 phash = 0;
|
|
||||||
unsigned int hash = inet_lhashfn(net, hnum);
|
unsigned int hash = inet_lhashfn(net, hnum);
|
||||||
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
|
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
|
||||||
|
int score, hiscore = 0, matches = 0, reuseport = 0;
|
||||||
|
struct sock *sk, *result = NULL;
|
||||||
|
u32 phash = 0;
|
||||||
|
|
||||||
begin:
|
sk_for_each(sk, &ilb->head) {
|
||||||
result = NULL;
|
|
||||||
hiscore = 0;
|
|
||||||
sk_nulls_for_each(sk, node, &ilb->head) {
|
|
||||||
score = compute_score(sk, net, hnum, daddr, dif);
|
score = compute_score(sk, net, hnum, daddr, dif);
|
||||||
if (score > hiscore) {
|
if (score > hiscore) {
|
||||||
hiscore = score;
|
hiscore = score;
|
||||||
result = sk;
|
|
||||||
reuseport = sk->sk_reuseport;
|
|
||||||
if (reuseport) {
|
if (reuseport) {
|
||||||
phash = inet6_ehashfn(net, daddr, hnum,
|
phash = inet6_ehashfn(net, daddr, hnum,
|
||||||
saddr, sport);
|
saddr, sport);
|
||||||
if (select_ok) {
|
result = reuseport_select_sock(sk, phash,
|
||||||
struct sock *sk2;
|
|
||||||
sk2 = reuseport_select_sock(sk, phash,
|
|
||||||
skb, doff);
|
skb, doff);
|
||||||
if (sk2) {
|
if (result)
|
||||||
result = sk2;
|
return result;
|
||||||
goto found;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
matches = 1;
|
matches = 1;
|
||||||
}
|
}
|
||||||
|
result = sk;
|
||||||
|
reuseport = sk->sk_reuseport;
|
||||||
} else if (score == hiscore && reuseport) {
|
} else if (score == hiscore && reuseport) {
|
||||||
matches++;
|
matches++;
|
||||||
if (reciprocal_scale(phash, matches) == 0)
|
if (reciprocal_scale(phash, matches) == 0)
|
||||||
@ -166,24 +156,6 @@ struct sock *inet6_lookup_listener(struct net *net,
|
|||||||
phash = next_pseudo_random32(phash);
|
phash = next_pseudo_random32(phash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
* if the nulls value we got at the end of this lookup is
|
|
||||||
* not the expected one, we must restart lookup.
|
|
||||||
* We probably met an item that was moved to another chain.
|
|
||||||
*/
|
|
||||||
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
|
|
||||||
goto begin;
|
|
||||||
if (result) {
|
|
||||||
found:
|
|
||||||
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
|
|
||||||
result = NULL;
|
|
||||||
else if (unlikely(compute_score(result, net, hnum, daddr,
|
|
||||||
dif) < hiscore)) {
|
|
||||||
sock_put(result);
|
|
||||||
select_ok = false;
|
|
||||||
goto begin;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
|
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
|
||||||
@ -195,10 +167,12 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
|
|||||||
const int dif)
|
const int dif)
|
||||||
{
|
{
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
bool refcounted;
|
||||||
|
|
||||||
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
|
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
|
||||||
ntohs(dport), dif);
|
ntohs(dport), dif, &refcounted);
|
||||||
|
if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
|
||||||
|
sk = NULL;
|
||||||
return sk;
|
return sk;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(inet6_lookup);
|
EXPORT_SYMBOL_GPL(inet6_lookup);
|
||||||
|
@ -858,6 +858,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
|
rcu_read_lock();
|
||||||
hash_location = tcp_parse_md5sig_option(th);
|
hash_location = tcp_parse_md5sig_option(th);
|
||||||
if (sk && sk_fullsock(sk)) {
|
if (sk && sk_fullsock(sk)) {
|
||||||
key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
|
key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
|
||||||
@ -875,16 +876,15 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|||||||
th->source, &ipv6h->daddr,
|
th->source, &ipv6h->daddr,
|
||||||
ntohs(th->source), tcp_v6_iif(skb));
|
ntohs(th->source), tcp_v6_iif(skb));
|
||||||
if (!sk1)
|
if (!sk1)
|
||||||
return;
|
goto out;
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
|
key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
|
||||||
if (!key)
|
if (!key)
|
||||||
goto release_sk1;
|
goto out;
|
||||||
|
|
||||||
genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
|
genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
|
||||||
if (genhash || memcmp(hash_location, newhash, 16) != 0)
|
if (genhash || memcmp(hash_location, newhash, 16) != 0)
|
||||||
goto release_sk1;
|
goto out;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -898,11 +898,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
|
|||||||
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
|
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
|
||||||
|
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
release_sk1:
|
out:
|
||||||
if (sk1) {
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
sock_put(sk1);
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1351,6 +1348,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
|
|||||||
{
|
{
|
||||||
const struct tcphdr *th;
|
const struct tcphdr *th;
|
||||||
const struct ipv6hdr *hdr;
|
const struct ipv6hdr *hdr;
|
||||||
|
bool refcounted;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
int ret;
|
int ret;
|
||||||
struct net *net = dev_net(skb->dev);
|
struct net *net = dev_net(skb->dev);
|
||||||
@ -1381,7 +1379,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
|
|||||||
|
|
||||||
lookup:
|
lookup:
|
||||||
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
|
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
|
||||||
th->source, th->dest, inet6_iif(skb));
|
th->source, th->dest, inet6_iif(skb),
|
||||||
|
&refcounted);
|
||||||
if (!sk)
|
if (!sk)
|
||||||
goto no_tcp_socket;
|
goto no_tcp_socket;
|
||||||
|
|
||||||
@ -1404,6 +1403,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
|
|||||||
goto lookup;
|
goto lookup;
|
||||||
}
|
}
|
||||||
sock_hold(sk);
|
sock_hold(sk);
|
||||||
|
refcounted = true;
|
||||||
nsk = tcp_check_req(sk, skb, req, false);
|
nsk = tcp_check_req(sk, skb, req, false);
|
||||||
if (!nsk) {
|
if (!nsk) {
|
||||||
reqsk_put(req);
|
reqsk_put(req);
|
||||||
@ -1460,6 +1460,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
|
|||||||
bh_unlock_sock(sk);
|
bh_unlock_sock(sk);
|
||||||
|
|
||||||
put_and_return:
|
put_and_return:
|
||||||
|
if (refcounted)
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
return ret ? -1 : 0;
|
return ret ? -1 : 0;
|
||||||
|
|
||||||
@ -1483,6 +1484,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
discard_and_relse:
|
discard_and_relse:
|
||||||
|
if (refcounted)
|
||||||
sock_put(sk);
|
sock_put(sk);
|
||||||
goto discard_it;
|
goto discard_it;
|
||||||
|
|
||||||
@ -1514,6 +1516,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
|
|||||||
inet_twsk_deschedule_put(tw);
|
inet_twsk_deschedule_put(tw);
|
||||||
sk = sk2;
|
sk = sk2;
|
||||||
tcp_v6_restore_cb(skb);
|
tcp_v6_restore_cb(skb);
|
||||||
|
refcounted = false;
|
||||||
goto process;
|
goto process;
|
||||||
}
|
}
|
||||||
/* Fall through to ACK */
|
/* Fall through to ACK */
|
||||||
|
@ -120,7 +120,7 @@ xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
|
|||||||
{
|
{
|
||||||
switch (protocol) {
|
switch (protocol) {
|
||||||
case IPPROTO_TCP:
|
case IPPROTO_TCP:
|
||||||
return __inet_lookup(net, &tcp_hashinfo, skb, doff,
|
return inet_lookup(net, &tcp_hashinfo, skb, doff,
|
||||||
saddr, sport, daddr, dport,
|
saddr, sport, daddr, dport,
|
||||||
in->ifindex);
|
in->ifindex);
|
||||||
case IPPROTO_UDP:
|
case IPPROTO_UDP:
|
||||||
|
Loading…
Reference in New Issue
Block a user