mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 17:17:25 +07:00
d894ba18d4
With the SO_REUSEPORT socket option, it is possible to create sockets
in the AF_INET and AF_INET6 domains which are bound to the same IPv4 address.
This is only possible with SO_REUSEPORT and when not using IPV6_V6ONLY on
the AF_INET6 sockets.
Prior to the commits referenced below, an incoming IPv4 packet would
always be routed to a socket of type AF_INET when this mixed-mode was used.
After those changes, the same packet would be routed to the most recently
bound socket (if this happened to be an AF_INET6 socket, it would
have an IPv4 mapped IPv6 address).
The change in behavior occurred because the recent SO_REUSEPORT optimizations
short-circuit the socket scoring logic as soon as they find a match. They
did not take into account the scoring logic that favors AF_INET sockets
over AF_INET6 sockets in the event of a tie.
To fix this problem, this patch changes the insertion order of AF_INET
and AF_INET6 addresses in the TCP and UDP socket lists when the sockets
have SO_REUSEPORT set. AF_INET sockets will be inserted at the head of the
list and AF_INET6 sockets with SO_REUSEPORT set will always be inserted at
the tail of the list. This will force AF_INET sockets to always be
considered first.
Fixes: e32ea7e747 ("soreuseport: fast reuseport UDP socket selection")
Fixes: 125e80b88687 ("soreuseport: fast reuseport TCP socket selection")
Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Craig Gallek <kraig@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
161 lines
5.5 KiB
C
161 lines
5.5 KiB
C
#ifndef _LINUX_RCULIST_NULLS_H
|
|
#define _LINUX_RCULIST_NULLS_H
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
/*
|
|
* RCU-protected list version
|
|
*/
|
|
#include <linux/list_nulls.h>
|
|
#include <linux/rcupdate.h>
|
|
|
|
/**
|
|
* hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization
|
|
* @n: the element to delete from the hash list.
|
|
*
|
|
* Note: hlist_nulls_unhashed() on the node return true after this. It is
|
|
* useful for RCU based read lockfree traversal if the writer side
|
|
* must know if the list entry is still hashed or already unhashed.
|
|
*
|
|
* In particular, it means that we can not poison the forward pointers
|
|
* that may still be used for walking the hash list and we can only
|
|
* zero the pprev pointer so list_unhashed() will return true after
|
|
* this.
|
|
*
|
|
* The caller must take whatever precautions are necessary (such as
|
|
* holding appropriate locks) to avoid racing with another
|
|
* list-mutation primitive, such as hlist_nulls_add_head_rcu() or
|
|
* hlist_nulls_del_rcu(), running on this same list. However, it is
|
|
* perfectly legal to run concurrently with the _rcu list-traversal
|
|
* primitives, such as hlist_nulls_for_each_entry_rcu().
|
|
*/
|
|
static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
|
|
{
|
|
if (!hlist_nulls_unhashed(n)) {
|
|
__hlist_nulls_del(n);
|
|
n->pprev = NULL;
|
|
}
|
|
}
|
|
|
|
/*
 * hlist_nulls_first_rcu - the ->first pointer of @head as an lvalue whose
 * type carries the __rcu annotation, for use with rcu_assign_pointer()
 * and rcu_dereference_raw().
 */
#define hlist_nulls_first_rcu(head) \
	(*((struct hlist_nulls_node __rcu __force **)&((head)->first)))
|
|
|
|
/*
 * hlist_nulls_next_rcu - the ->next pointer of @node as an lvalue whose
 * type carries the __rcu annotation, for use with rcu_assign_pointer()
 * and rcu_dereference_raw().
 */
#define hlist_nulls_next_rcu(node) \
	(*((struct hlist_nulls_node __rcu __force **)&((node)->next)))
|
|
|
|
/**
|
|
* hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
|
|
* @n: the element to delete from the hash list.
|
|
*
|
|
* Note: hlist_nulls_unhashed() on entry does not return true after this,
|
|
* the entry is in an undefined state. It is useful for RCU based
|
|
* lockfree traversal.
|
|
*
|
|
* In particular, it means that we can not poison the forward
|
|
* pointers that may still be used for walking the hash list.
|
|
*
|
|
* The caller must take whatever precautions are necessary
|
|
* (such as holding appropriate locks) to avoid racing
|
|
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
|
|
* or hlist_nulls_del_rcu(), running on this same list.
|
|
* However, it is perfectly legal to run concurrently with
|
|
* the _rcu list-traversal primitives, such as
|
|
* hlist_nulls_for_each_entry().
|
|
*/
|
|
static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
|
|
{
|
|
__hlist_nulls_del(n);
|
|
n->pprev = LIST_POISON2;
|
|
}
|
|
|
|
/**
|
|
* hlist_nulls_add_head_rcu
|
|
* @n: the element to add to the hash list.
|
|
* @h: the list to add to.
|
|
*
|
|
* Description:
|
|
* Adds the specified element to the specified hlist_nulls,
|
|
* while permitting racing traversals.
|
|
*
|
|
* The caller must take whatever precautions are necessary
|
|
* (such as holding appropriate locks) to avoid racing
|
|
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
|
|
* or hlist_nulls_del_rcu(), running on this same list.
|
|
* However, it is perfectly legal to run concurrently with
|
|
* the _rcu list-traversal primitives, such as
|
|
* hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
|
|
* problems on Alpha CPUs. Regardless of the type of CPU, the
|
|
* list-traversal primitive must be guarded by rcu_read_lock().
|
|
*/
|
|
static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
|
|
struct hlist_nulls_head *h)
|
|
{
|
|
struct hlist_nulls_node *first = h->first;
|
|
|
|
n->next = first;
|
|
n->pprev = &h->first;
|
|
rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
|
|
if (!is_a_nulls(first))
|
|
first->pprev = &n->next;
|
|
}
|
|
|
|
/**
|
|
* hlist_nulls_add_tail_rcu
|
|
* @n: the element to add to the hash list.
|
|
* @h: the list to add to.
|
|
*
|
|
* Description:
|
|
* Adds the specified element to the end of the specified hlist_nulls,
|
|
* while permitting racing traversals. NOTE: tail insertion requires
|
|
* list traversal.
|
|
*
|
|
* The caller must take whatever precautions are necessary
|
|
* (such as holding appropriate locks) to avoid racing
|
|
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
|
|
* or hlist_nulls_del_rcu(), running on this same list.
|
|
* However, it is perfectly legal to run concurrently with
|
|
* the _rcu list-traversal primitives, such as
|
|
* hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
|
|
* problems on Alpha CPUs. Regardless of the type of CPU, the
|
|
* list-traversal primitive must be guarded by rcu_read_lock().
|
|
*/
|
|
static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
|
|
struct hlist_nulls_head *h)
|
|
{
|
|
struct hlist_nulls_node *i, *last = NULL;
|
|
|
|
for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
|
|
i = hlist_nulls_next_rcu(i))
|
|
last = i;
|
|
|
|
if (last) {
|
|
n->next = last->next;
|
|
n->pprev = &last->next;
|
|
rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
|
|
} else {
|
|
hlist_nulls_add_head_rcu(n, h);
|
|
}
|
|
}
|
|
|
|
/**
 * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the hlist_nulls_node within the struct.
 *
 * Iteration stops as soon as @pos is a nulls marker; @tpos is only
 * assigned for real nodes.
 *
 * The barrier() is needed to make sure the compiler doesn't cache the
 * first element [1], as this loop can be restarted [2].
 * [1] Documentation/atomic_ops.txt around line 114
 * [2] Documentation/RCU/rculist_nulls.txt around line 146
 */
#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member)		\
	for (({ barrier(); }),						\
	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));	\
	     !is_a_nulls(pos) &&					\
	     ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
	     pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
|
|
|
|
#endif
|
|
#endif
|