linux_dsm_epyc7002/net/core/secure_seq.c
Eric Dumazet 73f156a6e8 inetpeer: get rid of ip_id_count
Ideally, we would need to generate IP ID using a per destination IP
generator.

linux kernels used inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.

1) each inet_peer struct consumes 192 bytes

2) inetpeer cache uses a binary tree of inet_peer structs,
   with a nominal size of ~66000 elements under load.

3) lookups in this tree are hitting a lot of cache lines, as tree depth
   is about 20.

4) If server deals with many tcp flows, we have a high probability of
   not finding the inet_peer, allocating a fresh one, inserting it in
   the tree with same initial ip_id_count, (cf secure_ip_id())

5) We garbage collect inet_peer aggressively.

IP ID generation do not have to be 'perfect'

Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.

We simply use an array of generators, and a Jenkin hash using the dst IP
as a key.

ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)

secure_ip_id() and secure_ipv6_id() no longer are needed.

Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-02 11:00:41 -07:00

174 lines
4.1 KiB
C

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cryptohash.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/random.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/string.h>
#include <linux/net.h>
#include <net/secure_seq.h>
#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
static __always_inline void net_secret_init(void)
{
net_get_random_once(net_secret, sizeof(net_secret));
}
#endif
#ifdef CONFIG_INET
static u32 seq_scale(u32 seq)
{
/*
* As close as possible to RFC 793, which
* suggests using a 250 kHz clock.
* Further reading shows this assumes 2 Mb/s networks.
* For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
* For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but
* we also need to limit the resolution so that the u32 seq
* overlaps less than one time per MSL (2 minutes).
* Choosing a clock of 64 ns period is OK. (period of 274 s)
*/
return seq + (ktime_to_ns(ktime_get_real()) >> 6);
}
#endif
#if IS_ENABLED(CONFIG_IPV6)
__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport)
{
u32 secret[MD5_MESSAGE_BYTES / 4];
u32 hash[MD5_DIGEST_WORDS];
u32 i;
net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32)daddr[i];
secret[4] = net_secret[4] +
(((__force u16)sport << 16) + (__force u16)dport);
for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
secret[i] = net_secret[i];
md5_transform(hash, secret);
return seq_scale(hash[0]);
}
EXPORT_SYMBOL(secure_tcpv6_sequence_number);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
u32 secret[MD5_MESSAGE_BYTES / 4];
u32 hash[MD5_DIGEST_WORDS];
u32 i;
net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32) daddr[i];
secret[4] = net_secret[4] + (__force u32)dport;
for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
secret[i] = net_secret[i];
md5_transform(hash, secret);
return hash[0];
}
EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
hash[3] = net_secret[15];
md5_transform(hash, net_secret);
return seq_scale(hash[0]);
}
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = (__force u32)dport ^ net_secret[14];
hash[3] = net_secret[15];
md5_transform(hash, net_secret);
return hash[0];
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
#if IS_ENABLED(CONFIG_IP_DCCP)
u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
hash[3] = net_secret[15];
md5_transform(hash, net_secret);
seq = hash[0] | (((u64)hash[1]) << 32);
seq += ktime_to_ns(ktime_get_real());
seq &= (1ull << 48) - 1;
return seq;
}
EXPORT_SYMBOL(secure_dccp_sequence_number);
#if IS_ENABLED(CONFIG_IPV6)
u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
__be16 sport, __be16 dport)
{
u32 secret[MD5_MESSAGE_BYTES / 4];
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
u32 i;
net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + daddr[i];
secret[4] = net_secret[4] +
(((__force u16)sport << 16) + (__force u16)dport);
for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
secret[i] = net_secret[i];
md5_transform(hash, secret);
seq = hash[0] | (((u64)hash[1]) << 32);
seq += ktime_to_ns(ktime_get_real());
seq &= (1ull << 48) - 1;
return seq;
}
EXPORT_SYMBOL(secure_dccpv6_sequence_number);
#endif
#endif