linux_dsm_epyc7002/net/ipv4/esp4.c
David S. Miller 3639339553 ipv4: Handle PMTU in all ICMP error handlers.
With ip_rt_frag_needed() removed, we have to explicitly update PMTU
information in every ICMP error handler.

Create two helper functions to facilitate this.

1) ipv4_sk_update_pmtu()

   This updates the PMTU when we have a socket context to
   work with.

2) ipv4_update_pmtu()

   Raw version, used when no socket context is available.  For this
   interface, we essentially just pass in explicit arguments for
   the flow identity information we would have extracted from the
   socket.

   And you'll notice that ipv4_sk_update_pmtu() is simply implemented
   in terms of ipv4_update_pmtu()

Note that __ip_route_output_key() is used, rather than something like
ip_route_output_flow() or ip_route_output_key().  This is because we
absolutely do not want to end up with a route that does IPSEC
encapsulation and the like.  Instead, we only want the route that
would get us to the node described by the outermost IP header.

Reported-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-06-14 22:22:07 -07:00

729 lines
17 KiB
C

#define pr_fmt(fmt) "IPsec: " fmt
#include <crypto/aead.h>
#include <crypto/authenc.h>
#include <linux/err.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/esp.h>
#include <linux/scatterlist.h>
#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/in6.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/udp.h>
struct esp_skb_cb {
struct xfrm_skb_cb xfrm;
void *tmp;
};
#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
/*
* Allocate an AEAD request structure with extra space for SG and IV.
*
* For alignment considerations the IV is placed at the front, followed
* by the request and finally the SG list.
*
* TODO: Use spare space in skb for this where possible.
*/
static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
{
unsigned int len;
len = seqhilen;
len += crypto_aead_ivsize(aead);
if (len) {
len += crypto_aead_alignmask(aead) &
~(crypto_tfm_ctx_alignment() - 1);
len = ALIGN(len, crypto_tfm_ctx_alignment());
}
len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
len = ALIGN(len, __alignof__(struct scatterlist));
len += sizeof(struct scatterlist) * nfrags;
return kmalloc(len, GFP_ATOMIC);
}
static inline __be32 *esp_tmp_seqhi(void *tmp)
{
return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
}
static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
{
return crypto_aead_ivsize(aead) ?
PTR_ALIGN((u8 *)tmp + seqhilen,
crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
}
static inline struct aead_givcrypt_request *esp_tmp_givreq(
struct crypto_aead *aead, u8 *iv)
{
struct aead_givcrypt_request *req;
req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
crypto_tfm_ctx_alignment());
aead_givcrypt_set_tfm(req, aead);
return req;
}
static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
{
struct aead_request *req;
req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
crypto_tfm_ctx_alignment());
aead_request_set_tfm(req, aead);
return req;
}
static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
struct aead_request *req)
{
return (void *)ALIGN((unsigned long)(req + 1) +
crypto_aead_reqsize(aead),
__alignof__(struct scatterlist));
}
static inline struct scatterlist *esp_givreq_sg(
struct crypto_aead *aead, struct aead_givcrypt_request *req)
{
return (void *)ALIGN((unsigned long)(req + 1) +
crypto_aead_reqsize(aead),
__alignof__(struct scatterlist));
}
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
kfree(ESP_SKB_CB(skb)->tmp);
xfrm_output_resume(skb, err);
}
static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
struct aead_givcrypt_request *req;
struct scatterlist *sg;
struct scatterlist *asg;
struct esp_data *esp;
struct sk_buff *trailer;
void *tmp;
u8 *iv;
u8 *tail;
int blksize;
int clen;
int alen;
int plen;
int tfclen;
int nfrags;
int assoclen;
int sglists;
int seqhilen;
__be32 *seqhi;
/* skb is pure payload to encrypt */
err = -ENOMEM;
esp = x->data;
aead = esp->aead;
alen = crypto_aead_authsize(aead);
tfclen = 0;
if (x->tfcpad) {
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
tfclen = padto - skb->len;
}
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
if (esp->padlen)
clen = ALIGN(clen, esp->padlen);
plen = clen - skb->len - tfclen;
err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
if (err < 0)
goto error;
nfrags = err;
assoclen = sizeof(*esph);
sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
if (!tmp)
goto error;
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_givreq(aead, iv);
asg = esp_givreq_sg(aead, req);
sg = asg + sglists;
/* Fill padding... */
tail = skb_tail_pointer(trailer);
if (tfclen) {
memset(tail, 0, tfclen);
tail += tfclen;
}
do {
int i;
for (i = 0; i < plen - 2; i++)
tail[i] = i + 1;
} while (0);
tail[plen - 2] = plen - 2;
tail[plen - 1] = *skb_mac_header(skb);
pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
esph = ip_esp_hdr(skb);
*skb_mac_header(skb) = IPPROTO_ESP;
/* this is non-NULL only with UDP Encapsulation */
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
struct udphdr *uh;
__be32 *udpdata32;
__be16 sport, dport;
int encap_type;
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
encap_type = encap->encap_type;
spin_unlock_bh(&x->lock);
uh = (struct udphdr *)esph;
uh->source = sport;
uh->dest = dport;
uh->len = htons(skb->len - skb_transport_offset(skb));
uh->check = 0;
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
esph = (struct ip_esp_hdr *)(uh + 1);
break;
case UDP_ENCAP_ESPINUDP_NON_IKE:
udpdata32 = (__be32 *)(uh + 1);
udpdata32[0] = udpdata32[1] = 0;
esph = (struct ip_esp_hdr *)(udpdata32 + 2);
break;
}
*skb_mac_header(skb) = IPPROTO_UDP;
}
esph->spi = x->id.spi;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
clen + alen);
if ((x->props.flags & XFRM_STATE_ESN)) {
sg_init_table(asg, 3);
sg_set_buf(asg, &esph->spi, sizeof(__be32));
*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
sg_set_buf(asg + 1, seqhi, seqhilen);
sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
} else
sg_init_one(asg, esph, sizeof(*esph));
aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
aead_givcrypt_set_assoc(req, asg, assoclen);
aead_givcrypt_set_giv(req, esph->enc_data,
XFRM_SKB_CB(skb)->seq.output.low);
ESP_SKB_CB(skb)->tmp = tmp;
err = crypto_aead_givencrypt(req);
if (err == -EINPROGRESS)
goto error;
if (err == -EBUSY)
err = NET_XMIT_DROP;
kfree(tmp);
error:
return err;
}
static int esp_input_done2(struct sk_buff *skb, int err)
{
const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
struct esp_data *esp = x->data;
struct crypto_aead *aead = esp->aead;
int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
int elen = skb->len - hlen;
int ihl;
u8 nexthdr[2];
int padlen;
kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
goto out;
if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
BUG();
err = -EINVAL;
padlen = nexthdr[0];
if (padlen + 2 + alen >= elen)
goto out;
/* ... check padding bits here. Silly. :-) */
iph = ip_hdr(skb);
ihl = iph->ihl * 4;
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
/*
* 1) if the NAT-T peer's IP or port changed then
* advertize the change to the keying daemon.
* This is an inbound SA, so just compare
* SRC ports.
*/
if (iph->saddr != x->props.saddr.a4 ||
uh->source != encap->encap_sport) {
xfrm_address_t ipaddr;
ipaddr.a4 = iph->saddr;
km_new_mapping(x, &ipaddr, uh->source);
/* XXX: perhaps add an extra
* policy check here, to see
* if we should allow or
* reject a packet from a
* different source
* address/port.
*/
}
/*
* 2) ignore UDP/TCP checksums in case
* of NAT-T in Transport Mode, or
* perform other post-processing fixes
* as per draft-ietf-ipsec-udp-encaps-06,
* section 3.1.2
*/
if (x->props.mode == XFRM_MODE_TRANSPORT)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
pskb_trim(skb, skb->len - alen - padlen - 2);
__skb_pull(skb, hlen);
skb_set_transport_header(skb, -ihl);
err = nexthdr[1];
/* RFC4303: Drop dummy packets without any error */
if (err == IPPROTO_NONE)
err = -EINVAL;
out:
return err;
}
static void esp_input_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
xfrm_input_resume(skb, esp_input_done2(skb, err));
}
/*
* Note: detecting truncated vs. non-truncated authentication data is very
* expensive, so we only support truncated data, which is the recommended
* and common case.
*/
static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ip_esp_hdr *esph;
struct esp_data *esp = x->data;
struct crypto_aead *aead = esp->aead;
struct aead_request *req;
struct sk_buff *trailer;
int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
int nfrags;
int assoclen;
int sglists;
int seqhilen;
__be32 *seqhi;
void *tmp;
u8 *iv;
struct scatterlist *sg;
struct scatterlist *asg;
int err = -EINVAL;
if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
goto out;
if (elen <= 0)
goto out;
if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
goto out;
nfrags = err;
assoclen = sizeof(*esph);
sglists = 1;
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
sglists += 2;
seqhilen += sizeof(__be32);
assoclen += seqhilen;
}
err = -ENOMEM;
tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
if (!tmp)
goto out;
ESP_SKB_CB(skb)->tmp = tmp;
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
asg = esp_req_sg(aead, req);
sg = asg + sglists;
skb->ip_summed = CHECKSUM_NONE;
esph = (struct ip_esp_hdr *)skb->data;
/* Get ivec. This can be wrong, check against another impls. */
iv = esph->enc_data;
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
if ((x->props.flags & XFRM_STATE_ESN)) {
sg_init_table(asg, 3);
sg_set_buf(asg, &esph->spi, sizeof(__be32));
*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
sg_set_buf(asg + 1, seqhi, seqhilen);
sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
} else
sg_init_one(asg, esph, sizeof(*esph));
aead_request_set_callback(req, 0, esp_input_done, skb);
aead_request_set_crypt(req, sg, sg, elen, iv);
aead_request_set_assoc(req, asg, assoclen);
err = crypto_aead_decrypt(req);
if (err == -EINPROGRESS)
goto out;
err = esp_input_done2(skb, err);
out:
return err;
}
static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
u32 align = max_t(u32, blksize, esp->padlen);
unsigned int net_adj;
switch (x->props.mode) {
case XFRM_MODE_TRANSPORT:
case XFRM_MODE_BEET:
net_adj = sizeof(struct iphdr);
break;
case XFRM_MODE_TUNNEL:
net_adj = 0;
break;
default:
BUG();
}
return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
net_adj) & ~(align - 1)) + (net_adj - 2);
}
static void esp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
const struct iphdr *iph = (const struct iphdr *)skb->data;
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
esph->spi, IPPROTO_ESP, AF_INET);
if (!x)
return;
NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
ntohl(esph->spi), ntohl(iph->daddr));
ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
xfrm_state_put(x);
}
static void esp_destroy(struct xfrm_state *x)
{
struct esp_data *esp = x->data;
if (!esp)
return;
crypto_free_aead(esp->aead);
kfree(esp);
}
static int esp_init_aead(struct xfrm_state *x)
{
struct esp_data *esp = x->data;
struct crypto_aead *aead;
int err;
aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
esp->aead = aead;
err = crypto_aead_setkey(aead, x->aead->alg_key,
(x->aead->alg_key_len + 7) / 8);
if (err)
goto error;
err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
if (err)
goto error;
error:
return err;
}
static int esp_init_authenc(struct xfrm_state *x)
{
struct esp_data *esp = x->data;
struct crypto_aead *aead;
struct crypto_authenc_key_param *param;
struct rtattr *rta;
char *key;
char *p;
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
err = -EINVAL;
if (x->ealg == NULL)
goto error;
err = -ENAMETOOLONG;
if ((x->props.flags & XFRM_STATE_ESN)) {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
"authencesn(%s,%s)",
x->aalg ? x->aalg->alg_name : "digest_null",
x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
} else {
if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
"authenc(%s,%s)",
x->aalg ? x->aalg->alg_name : "digest_null",
x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
}
aead = crypto_alloc_aead(authenc_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
esp->aead = aead;
keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
(x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
err = -ENOMEM;
key = kmalloc(keylen, GFP_KERNEL);
if (!key)
goto error;
p = key;
rta = (void *)p;
rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
rta->rta_len = RTA_LENGTH(sizeof(*param));
param = RTA_DATA(rta);
p += RTA_SPACE(sizeof(*param));
if (x->aalg) {
struct xfrm_algo_desc *aalg_desc;
memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
p += (x->aalg->alg_key_len + 7) / 8;
aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
BUG_ON(!aalg_desc);
err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_aead_authsize(aead)) {
NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
x->aalg->alg_name,
crypto_aead_authsize(aead),
aalg_desc->uinfo.auth.icv_fullbits/8);
goto free_key;
}
err = crypto_aead_setauthsize(
aead, x->aalg->alg_trunc_len / 8);
if (err)
goto free_key;
}
param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
err = crypto_aead_setkey(aead, key, keylen);
free_key:
kfree(key);
error:
return err;
}
static int esp_init_state(struct xfrm_state *x)
{
struct esp_data *esp;
struct crypto_aead *aead;
u32 align;
int err;
esp = kzalloc(sizeof(*esp), GFP_KERNEL);
if (esp == NULL)
return -ENOMEM;
x->data = esp;
if (x->aead)
err = esp_init_aead(x);
else
err = esp_init_authenc(x);
if (err)
goto error;
aead = esp->aead;
esp->padlen = 0;
x->props.header_len = sizeof(struct ip_esp_hdr) +
crypto_aead_ivsize(aead);
if (x->props.mode == XFRM_MODE_TUNNEL)
x->props.header_len += sizeof(struct iphdr);
else if (x->props.mode == XFRM_MODE_BEET && x->sel.family != AF_INET6)
x->props.header_len += IPV4_BEET_PHMAXLEN;
if (x->encap) {
struct xfrm_encap_tmpl *encap = x->encap;
switch (encap->encap_type) {
default:
goto error;
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
case UDP_ENCAP_ESPINUDP_NON_IKE:
x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
break;
}
}
align = ALIGN(crypto_aead_blocksize(aead), 4);
if (esp->padlen)
align = max_t(u32, align, esp->padlen);
x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
error:
return err;
}
static const struct xfrm_type esp_type =
{
.description = "ESP4",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.flags = XFRM_TYPE_REPLAY_PROT,
.init_state = esp_init_state,
.destructor = esp_destroy,
.get_mtu = esp4_get_mtu,
.input = esp_input,
.output = esp_output
};
static const struct net_protocol esp4_protocol = {
.handler = xfrm4_rcv,
.err_handler = esp4_err,
.no_policy = 1,
.netns_ok = 1,
};
static int __init esp4_init(void)
{
if (xfrm_register_type(&esp_type, AF_INET) < 0) {
pr_info("%s: can't add xfrm type\n", __func__);
return -EAGAIN;
}
if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) {
pr_info("%s: can't add protocol\n", __func__);
xfrm_unregister_type(&esp_type, AF_INET);
return -EAGAIN;
}
return 0;
}
static void __exit esp4_fini(void)
{
if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0)
pr_info("%s: can't remove protocol\n", __func__);
if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
pr_info("%s: can't remove xfrm type\n", __func__);
}
module_init(esp4_init);
module_exit(esp4_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);