diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 31947b9c21d6..d9a81dcef53e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -213,6 +213,8 @@ struct xfrm_state { /* Last used time */ unsigned long lastused; + struct page_frag xfrag; + /* Reference to data common to all the instances of this * transformer. */ const struct xfrm_type *type; @@ -343,7 +345,7 @@ struct xfrm_state_afinfo { int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo); struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); +struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { unsigned int family; diff --git a/net/core/dev.c b/net/core/dev.c index be11abac89b3..e61528c50209 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4593,6 +4593,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) case GRO_MERGED_FREE: if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { skb_dst_drop(skb); + secpath_reset(skb); kmem_cache_free(skbuff_head_cache, skb); } else { __kfree_skb(skb); @@ -4633,6 +4634,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + secpath_reset(skb); napi->skb = skb; } diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index f2a71025a770..22377c8ff14b 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -270,6 +270,9 @@ static void ah_input_done(struct crypto_async_request *base, int err) int ihl = ip_hdrlen(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + if (err) + goto out; + work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, ihl); icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 20fb25e3027b..b1e24446e297 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -18,6 +18,8 @@ #include #include +#include + struct esp_skb_cb { struct xfrm_skb_cb xfrm; void *tmp; @@ -92,11 +94,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } +static void esp_ssg_unref(struct xfrm_state *x, void *tmp) +{ + struct esp_output_extra *extra = esp_tmp_extra(tmp); + struct crypto_aead *aead = x->data; + int extralen = 0; + u8 *iv; + struct aead_request *req; + struct scatterlist *sg; + + if (x->props.flags & XFRM_STATE_ESN) + extralen += sizeof(*extra); + + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); + req = esp_tmp_req(aead, iv); + + /* Unref skb_frag_pages in the src scatterlist if necessary. + * Skip the first sg which comes from skb->data. + */ + if (req->src != req->dst) + for (sg = sg_next(req->src); sg; sg = sg_next(sg)) + put_page(sg_page(sg)); +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + void *tmp; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; - kfree(ESP_SKB_CB(skb)->tmp); + tmp = ESP_SKB_CB(skb)->tmp; + esp_ssg_unref(x, tmp); + kfree(tmp); xfrm_output_resume(skb, err); } @@ -120,6 +151,29 @@ static void esp_output_restore_header(struct sk_buff *skb) sizeof(__be32)); } +static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb, + struct ip_esp_hdr *esph, + struct esp_output_extra *extra) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * encryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + } + + esph->spi = x->id.spi; + + return esph; +} + static void esp_output_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -128,18 +182,36 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } +static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) +{ + /* Fill padding... */ + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } + do { + int i; + for (i = 0; i < plen - 2; i++) + tail[i] = i + 1; + } while (0); + tail[plen - 2] = plen - 2; + tail[plen - 1] = proto; +} + static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { - int err; struct esp_output_extra *extra; + int err = -ENOMEM; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; - struct scatterlist *sg; + struct scatterlist *sg, *dsg; struct sk_buff *trailer; + struct page *page; void *tmp; u8 *iv; u8 *tail; + u8 *vaddr; int blksize; int clen; int alen; @@ -149,7 +221,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int nfrags; int assoclen; int extralen; + int tailen; __be64 seqno; + __u8 proto = *skb_mac_header(skb); /* skb is pure payload to encrypt */ @@ -169,12 +243,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); plen = clen - skb->len - tfclen; - - err = skb_cow_data(skb, tfclen + plen + alen, &trailer); - if (err < 0) - goto error; - nfrags = err; - + tailen = tfclen + plen + alen; assoclen = sizeof(*esph); extralen = 0; @@ -183,35 +252,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, extralen); - if (!tmp) { - err = -ENOMEM; - goto error; - } - - extra = esp_tmp_extra(tmp); - iv = esp_tmp_iv(aead, tmp, extralen); - req = esp_tmp_req(aead, iv); - sg = esp_req_sg(aead, req); - - /* Fill padding... */ - tail = skb_tail_pointer(trailer); - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = *skb_mac_header(skb); - pskb_put(skb, trailer, clen - skb->len + alen); - - skb_push(skb, -skb_network_offset(skb)); - esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; + esph = ip_esp_hdr(skb); /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { @@ -230,7 +272,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) uh = (struct udphdr *)esph; uh->source = sport; uh->dest = dport; - uh->len = htons(skb->len - skb_transport_offset(skb)); + uh->len = htons(skb->len + tailen + - skb_transport_offset(skb)); uh->check = 0; switch (encap_type) { @@ -248,31 +291,148 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) *skb_mac_header(skb) = IPPROTO_UDP; } - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + if (!skb_cloned(skb)) { + if (tailen <= skb_availroom(skb)) { + nfrags = 1; + trailer = skb; + tail = skb_tail_pointer(trailer); - aead_request_set_callback(req, 0, esp_output_done, skb); + goto skip_cow; + } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS) + && !skb_has_frag_list(skb)) { + int allocsize; + struct sock *sk = skb->sk; + struct page_frag *pfrag = &x->xfrag; - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * encryption. - */ - if ((x->props.flags & XFRM_STATE_ESN)) { - extra->esphoff = (unsigned char *)esph - - skb_transport_header(skb); - esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); - extra->seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); - aead_request_set_callback(req, 0, esp_output_done_esn, skb); + allocsize = ALIGN(tailen, L1_CACHE_BYTES); + + spin_lock_bh(&x->lock); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + goto cow; + } + + page = pfrag->page; + get_page(page); + + vaddr = kmap_atomic(page); + + tail = vaddr + pfrag->offset; + + esp_output_fill_trailer(tail, tfclen, plen, proto); + + kunmap_atomic(vaddr); + + nfrags = skb_shinfo(skb)->nr_frags; + + __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, + tailen); + skb_shinfo(skb)->nr_frags = ++nfrags; + + pfrag->offset = pfrag->offset + allocsize; + nfrags++; + + skb->len += tailen; + skb->data_len += tailen; + skb->truesize += tailen; + if (sk) + atomic_add(tailen, &sk->sk_wmem_alloc); + + skb_push(skb, -skb_network_offset(skb)); + + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esph->spi = x->id.spi; + + tmp = esp_alloc_tmp(aead, nfrags + 2, extralen); + if (!tmp) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } + + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); + dsg = &sg[nfrags]; + + esph = esp_output_set_extra(skb, esph, extra); + + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } + + skb_shinfo(skb)->nr_frags = 1; + + page = pfrag->page; + get_page(page); + /* replace page frags in skb with new page */ + __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); + pfrag->offset = pfrag->offset + allocsize; + + sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); + skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + spin_unlock_bh(&x->lock); + + goto skip_cow2; + } } +cow: + err = skb_cow_data(skb, tailen, &trailer); + if (err < 0) + goto error; + nfrags = err; + tail = skb_tail_pointer(trailer); + esph = ip_esp_hdr(skb); + +skip_cow: + esp_output_fill_trailer(tail, tfclen, plen, proto); + + pskb_put(skb, trailer, clen - skb->len + alen); + skb_push(skb, -skb_network_offset(skb)); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); esph->spi = x->id.spi; + tmp = esp_alloc_tmp(aead, nfrags, extralen); + if (!tmp) { + err = -ENOMEM; + goto error; + } + + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); + dsg = sg; + + esph = esp_output_set_extra(skb, esph, extra); + sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, (unsigned char *)esph - skb->data, assoclen + ivlen + clen + alen); - aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); +skip_cow2: + if ((x->props.flags & XFRM_STATE_ESN)) + aead_request_set_callback(req, 0, esp_output_done_esn, skb); + else + aead_request_set_callback(req, 0, esp_output_done, skb); + + aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + @@ -298,6 +458,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp_output_restore_header(skb); } + if (sg != dsg) + esp_ssg_unref(x, tmp); kfree(tmp); error: @@ -401,6 +563,23 @@ static void esp_input_restore_header(struct sk_buff *skb) __skb_pull(skb, 4); } +static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * decryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)skb_push(skb, 4); + *seqhi = esph->spi; + esph->spi = esph->seq_no; + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; + } +} + static void esp_input_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -437,12 +616,6 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) if (elen <= 0) goto out; - err = skb_cow_data(skb, 0, &trailer); - if (err < 0) - goto out; - - nfrags = err; - assoclen = sizeof(*esph); seqhilen = 0; @@ -451,6 +624,26 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) assoclen += seqhilen; } + if (!skb_cloned(skb)) { + if (!skb_is_nonlinear(skb)) { + nfrags = 1; + + goto skip_cow; + } else if (!skb_has_frag_list(skb)) { + nfrags = skb_shinfo(skb)->nr_frags; + nfrags++; + + goto skip_cow; + } + } + + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) + goto out; + + nfrags = err; + +skip_cow: err = -ENOMEM; tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) @@ -462,27 +655,18 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); - skb->ip_summed = CHECKSUM_NONE; - - esph = (struct ip_esp_hdr *)skb->data; - - aead_request_set_callback(req, 0, esp_input_done, skb); - - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * decryption. - */ - if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)skb_push(skb, 4); - *seqhi = esph->spi; - esph->spi = esph->seq_no; - esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; - aead_request_set_callback(req, 0, esp_input_done_esn, skb); - } + esp_input_set_header(skb, seqhi); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); + skb->ip_summed = CHECKSUM_NONE; + + if ((x->props.flags & XFRM_STATE_ESN)) + aead_request_set_callback(req, 0, esp_input_done_esn, skb); + else + aead_request_set_callback(req, 0, esp_input_done, skb); + aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 542074c00c78..d6660a8c0ea5 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -90,11 +90,3 @@ void __init xfrm4_state_init(void) { xfrm_state_register_afinfo(&xfrm4_state_afinfo); } - -#if 0 -void __exit xfrm4_state_fini(void) -{ - xfrm_state_unregister_afinfo(&xfrm4_state_afinfo); -} -#endif /* 0 */ - diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 189eb10b742d..dda6035e3b84 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -474,6 +474,9 @@ static void ah6_input_done(struct crypto_async_request *base, int err) int hdr_len = skb_network_header_len(skb); int ah_hlen = (ah->hdrlen + 2) << 2; + if (err) + goto out; + work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index cbcdd5db31f4..ff54faa75631 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -44,6 +44,8 @@ #include #include +#include + struct esp_skb_cb { struct xfrm_skb_cb xfrm; void *tmp; @@ -114,11 +116,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } +static void esp_ssg_unref(struct xfrm_state *x, void *tmp) +{ + __be32 *seqhi; + struct crypto_aead *aead = x->data; + int seqhilen = 0; + u8 *iv; + struct aead_request *req; + struct scatterlist *sg; + + if (x->props.flags & XFRM_STATE_ESN) + seqhilen += sizeof(__be32); + + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); + req = esp_tmp_req(aead, iv); + + /* Unref skb_frag_pages in the src scatterlist if necessary. + * Skip the first sg which comes from skb->data. + */ + if (req->src != req->dst) + for (sg = sg_next(req->src); sg; sg = sg_next(sg)) + put_page(sg_page(sg)); +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + void *tmp; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; - kfree(ESP_SKB_CB(skb)->tmp); + tmp = ESP_SKB_CB(skb)->tmp; + esp_ssg_unref(x, tmp); + kfree(tmp); xfrm_output_resume(skb, err); } @@ -138,6 +169,27 @@ static void esp_output_restore_header(struct sk_buff *skb) esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); } +static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, + struct ip_esp_hdr *esph, + __be32 *seqhi) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * encryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); + *seqhi = esph->spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + } + + esph->spi = x->id.spi; + + return esph; +} + static void esp_output_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -146,14 +198,31 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } +static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) +{ + /* Fill padding... */ + if (tfclen) { + memset(tail, 0, tfclen); + tail += tfclen; + } + do { + int i; + for (i = 0; i < plen - 2; i++) + tail[i] = i + 1; + } while (0); + tail[plen - 2] = plen - 2; + tail[plen - 1] = proto; +} + static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; - struct scatterlist *sg; + struct scatterlist *sg, *dsg; struct sk_buff *trailer; + struct page *page; void *tmp; int blksize; int clen; @@ -164,10 +233,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int nfrags; int assoclen; int seqhilen; + int tailen; u8 *iv; u8 *tail; + u8 *vaddr; __be32 *seqhi; __be64 seqno; + __u8 proto = *skb_mac_header(skb); /* skb is pure payload to encrypt */ aead = x->data; @@ -186,11 +258,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(skb->len + 2 + tfclen, blksize); plen = clen - skb->len - tfclen; - - err = skb_cow_data(skb, tfclen + plen + alen, &trailer); - if (err < 0) - goto error; - nfrags = err; + tailen = tfclen + plen + alen; assoclen = sizeof(*esph); seqhilen = 0; @@ -200,6 +268,126 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) assoclen += seqhilen; } + *skb_mac_header(skb) = IPPROTO_ESP; + esph = ip_esp_hdr(skb); + + if (!skb_cloned(skb)) { + if (tailen <= skb_availroom(skb)) { + nfrags = 1; + trailer = skb; + tail = skb_tail_pointer(trailer); + + goto skip_cow; + } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS) + && !skb_has_frag_list(skb)) { + int allocsize; + struct sock *sk = skb->sk; + struct page_frag *pfrag = &x->xfrag; + + allocsize = ALIGN(tailen, L1_CACHE_BYTES); + + spin_lock_bh(&x->lock); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + goto cow; + } + + page = pfrag->page; + get_page(page); + + vaddr = kmap_atomic(page); + + tail = vaddr + pfrag->offset; + + esp_output_fill_trailer(tail, tfclen, plen, proto); + + kunmap_atomic(vaddr); + + nfrags = skb_shinfo(skb)->nr_frags; + + __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, + tailen); + skb_shinfo(skb)->nr_frags = ++nfrags; + + pfrag->offset = pfrag->offset + allocsize; + nfrags++; + + skb->len += tailen; + skb->data_len += tailen; + skb->truesize += tailen; + if (sk) + atomic_add(tailen, &sk->sk_wmem_alloc); + + skb_push(skb, -skb_network_offset(skb)); + + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esph->spi = x->id.spi; + + tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen); + if (!tmp) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); + dsg = &sg[nfrags]; + + esph = esp_output_set_esn(skb, esph, seqhi); + + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); + + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + err = -ENOMEM; + goto error; + } + + skb_shinfo(skb)->nr_frags = 1; + + page = pfrag->page; + get_page(page); + /* replace page frags in skb with new page */ + __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); + pfrag->offset = pfrag->offset + allocsize; + + sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); + skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + + spin_unlock_bh(&x->lock); + + goto skip_cow2; + } + } + +cow: + err = skb_cow_data(skb, tailen, &trailer); + if (err < 0) + goto error; + nfrags = err; + + tail = skb_tail_pointer(trailer); + esph = ip_esp_hdr(skb); + +skip_cow: + esp_output_fill_trailer(tail, tfclen, plen, proto); + + pskb_put(skb, trailer, clen - skb->len + alen); + skb_push(skb, -skb_network_offset(skb)); + + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esph->spi = x->id.spi; + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) { err = -ENOMEM; @@ -210,49 +398,22 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); + dsg = sg; - /* Fill padding... */ - tail = skb_tail_pointer(trailer); - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = *skb_mac_header(skb); - pskb_put(skb, trailer, clen - skb->len + alen); - - skb_push(skb, -skb_network_offset(skb)); - esph = ip_esp_hdr(skb); - *skb_mac_header(skb) = IPPROTO_ESP; - - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - - aead_request_set_callback(req, 0, esp_output_done, skb); - - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * encryption. - */ - if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); - aead_request_set_callback(req, 0, esp_output_done_esn, skb); - } - - esph->spi = x->id.spi; + esph = esp_output_set_esn(skb, esph, seqhi); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, (unsigned char *)esph - skb->data, assoclen + ivlen + clen + alen); - aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); +skip_cow2: + if ((x->props.flags & XFRM_STATE_ESN)) + aead_request_set_callback(req, 0, esp_output_done_esn, skb); + else + aead_request_set_callback(req, 0, esp_output_done, skb); + + aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + @@ -278,6 +439,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esp_output_restore_header(skb); } + if (sg != dsg) + esp_ssg_unref(x, tmp); kfree(tmp); error: @@ -343,6 +506,23 @@ static void esp_input_restore_header(struct sk_buff *skb) __skb_pull(skb, 4); } +static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * decryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)skb_push(skb, 4); + *seqhi = esph->spi; + esph->spi = esph->seq_no; + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; + } +} + static void esp_input_done_esn(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -378,14 +558,6 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } - nfrags = skb_cow_data(skb, 0, &trailer); - if (nfrags < 0) { - ret = -EINVAL; - goto out; - } - - ret = -ENOMEM; - assoclen = sizeof(*esph); seqhilen = 0; @@ -394,6 +566,27 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) assoclen += seqhilen; } + if (!skb_cloned(skb)) { + if (!skb_is_nonlinear(skb)) { + nfrags = 1; + + goto skip_cow; + } else if (!skb_has_frag_list(skb)) { + nfrags = skb_shinfo(skb)->nr_frags; + nfrags++; + + goto skip_cow; + } + } + + nfrags = skb_cow_data(skb, 0, &trailer); + if (nfrags < 0) { + ret = -EINVAL; + goto out; + } + +skip_cow: + ret = -ENOMEM; tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) goto out; @@ -404,27 +597,18 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); - skb->ip_summed = CHECKSUM_NONE; - - esph = (struct ip_esp_hdr *)skb->data; - - aead_request_set_callback(req, 0, esp_input_done, skb); - - /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after - * decryption. - */ - if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)skb_push(skb, 4); - *seqhi = esph->spi; - esph->spi = esph->seq_no; - esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; - aead_request_set_callback(req, 0, esp_input_done_esn, skb); - } + esp_input_set_header(skb, seqhi); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); + skb->ip_summed = CHECKSUM_NONE; + + if ((x->props.flags & XFRM_STATE_ESN)) + aead_request_set_callback(req, 0, esp_input_done_esn, skb); + else + aead_request_set_callback(req, 0, esp_input_done, skb); + aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 6e3f0254d8a1..3213fe8027be 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -21,6 +21,9 @@ static struct kmem_cache *secpath_cachep __read_mostly; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; +static struct gro_cells gro_cells; +static struct net_device xfrm_napi_dev; + int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) { int err = 0; @@ -371,7 +374,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (decaps) { skb_dst_drop(skb); - netif_rx(skb); + gro_cells_receive(&gro_cells, skb); return 0; } else { return x->inner_mode->afinfo->transport_finish(skb, async); @@ -394,6 +397,13 @@ EXPORT_SYMBOL(xfrm_input_resume); void __init xfrm_input_init(void) { + int err; + + init_dummy_netdev(&xfrm_napi_dev); + err = gro_cells_init(&gro_cells, &xfrm_napi_dev); + if (err) + gro_cells.cells = NULL; + secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 637387bbaaea..8ba29fe58352 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -246,10 +246,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu) return; afinfo = xfrm_state_get_afinfo(proto); - if (!afinfo) - return; - - afinfo->local_error(skb, mtu); - xfrm_state_put_afinfo(afinfo); + if (afinfo) + afinfo->local_error(skb, mtu); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xfrm_local_error); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 177e208e8ff5..99ad1af2927f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -330,7 +330,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) } EXPORT_SYMBOL(xfrm_policy_destroy); -/* Rule must be locked. Release descentant resources, announce +/* Rule must be locked. Release descendant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 64e3c82eedf6..5a597dbbe564 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -192,7 +192,7 @@ int xfrm_register_type(const struct xfrm_type *type, unsigned short family) else err = -EEXIST; spin_unlock_bh(&xfrm_type_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_register_type); @@ -213,7 +213,7 @@ int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) else typemap[type->proto] = NULL; spin_unlock_bh(&xfrm_type_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -231,17 +231,18 @@ static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) return NULL; typemap = afinfo->type_map; - type = typemap[proto]; + type = READ_ONCE(typemap[proto]); if (unlikely(type && !try_module_get(type->owner))) type = NULL; + + rcu_read_unlock(); + if (!type && !modload_attempted) { - xfrm_state_put_afinfo(afinfo); request_module("xfrm-type-%d-%d", family, proto); modload_attempted = 1; goto retry; } - xfrm_state_put_afinfo(afinfo); return type; } @@ -280,7 +281,7 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family) out: spin_unlock_bh(&xfrm_mode_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_register_mode); @@ -308,7 +309,7 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family) } spin_unlock_bh(&xfrm_mode_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_unregister_mode); @@ -327,17 +328,17 @@ static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) if (unlikely(afinfo == NULL)) return NULL; - mode = afinfo->mode_map[encap]; + mode = READ_ONCE(afinfo->mode_map[encap]); if (unlikely(mode && !try_module_get(mode->owner))) mode = NULL; + + rcu_read_unlock(); if (!mode && !modload_attempted) { - xfrm_state_put_afinfo(afinfo); request_module("xfrm-mode-%d-%d", family, encap); modload_attempted = 1; goto retry; } - xfrm_state_put_afinfo(afinfo); return mode; } @@ -409,7 +410,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) if (x->xflags & XFRM_SOFT_EXPIRE) { /* enter hard expire without soft expire first?! * setting a new date could trigger this. - * workarbound: fix x->curflt.add_time by below: + * workaround: fix x->curflt.add_time by below: */ x->curlft.add_time = now - x->saved_tmo - 1; tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; @@ -639,26 +640,25 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) } EXPORT_SYMBOL(xfrm_sad_getinfo); -static int +static void xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, const struct xfrm_tmpl *tmpl, const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family) { - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family); + if (!afinfo) - return -1; + return; + afinfo->init_tempsel(&x->sel, fl); if (family != tmpl->encap_family) { - xfrm_state_put_afinfo(afinfo); - afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family); if (!afinfo) - return -1; + return; } afinfo->init_temprop(x, tmpl, daddr, saddr); - xfrm_state_put_afinfo(afinfo); - return 0; } static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, @@ -1474,7 +1474,7 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); spin_unlock_bh(&net->xfrm.xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_tmpl_sort); @@ -1494,7 +1494,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); spin_unlock_bh(&net->xfrm.xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(xfrm_state_sort); @@ -1932,10 +1932,10 @@ EXPORT_SYMBOL(xfrm_unregister_km); int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + + if (WARN_ON(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -EEXIST; @@ -1948,14 +1948,14 @@ EXPORT_SYMBOL(xfrm_state_register_afinfo); int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) { - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + int err = 0, family = afinfo->family; + + if (WARN_ON(family >= NPROTO)) return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_state_afinfo_lock); if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) + if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo) err = -EINVAL; else RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL); @@ -1966,6 +1966,14 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_state_unregister_afinfo); +struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family) +{ + if (unlikely(family >= NPROTO)) + return NULL; + + return rcu_dereference(xfrm_state_afinfo[family]); +} + struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { struct xfrm_state_afinfo *afinfo; @@ -1978,11 +1986,6 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) return afinfo; } -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) -{ - rcu_read_unlock(); -} - /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { @@ -2000,16 +2003,13 @@ EXPORT_SYMBOL(xfrm_state_delete_tunnel); int xfrm_state_mtu(struct xfrm_state *x, int mtu) { - int res; + const struct xfrm_type *type = READ_ONCE(x->type); - spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_VALID && - x->type && x->type->get_mtu) - res = x->type->get_mtu(x, mtu); - else - res = mtu - x->props.header_len; - spin_unlock_bh(&x->lock); - return res; + type && type->get_mtu) + return type->get_mtu(x, mtu); + + return mtu - x->props.header_len; } int __xfrm_init_state(struct xfrm_state *x, bool init_replay) @@ -2028,7 +2028,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) if (afinfo->init_flags) err = afinfo->init_flags(x); - xfrm_state_put_afinfo(afinfo); + rcu_read_unlock(); if (err) goto error;