linux_dsm_epyc7002/net/sunrpc/socklib.c

330 lines
7.9 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/net/sunrpc/socklib.c
*
* Common socket helper routines for RPC client and server
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/compiler.h>
#include <linux/netdevice.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/udp.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/xdr.h>
#include <linux/export.h>
#include "socklib.h"
/*
* Helper structure for copying from an sk_buff.
*/
struct xdr_skb_reader {
struct sk_buff *skb;
unsigned int offset;
size_t count;
__wsum csum;
};
typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
size_t len);
/**
* xdr_skb_read_bits - copy some data bits from skb to internal buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
* Possibly called several times to iterate over an sk_buff and copy
* data out of it.
*/
static size_t
xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
if (len > desc->count)
len = desc->count;
if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
return 0;
desc->count -= len;
desc->offset += len;
return len;
}
/**
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
* Same as skb_read_bits, but calculate a checksum at the same time.
*/
static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
unsigned int pos;
__wsum csum2;
if (len > desc->count)
len = desc->count;
pos = desc->offset;
csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len);
desc->csum = csum_block_add(desc->csum, csum2, pos);
desc->count -= len;
desc->offset += len;
return len;
}
/**
* xdr_partial_copy_from_skb - copy data out of an skb
* @xdr: target XDR buffer
* @base: starting offset
* @desc: sk_buff copy helper
* @copy_actor: virtual method for copying data
*
*/
static ssize_t
xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
ssize_t copied = 0;
size_t ret;
len = xdr->head[0].iov_len;
if (base < len) {
len -= base;
ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
copied += ret;
if (ret != len || !desc->count)
goto out;
base = 0;
} else
base -= len;
if (unlikely(pglen == 0))
goto copy_tail;
if (unlikely(base >= pglen)) {
base -= pglen;
goto copy_tail;
}
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
ppage += base >> PAGE_SHIFT;
base &= ~PAGE_MASK;
}
do {
char *kaddr;
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
*ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
copied = -ENOMEM;
goto out;
}
}
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 19:29:47 +07:00
len = PAGE_SIZE;
kaddr = kmap_atomic(*ppage);
if (base) {
len -= base;
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr + base, len);
base = 0;
} else {
if (pglen < len)
len = pglen;
ret = copy_actor(desc, kaddr, len);
}
flush_dcache_page(*ppage);
kunmap_atomic(kaddr);
copied += ret;
if (ret != len || !desc->count)
goto out;
ppage++;
} while ((pglen -= len) != 0);
copy_tail:
len = xdr->tail[0].iov_len;
if (base < len)
copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
out:
return copied;
}
/**
* csum_partial_copy_to_xdr - checksum and copy data
* @xdr: target XDR buffer
* @skb: source skb
*
* We have set things up such that we perform the checksum of the UDP
* packet in parallel with the copies into the RPC client iovec. -DaveM
*/
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
struct xdr_skb_reader desc;
desc.skb = skb;
desc.offset = 0;
desc.count = skb->len - desc.offset;
if (skb_csum_unnecessary(skb))
goto no_checksum;
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
return -1;
if (desc.offset != skb->len) {
__wsum csum2;
csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
}
if (desc.count)
return -1;
if (csum_fold(desc.csum))
return -1;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev, skb);
return 0;
no_checksum:
if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
return -1;
if (desc.count)
return -1;
return 0;
}
EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
size_t seek)
{
if (seek)
iov_iter_advance(&msg->msg_iter, seek);
return sock_sendmsg(sock, msg);
}
static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t seek)
{
iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
return xprt_sendmsg(sock, msg, seek);
}
static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
struct xdr_buf *xdr, size_t base)
{
int err;
err = xdr_alloc_bvec(xdr, GFP_KERNEL);
if (err < 0)
return err;
iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr),
xdr->page_len + xdr->page_base);
return xprt_sendmsg(sock, msg, base + xdr->page_base);
}
/* Common case:
* - stream transport
* - sending from byte 0 of the message
* - the message is wholly contained in @xdr's head iovec
*/
static int xprt_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
rpc_fraghdr marker, struct kvec *vec,
size_t base)
{
struct kvec iov[2] = {
[0] = {
.iov_base = &marker,
.iov_len = sizeof(marker)
},
[1] = *vec,
};
size_t len = iov[0].iov_len + iov[1].iov_len;
iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
return xprt_sendmsg(sock, msg, base);
}
/**
* xprt_sock_sendmsg - write an xdr_buf directly to a socket
* @sock: open socket to send on
* @msg: socket message metadata
* @xdr: xdr_buf containing this request
* @base: starting position in the buffer
* @marker: stream record marker field
* @sent_p: return the total number of bytes successfully queued for sending
*
* Return values:
* On success, returns zero and fills in @sent_p.
* %-ENOTSOCK if @sock is not a struct socket.
*/
int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg,
struct xdr_buf *xdr, unsigned int base,
rpc_fraghdr marker, unsigned int *sent_p)
{
unsigned int rmsize = marker ? sizeof(marker) : 0;
unsigned int remainder = rmsize + xdr->len - base;
unsigned int want;
int err = 0;
*sent_p = 0;
if (unlikely(!sock))
return -ENOTSOCK;
msg->msg_flags |= MSG_MORE;
want = xdr->head[0].iov_len + rmsize;
if (base < want) {
unsigned int len = want - base;
remainder -= len;
if (remainder == 0)
msg->msg_flags &= ~MSG_MORE;
if (rmsize)
err = xprt_send_rm_and_kvec(sock, msg, marker,
&xdr->head[0], base);
else
err = xprt_send_kvec(sock, msg, &xdr->head[0], base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else {
base -= want;
}
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
if (remainder == 0)
msg->msg_flags &= ~MSG_MORE;
err = xprt_send_pagedata(sock, msg, xdr, base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else {
base -= xdr->page_len;
}
if (base >= xdr->tail[0].iov_len)
return 0;
msg->msg_flags &= ~MSG_MORE;
err = xprt_send_kvec(sock, msg, &xdr->tail[0], base);
out:
if (err > 0) {
*sent_p += err;
err = 0;
}
return err;
}