linux_dsm_epyc7002/net/phonet/pep.c

1376 lines
30 KiB
C
Raw Normal View History

/*
* File: pep.c
*
* Phonet pipe protocol end point socket
*
* Copyright (C) 2008 Nokia Corporation.
*
* Author: Rémi Denis-Courmont
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#include <linux/kernel.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <linux/socket.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <asm/ioctls.h>
#include <linux/phonet.h>
#include <linux/module.h>
#include <net/phonet/phonet.h>
#include <net/phonet/pep.h>
#include <net/phonet/gprs.h>
/* sk_state values:
* TCP_CLOSE sock not in use yet
* TCP_CLOSE_WAIT disconnected pipe
* TCP_LISTEN listening pipe endpoint
* TCP_SYN_RECV connected pipe in disabled state
* TCP_ESTABLISHED connected pipe in enabled state
*
* pep_sock locking:
* - sk_state, hlist: sock lock needed
* - listener: read only
* - pipe_handle: read only
*/
#define CREDITS_MAX 10
#define CREDITS_THR 7
#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */
/* Get the next TLV sub-block. */
static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen,
void *buf)
{
void *data = NULL;
struct {
u8 sb_type;
u8 sb_len;
} *ph, h;
int buflen = *plen;
ph = skb_header_pointer(skb, 0, 2, &h);
if (ph == NULL || ph->sb_len < 2 || !pskb_may_pull(skb, ph->sb_len))
return NULL;
ph->sb_len -= 2;
*ptype = ph->sb_type;
*plen = ph->sb_len;
if (buflen > ph->sb_len)
buflen = ph->sb_len;
data = skb_header_pointer(skb, 2, buflen, buf);
__skb_pull(skb, 2 + ph->sb_len);
return data;
}
static struct sk_buff *pep_alloc_skb(struct sock *sk, const void *payload,
int len, gfp_t priority)
{
struct sk_buff *skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
if (!skb)
return NULL;
skb_set_owner_w(skb, sk);
skb_reserve(skb, MAX_PNPIPE_HEADER);
__skb_put(skb, len);
skb_copy_to_linear_data(skb, payload, len);
__skb_push(skb, sizeof(struct pnpipehdr));
skb_reset_transport_header(skb);
return skb;
}
static int pep_reply(struct sock *sk, struct sk_buff *oskb, u8 code,
const void *data, int len, gfp_t priority)
{
const struct pnpipehdr *oph = pnp_hdr(oskb);
struct pnpipehdr *ph;
struct sk_buff *skb;
struct sockaddr_pn peer;
skb = pep_alloc_skb(sk, data, len, priority);
if (!skb)
return -ENOMEM;
ph = pnp_hdr(skb);
ph->utid = oph->utid;
ph->message_id = oph->message_id + 1; /* REQ -> RESP */
ph->pipe_handle = oph->pipe_handle;
ph->error_code = code;
pn_skb_get_src_sockaddr(oskb, &peer);
return pn_skb_send(sk, skb, &peer);
}
static int pep_indicate(struct sock *sk, u8 id, u8 code,
const void *data, int len, gfp_t priority)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *ph;
struct sk_buff *skb;
skb = pep_alloc_skb(sk, data, len, priority);
if (!skb)
return -ENOMEM;
ph = pnp_hdr(skb);
ph->utid = 0;
ph->message_id = id;
ph->pipe_handle = pn->pipe_handle;
ph->data[0] = code;
return pn_skb_send(sk, skb, NULL);
}
#define PAD 0x00
static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
const void *data, int len)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *ph;
struct sk_buff *skb;
skb = pep_alloc_skb(sk, data, len, GFP_KERNEL);
if (!skb)
return -ENOMEM;
ph = pnp_hdr(skb);
ph->utid = id; /* whatever */
ph->message_id = id;
ph->pipe_handle = pn->pipe_handle;
ph->data[0] = code;
return pn_skb_send(sk, skb, NULL);
}
static int pipe_handler_send_created_ind(struct sock *sk)
{
struct pep_sock *pn = pep_sk(sk);
u8 data[4] = {
PN_PIPE_SB_NEGOTIATED_FC, pep_sb_size(2),
pn->tx_fc, pn->rx_fc,
};
return pep_indicate(sk, PNS_PIPE_CREATED_IND, 1 /* sub-blocks */,
data, 4, GFP_ATOMIC);
}
static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
{
static const u8 data[20] = {
PAD, PAD, PAD, 2 /* sub-blocks */,
PN_PIPE_SB_REQUIRED_FC_TX, pep_sb_size(5), 3, PAD,
PN_MULTI_CREDIT_FLOW_CONTROL,
PN_ONE_CREDIT_FLOW_CONTROL,
PN_LEGACY_FLOW_CONTROL,
PAD,
PN_PIPE_SB_PREFERRED_FC_RX, pep_sb_size(5), 3, PAD,
PN_MULTI_CREDIT_FLOW_CONTROL,
PN_ONE_CREDIT_FLOW_CONTROL,
PN_LEGACY_FLOW_CONTROL,
PAD,
};
might_sleep();
return pep_reply(sk, skb, PN_PIPE_NO_ERROR, data, sizeof(data),
GFP_KERNEL);
}
static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code,
gfp_t priority)
{
static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ };
WARN_ON(code == PN_PIPE_NO_ERROR);
return pep_reply(sk, skb, code, data, sizeof(data), priority);
}
/* Control requests are not sent by the pipe service and have a specific
* message format. */
static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
gfp_t priority)
{
const struct pnpipehdr *oph = pnp_hdr(oskb);
struct sk_buff *skb;
struct pnpipehdr *ph;
struct sockaddr_pn dst;
u8 data[4] = {
oph->data[0], /* PEP type */
code, /* error code, at an unusual offset */
PAD, PAD,
};
skb = pep_alloc_skb(sk, data, 4, priority);
if (!skb)
return -ENOMEM;
ph = pnp_hdr(skb);
ph->utid = oph->utid;
ph->message_id = PNS_PEP_CTRL_RESP;
ph->pipe_handle = oph->pipe_handle;
ph->data[0] = oph->data[1]; /* CTRL id */
pn_skb_get_src_sockaddr(oskb, &dst);
return pn_skb_send(sk, skb, &dst);
}
static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
{
u8 data[4] = { type, PAD, PAD, status };
return pep_indicate(sk, PNS_PEP_STATUS_IND, PN_PEP_TYPE_COMMON,
data, 4, priority);
}
/* Send our RX flow control information to the sender.
* Socket must be locked. */
static void pipe_grant_credits(struct sock *sk, gfp_t priority)
{
struct pep_sock *pn = pep_sk(sk);
BUG_ON(sk->sk_state != TCP_ESTABLISHED);
switch (pn->rx_fc) {
case PN_LEGACY_FLOW_CONTROL: /* TODO */
break;
case PN_ONE_CREDIT_FLOW_CONTROL:
if (pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL,
PEP_IND_READY, priority) == 0)
pn->rx_credits = 1;
break;
case PN_MULTI_CREDIT_FLOW_CONTROL:
if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX)
break;
if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS,
CREDITS_MAX - pn->rx_credits,
priority) == 0)
pn->rx_credits = CREDITS_MAX;
break;
}
}
static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *hdr;
int wake = 0;
if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
return -EINVAL;
hdr = pnp_hdr(skb);
if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
(unsigned int)hdr->data[0]);
return -EOPNOTSUPP;
}
switch (hdr->data[1]) {
case PN_PEP_IND_FLOW_CONTROL:
switch (pn->tx_fc) {
case PN_LEGACY_FLOW_CONTROL:
switch (hdr->data[4]) {
case PEP_IND_BUSY:
atomic_set(&pn->tx_credits, 0);
break;
case PEP_IND_READY:
atomic_set(&pn->tx_credits, wake = 1);
break;
}
break;
case PN_ONE_CREDIT_FLOW_CONTROL:
if (hdr->data[4] == PEP_IND_READY)
atomic_set(&pn->tx_credits, wake = 1);
break;
}
break;
case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
break;
atomic_add(wake = hdr->data[4], &pn->tx_credits);
break;
default:
net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
(unsigned int)hdr->data[1]);
return -EOPNOTSUPP;
}
if (wake)
sk->sk_write_space(sk);
return 0;
}
static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *hdr = pnp_hdr(skb);
u8 n_sb = hdr->data[0];
pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
__skb_pull(skb, sizeof(*hdr));
while (n_sb > 0) {
u8 type, buf[2], len = sizeof(buf);
u8 *data = pep_get_sb(skb, &type, &len, buf);
if (data == NULL)
return -EINVAL;
switch (type) {
case PN_PIPE_SB_NEGOTIATED_FC:
if (len < 2 || (data[0] | data[1]) > 3)
break;
pn->tx_fc = data[0] & 3;
pn->rx_fc = data[1] & 3;
break;
}
n_sb--;
}
return 0;
}
/* Queue an skb to a connected sock.
* Socket lock must be held. */
static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *hdr = pnp_hdr(skb);
struct sk_buff_head *queue;
int err = 0;
BUG_ON(sk->sk_state == TCP_CLOSE_WAIT);
switch (hdr->message_id) {
case PNS_PEP_CONNECT_REQ:
pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_ATOMIC);
break;
case PNS_PEP_DISCONNECT_REQ:
pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
sk->sk_state = TCP_CLOSE_WAIT;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
break;
case PNS_PEP_ENABLE_REQ:
/* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
break;
case PNS_PEP_RESET_REQ:
switch (hdr->state_after_reset) {
case PN_PIPE_DISABLE:
pn->init_enable = 0;
break;
case PN_PIPE_ENABLE:
pn->init_enable = 1;
break;
default: /* not allowed to send an error here!? */
err = -EINVAL;
goto out;
}
/* fall through */
case PNS_PEP_DISABLE_REQ:
atomic_set(&pn->tx_credits, 0);
pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
break;
case PNS_PEP_CTRL_REQ:
if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
atomic_inc(&sk->sk_drops);
break;
}
__skb_pull(skb, 4);
queue = &pn->ctrlreq_queue;
goto queue;
case PNS_PIPE_ALIGNED_DATA:
__skb_pull(skb, 1);
/* fall through */
case PNS_PIPE_DATA:
__skb_pull(skb, 3); /* Pipe data header */
if (!pn_flow_safe(pn->rx_fc)) {
err = sock_queue_rcv_skb(sk, skb);
if (!err)
return NET_RX_SUCCESS;
err = -ENOBUFS;
break;
}
if (pn->rx_credits == 0) {
atomic_inc(&sk->sk_drops);
err = -ENOBUFS;
break;
}
pn->rx_credits--;
queue = &sk->sk_receive_queue;
goto queue;
case PNS_PEP_STATUS_IND:
pipe_rcv_status(sk, skb);
break;
case PNS_PIPE_REDIRECTED_IND:
err = pipe_rcv_created(sk, skb);
break;
case PNS_PIPE_CREATED_IND:
err = pipe_rcv_created(sk, skb);
if (err)
break;
/* fall through */
case PNS_PIPE_RESET_IND:
if (!pn->init_enable)
break;
/* fall through */
case PNS_PIPE_ENABLED_IND:
if (!pn_flow_safe(pn->tx_fc)) {
atomic_set(&pn->tx_credits, 1);
sk->sk_write_space(sk);
}
if (sk->sk_state == TCP_ESTABLISHED)
break; /* Nothing to do */
sk->sk_state = TCP_ESTABLISHED;
pipe_grant_credits(sk, GFP_ATOMIC);
break;
case PNS_PIPE_DISABLED_IND:
sk->sk_state = TCP_SYN_RECV;
pn->rx_credits = 0;
break;
default:
net_dbg_ratelimited("Phonet unknown PEP message: %u\n",
hdr->message_id);
err = -EINVAL;
}
out:
kfree_skb(skb);
return (err == -ENOBUFS) ? NET_RX_DROP : NET_RX_SUCCESS;
queue:
skb->dev = NULL;
skb_set_owner_r(skb, sk);
skb_queue_tail(queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk);
return NET_RX_SUCCESS;
}
/* Destroy connected sock. */
static void pipe_destruct(struct sock *sk)
{
struct pep_sock *pn = pep_sk(sk);
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&pn->ctrlreq_queue);
}
static u8 pipe_negotiate_fc(const u8 *fcs, unsigned int n)
{
unsigned int i;
u8 final_fc = PN_NO_FLOW_CONTROL;
for (i = 0; i < n; i++) {
u8 fc = fcs[i];
if (fc > final_fc && fc < PN_MAX_FLOW_CONTROL)
final_fc = fc;
}
return final_fc;
}
static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *hdr;
u8 n_sb;
if (!pskb_pull(skb, sizeof(*hdr) + 4))
return -EINVAL;
hdr = pnp_hdr(skb);
if (hdr->error_code != PN_PIPE_NO_ERROR)
return -ECONNREFUSED;
/* Parse sub-blocks */
n_sb = hdr->data[4];
while (n_sb > 0) {
u8 type, buf[6], len = sizeof(buf);
const u8 *data = pep_get_sb(skb, &type, &len, buf);
if (data == NULL)
return -EINVAL;
switch (type) {
case PN_PIPE_SB_REQUIRED_FC_TX:
if (len < 2 || len < data[0])
break;
pn->tx_fc = pipe_negotiate_fc(data + 2, len - 2);
break;
case PN_PIPE_SB_PREFERRED_FC_RX:
if (len < 2 || len < data[0])
break;
pn->rx_fc = pipe_negotiate_fc(data + 2, len - 2);
break;
}
n_sb--;
}
return pipe_handler_send_created_ind(sk);
}
static int pep_enableresp_rcv(struct sock *sk, struct sk_buff *skb)
{
struct pnpipehdr *hdr = pnp_hdr(skb);
if (hdr->error_code != PN_PIPE_NO_ERROR)
return -ECONNREFUSED;
return pep_indicate(sk, PNS_PIPE_ENABLED_IND, 0 /* sub-blocks */,
NULL, 0, GFP_ATOMIC);
}
static void pipe_start_flow_control(struct sock *sk)
{
struct pep_sock *pn = pep_sk(sk);
if (!pn_flow_safe(pn->tx_fc)) {
atomic_set(&pn->tx_credits, 1);
sk->sk_write_space(sk);
}
pipe_grant_credits(sk, GFP_ATOMIC);
}
/* Queue an skb to an actively connected sock.
* Socket lock must be held. */
static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *hdr = pnp_hdr(skb);
int err = NET_RX_SUCCESS;
switch (hdr->message_id) {
case PNS_PIPE_ALIGNED_DATA:
__skb_pull(skb, 1);
/* fall through */
case PNS_PIPE_DATA:
__skb_pull(skb, 3); /* Pipe data header */
if (!pn_flow_safe(pn->rx_fc)) {
err = sock_queue_rcv_skb(sk, skb);
if (!err)
return NET_RX_SUCCESS;
err = NET_RX_DROP;
break;
}
if (pn->rx_credits == 0) {
atomic_inc(&sk->sk_drops);
err = NET_RX_DROP;
break;
}
pn->rx_credits--;
skb->dev = NULL;
skb_set_owner_r(skb, sk);
skb_queue_tail(&sk->sk_receive_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk);
return NET_RX_SUCCESS;
case PNS_PEP_CONNECT_RESP:
if (sk->sk_state != TCP_SYN_SENT)
break;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
if (pep_connresp_rcv(sk, skb)) {
sk->sk_state = TCP_CLOSE_WAIT;
break;
}
if (pn->init_enable == PN_PIPE_DISABLE)
sk->sk_state = TCP_SYN_RECV;
else {
sk->sk_state = TCP_ESTABLISHED;
pipe_start_flow_control(sk);
}
break;
case PNS_PEP_ENABLE_RESP:
if (sk->sk_state != TCP_SYN_SENT)
break;
if (pep_enableresp_rcv(sk, skb)) {
sk->sk_state = TCP_CLOSE_WAIT;
break;
}
sk->sk_state = TCP_ESTABLISHED;
pipe_start_flow_control(sk);
break;
case PNS_PEP_DISCONNECT_RESP:
/* sock should already be dead, nothing to do */
break;
case PNS_PEP_STATUS_IND:
pipe_rcv_status(sk, skb);
break;
}
kfree_skb(skb);
return err;
}
/* Listening sock must be locked */
static struct sock *pep_find_pipe(const struct hlist_head *hlist,
const struct sockaddr_pn *dst,
u8 pipe_handle)
{
struct sock *sknode;
u16 dobj = pn_sockaddr_get_object(dst);
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 08:06:00 +07:00
sk_for_each(sknode, hlist) {
struct pep_sock *pnnode = pep_sk(sknode);
/* Ports match, but addresses might not: */
if (pnnode->pn_sk.sobject != dobj)
continue;
if (pnnode->pipe_handle != pipe_handle)
continue;
if (sknode->sk_state == TCP_CLOSE_WAIT)
continue;
sock_hold(sknode);
return sknode;
}
return NULL;
}
/*
* Deliver an skb to a listening sock.
* Socket lock must be held.
* We then queue the skb to the right connected sock (if any).
*/
static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct sock *sknode;
struct pnpipehdr *hdr;
struct sockaddr_pn dst;
u8 pipe_handle;
if (!pskb_may_pull(skb, sizeof(*hdr)))
goto drop;
hdr = pnp_hdr(skb);
pipe_handle = hdr->pipe_handle;
if (pipe_handle == PN_PIPE_INVALID_HANDLE)
goto drop;
pn_skb_get_dst_sockaddr(skb, &dst);
/* Look for an existing pipe handle */
sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
if (sknode)
return sk_receive_skb(sknode, skb, 1);
switch (hdr->message_id) {
case PNS_PEP_CONNECT_REQ:
if (sk->sk_state != TCP_LISTEN || sk_acceptq_is_full(sk)) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE,
GFP_ATOMIC);
break;
}
skb_queue_head(&sk->sk_receive_queue, skb);
sk_acceptq_added(sk);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk);
return NET_RX_SUCCESS;
case PNS_PEP_DISCONNECT_REQ:
pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
break;
case PNS_PEP_CTRL_REQ:
pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE, GFP_ATOMIC);
break;
case PNS_PEP_RESET_REQ:
case PNS_PEP_ENABLE_REQ:
case PNS_PEP_DISABLE_REQ:
/* invalid handle is not even allowed here! */
break;
default:
if ((1 << sk->sk_state)
& ~(TCPF_CLOSE|TCPF_LISTEN|TCPF_CLOSE_WAIT))
/* actively connected socket */
return pipe_handler_do_rcv(sk, skb);
}
drop:
kfree_skb(skb);
return NET_RX_SUCCESS;
}
static int pipe_do_remove(struct sock *sk)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *ph;
struct sk_buff *skb;
skb = pep_alloc_skb(sk, NULL, 0, GFP_KERNEL);
if (!skb)
return -ENOMEM;
ph = pnp_hdr(skb);
ph->utid = 0;
ph->message_id = PNS_PIPE_REMOVE_REQ;
ph->pipe_handle = pn->pipe_handle;
ph->data[0] = PAD;
return pn_skb_send(sk, skb, NULL);
}
/* associated socket ceases to exist */
static void pep_sock_close(struct sock *sk, long timeout)
{
struct pep_sock *pn = pep_sk(sk);
int ifindex = 0;
sock_hold(sk); /* keep a reference after sk_common_release() */
sk_common_release(sk);
lock_sock(sk);
if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) {
if (sk->sk_backlog_rcv == pipe_do_rcv)
/* Forcefully remove dangling Phonet pipe */
pipe_do_remove(sk);
else
pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD,
NULL, 0);
}
sk->sk_state = TCP_CLOSE;
ifindex = pn->ifindex;
pn->ifindex = 0;
release_sock(sk);
if (ifindex)
gprs_detach(sk);
sock_put(sk);
}
static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
{
struct pep_sock *pn = pep_sk(sk), *newpn;
struct sock *newsk = NULL;
struct sk_buff *skb;
struct pnpipehdr *hdr;
struct sockaddr_pn dst, src;
int err;
u16 peer_type;
u8 pipe_handle, enabled, n_sb;
u8 aligned = 0;
skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp);
if (!skb)
return NULL;
lock_sock(sk);
if (sk->sk_state != TCP_LISTEN) {
err = -EINVAL;
goto drop;
}
sk_acceptq_removed(sk);
err = -EPROTO;
if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
goto drop;
hdr = pnp_hdr(skb);
pipe_handle = hdr->pipe_handle;
switch (hdr->state_after_connect) {
case PN_PIPE_DISABLE:
enabled = 0;
break;
case PN_PIPE_ENABLE:
enabled = 1;
break;
default:
pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM,
GFP_KERNEL);
goto drop;
}
peer_type = hdr->other_pep_type << 8;
/* Parse sub-blocks (options) */
n_sb = hdr->data[4];
while (n_sb > 0) {
u8 type, buf[1], len = sizeof(buf);
const u8 *data = pep_get_sb(skb, &type, &len, buf);
if (data == NULL)
goto drop;
switch (type) {
case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE:
if (len < 1)
goto drop;
peer_type = (peer_type & 0xff00) | data[0];
break;
case PN_PIPE_SB_ALIGNED_DATA:
aligned = data[0] != 0;
break;
}
n_sb--;
}
/* Check for duplicate pipe handle */
newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
if (unlikely(newsk)) {
__sock_put(newsk);
newsk = NULL;
pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL);
goto drop;
}
/* Create a new to-be-accepted sock */
newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
goto drop;
}
sock_init_data(NULL, newsk);
newsk->sk_state = TCP_SYN_RECV;
newsk->sk_backlog_rcv = pipe_do_rcv;
newsk->sk_protocol = sk->sk_protocol;
newsk->sk_destruct = pipe_destruct;
newpn = pep_sk(newsk);
pn_skb_get_dst_sockaddr(skb, &dst);
pn_skb_get_src_sockaddr(skb, &src);
newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
newpn->pn_sk.dobject = pn_sockaddr_get_object(&src);
newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst);
sock_hold(sk);
newpn->listener = sk;
skb_queue_head_init(&newpn->ctrlreq_queue);
newpn->pipe_handle = pipe_handle;
atomic_set(&newpn->tx_credits, 0);
newpn->ifindex = 0;
newpn->peer_type = peer_type;
newpn->rx_credits = 0;
newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
newpn->init_enable = enabled;
newpn->aligned = aligned;
err = pep_accept_conn(newsk, skb);
if (err) {
sock_put(newsk);
newsk = NULL;
goto drop;
}
sk_add_node(newsk, &pn->hlist);
drop:
release_sock(sk);
kfree_skb(skb);
*errp = err;
return newsk;
}
static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
{
struct pep_sock *pn = pep_sk(sk);
int err;
u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
if (pn->pipe_handle == PN_PIPE_INVALID_HANDLE)
pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
pn->init_enable, data, 4);
if (err) {
pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
return err;
}
sk->sk_state = TCP_SYN_SENT;
return 0;
}
static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
{
int err;
err = pipe_handler_request(sk, PNS_PEP_ENABLE_REQ, PAD,
NULL, 0);
if (err)
return err;
sk->sk_state = TCP_SYN_SENT;
return 0;
}
static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
struct pep_sock *pn = pep_sk(sk);
int answ;
int ret = -ENOIOCTLCMD;
switch (cmd) {
case SIOCINQ:
if (sk->sk_state == TCP_LISTEN) {
ret = -EINVAL;
break;
}
lock_sock(sk);
if (sock_flag(sk, SOCK_URGINLINE) &&
!skb_queue_empty(&pn->ctrlreq_queue))
answ = skb_peek(&pn->ctrlreq_queue)->len;
else if (!skb_queue_empty(&sk->sk_receive_queue))
answ = skb_peek(&sk->sk_receive_queue)->len;
else
answ = 0;
release_sock(sk);
ret = put_user(answ, (int __user *)arg);
break;
case SIOCPNENABLEPIPE:
lock_sock(sk);
if (sk->sk_state == TCP_SYN_SENT)
ret = -EBUSY;
else if (sk->sk_state == TCP_ESTABLISHED)
ret = -EISCONN;
else
ret = pep_sock_enable(sk, NULL, 0);
release_sock(sk);
break;
}
return ret;
}
static int pep_init(struct sock *sk)
{
struct pep_sock *pn = pep_sk(sk);
sk->sk_destruct = pipe_destruct;
INIT_HLIST_HEAD(&pn->hlist);
pn->listener = NULL;
skb_queue_head_init(&pn->ctrlreq_queue);
atomic_set(&pn->tx_credits, 0);
pn->ifindex = 0;
pn->peer_type = 0;
pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
pn->rx_credits = 0;
pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
pn->init_enable = 1;
pn->aligned = 0;
return 0;
}
static int pep_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct pep_sock *pn = pep_sk(sk);
int val = 0, err = 0;
if (level != SOL_PNPIPE)
return -ENOPROTOOPT;
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
}
lock_sock(sk);
switch (optname) {
case PNPIPE_ENCAP:
if (val && val != PNPIPE_ENCAP_IP) {
err = -EINVAL;
break;
}
if (!pn->ifindex == !val)
break; /* Nothing to do! */
if (!capable(CAP_NET_ADMIN)) {
err = -EPERM;
break;
}
if (val) {
release_sock(sk);
err = gprs_attach(sk);
if (err > 0) {
pn->ifindex = err;
err = 0;
}
} else {
pn->ifindex = 0;
release_sock(sk);
gprs_detach(sk);
err = 0;
}
goto out_norel;
case PNPIPE_HANDLE:
if ((sk->sk_state == TCP_CLOSE) &&
(val >= 0) && (val < PN_PIPE_INVALID_HANDLE))
pn->pipe_handle = val;
else
err = -EINVAL;
break;
case PNPIPE_INITSTATE:
pn->init_enable = !!val;
break;
default:
err = -ENOPROTOOPT;
}
release_sock(sk);
out_norel:
return err;
}
static int pep_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
struct pep_sock *pn = pep_sk(sk);
int len, val;
if (level != SOL_PNPIPE)
return -ENOPROTOOPT;
if (get_user(len, optlen))
return -EFAULT;
switch (optname) {
case PNPIPE_ENCAP:
val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE;
break;
case PNPIPE_IFINDEX:
val = pn->ifindex;
break;
case PNPIPE_HANDLE:
val = pn->pipe_handle;
if (val == PN_PIPE_INVALID_HANDLE)
return -EINVAL;
break;
case PNPIPE_INITSTATE:
val = pn->init_enable;
break;
default:
return -ENOPROTOOPT;
}
len = min_t(unsigned int, sizeof(int), len);
if (put_user(len, optlen))
return -EFAULT;
if (put_user(val, (int __user *) optval))
return -EFAULT;
return 0;
}
static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
{
struct pep_sock *pn = pep_sk(sk);
struct pnpipehdr *ph;
int err;
if (pn_flow_safe(pn->tx_fc) &&
!atomic_add_unless(&pn->tx_credits, -1, 0)) {
kfree_skb(skb);
return -ENOBUFS;
}
skb_push(skb, 3 + pn->aligned);
skb_reset_transport_header(skb);
ph = pnp_hdr(skb);
ph->utid = 0;
if (pn->aligned) {
ph->message_id = PNS_PIPE_ALIGNED_DATA;
ph->data[0] = 0; /* padding */
} else
ph->message_id = PNS_PIPE_DATA;
ph->pipe_handle = pn->pipe_handle;
err = pn_skb_send(sk, skb, NULL);
if (err && pn_flow_safe(pn->tx_fc))
atomic_inc(&pn->tx_credits);
return err;
}
static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
struct pep_sock *pn = pep_sk(sk);
struct sk_buff *skb;
long timeo;
int flags = msg->msg_flags;
int err, done;
phonet: Check input from user before allocating A phonet packet is limited to USHRT_MAX bytes, this is never checked during tx which means that the user can specify any size he wishes, and the kernel will attempt to allocate that size. In the good case, it'll lead to the following warning, but it may also cause the kernel to kick in the OOM and kill a random task on the server. [ 8921.744094] WARNING: at mm/page_alloc.c:2255 __alloc_pages_slowpath+0x65/0x730() [ 8921.749770] Pid: 5081, comm: trinity Tainted: G W 3.4.0-rc1-next-20120402-sasha #46 [ 8921.756672] Call Trace: [ 8921.758185] [<ffffffff810b2ba7>] warn_slowpath_common+0x87/0xb0 [ 8921.762868] [<ffffffff810b2be5>] warn_slowpath_null+0x15/0x20 [ 8921.765399] [<ffffffff8117eae5>] __alloc_pages_slowpath+0x65/0x730 [ 8921.769226] [<ffffffff81179c8a>] ? zone_watermark_ok+0x1a/0x20 [ 8921.771686] [<ffffffff8117d045>] ? get_page_from_freelist+0x625/0x660 [ 8921.773919] [<ffffffff8117f3a8>] __alloc_pages_nodemask+0x1f8/0x240 [ 8921.776248] [<ffffffff811c03e0>] kmalloc_large_node+0x70/0xc0 [ 8921.778294] [<ffffffff811c4bd4>] __kmalloc_node_track_caller+0x34/0x1c0 [ 8921.780847] [<ffffffff821b0e3c>] ? sock_alloc_send_pskb+0xbc/0x260 [ 8921.783179] [<ffffffff821b3c65>] __alloc_skb+0x75/0x170 [ 8921.784971] [<ffffffff821b0e3c>] sock_alloc_send_pskb+0xbc/0x260 [ 8921.787111] [<ffffffff821b002e>] ? release_sock+0x7e/0x90 [ 8921.788973] [<ffffffff821b0ff0>] sock_alloc_send_skb+0x10/0x20 [ 8921.791052] [<ffffffff824cfc20>] pep_sendmsg+0x60/0x380 [ 8921.792931] [<ffffffff824cb4a6>] ? pn_socket_bind+0x156/0x180 [ 8921.794917] [<ffffffff824cb50f>] ? pn_socket_autobind+0x3f/0x90 [ 8921.797053] [<ffffffff824cb63f>] pn_socket_sendmsg+0x4f/0x70 [ 8921.798992] [<ffffffff821ab8e7>] sock_aio_write+0x187/0x1b0 [ 8921.801395] [<ffffffff810e325e>] ? sub_preempt_count+0xae/0xf0 [ 8921.803501] [<ffffffff8111842c>] ? __lock_acquire+0x42c/0x4b0 [ 8921.805505] [<ffffffff821ab760>] ? __sock_recv_ts_and_drops+0x140/0x140 [ 8921.807860] [<ffffffff811e07cc>] do_sync_readv_writev+0xbc/0x110 [ 8921.809986] [<ffffffff811958e7>] ? might_fault+0x97/0xa0 [ 8921.811998] [<ffffffff817bd99e>] ? security_file_permission+0x1e/0x90 [ 8921.814595] [<ffffffff811e17e2>] do_readv_writev+0xe2/0x1e0 [ 8921.816702] [<ffffffff810b8dac>] ? do_setitimer+0x1ac/0x200 [ 8921.818819] [<ffffffff810e2ec1>] ? get_parent_ip+0x11/0x50 [ 8921.820863] [<ffffffff810e325e>] ? sub_preempt_count+0xae/0xf0 [ 8921.823318] [<ffffffff811e1926>] vfs_writev+0x46/0x60 [ 8921.825219] [<ffffffff811e1a3f>] sys_writev+0x4f/0xb0 [ 8921.827127] [<ffffffff82658039>] system_call_fastpath+0x16/0x1b [ 8921.829384] ---[ end trace dffe390f30db9eb7 ]--- Signed-off-by: Sasha Levin <levinsasha928@gmail.com> Acked-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-04-05 19:07:45 +07:00
if (len > USHRT_MAX)
return -EMSGSIZE;
if ((msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|
MSG_CMSG_COMPAT)) ||
!(msg->msg_flags & MSG_EOR))
return -EOPNOTSUPP;
skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len,
flags & MSG_DONTWAIT, &err);
if (!skb)
return err;
skb_reserve(skb, MAX_PHONET_HEADER + 3 + pn->aligned);
err = memcpy_from_msg(skb_put(skb, len), msg, len);
if (err < 0)
goto outfree;
lock_sock(sk);
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
if ((1 << sk->sk_state) & (TCPF_LISTEN|TCPF_CLOSE)) {
err = -ENOTCONN;
goto out;
}
if (sk->sk_state != TCP_ESTABLISHED) {
/* Wait until the pipe gets to enabled state */
disabled:
err = sk_stream_wait_connect(sk, &timeo);
if (err)
goto out;
if (sk->sk_state == TCP_CLOSE_WAIT) {
err = -ECONNRESET;
goto out;
}
}
BUG_ON(sk->sk_state != TCP_ESTABLISHED);
/* Wait until flow control allows TX */
done = atomic_read(&pn->tx_credits);
while (!done) {
DEFINE_WAIT(wait);
if (!timeo) {
err = -EAGAIN;
goto out;
}
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
goto out;
}
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
prepare_to_wait(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
net: sock_def_readable() and friends RCU conversion sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we need two atomic operations (and associated dirtying) per incoming packet. RCU conversion is pretty much needed : 1) Add a new structure, called "struct socket_wq" to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [Future patch will add a list anchor for wakeup coalescing] 2) Attach one of such structure to each "struct socket" created in sock_alloc_inode(). 3) Respect RCU grace period when freeing a "struct socket_wq" 4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct socket_wq" 5) Change sk_sleep() function to use new sk->sk_wq instead of sk->sk_sleep 6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside a rcu_read_lock() section. 7) Change all sk_has_sleeper() callers to : - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock) - Use wq_has_sleeper() to eventually wakeup tasks. - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock) 8) sock_wake_async() is modified to use rcu protection as well. 9) Exceptions : macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq" instead of dynamically allocated ones. They dont need rcu freeing. Some cleanups or followups are probably needed, (possible sk_callback_lock conversion to a spinlock for example...). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-29 18:01:49 +07:00
finish_wait(sk_sleep(sk), &wait);
if (sk->sk_state != TCP_ESTABLISHED)
goto disabled;
}
err = pipe_skb_send(sk, skb);
if (err >= 0)
err = len; /* success! */
skb = NULL;
out:
release_sock(sk);
outfree:
kfree_skb(skb);
return err;
}
int pep_writeable(struct sock *sk)
{
struct pep_sock *pn = pep_sk(sk);
return atomic_read(&pn->tx_credits);
}
int pep_write(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff *rskb, *fs;
int flen = 0;
if (pep_sk(sk)->aligned)
return pipe_skb_send(sk, skb);
rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC);
if (!rskb) {
kfree_skb(skb);
return -ENOMEM;
}
skb_shinfo(rskb)->frag_list = skb;
rskb->len += skb->len;
rskb->data_len += rskb->len;
rskb->truesize += rskb->len;
/* Avoid nested fragments */
skb_walk_frags(skb, fs)
flen += fs->len;
skb->next = skb_shinfo(skb)->frag_list;
skb_frag_list_init(skb);
skb->len -= flen;
skb->data_len -= flen;
skb->truesize -= flen;
skb_reserve(rskb, MAX_PHONET_HEADER + 3);
return pipe_skb_send(sk, rskb);
}
struct sk_buff *pep_read(struct sock *sk)
{
struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
if (sk->sk_state == TCP_ESTABLISHED)
pipe_grant_credits(sk, GFP_ATOMIC);
return skb;
}
static int pep_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len)
{
struct sk_buff *skb;
int err;
if (flags & ~(MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_WAITALL|
MSG_NOSIGNAL|MSG_CMSG_COMPAT))
return -EOPNOTSUPP;
if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE)))
return -ENOTCONN;
if ((flags & MSG_OOB) || sock_flag(sk, SOCK_URGINLINE)) {
/* Dequeue and acknowledge control request */
struct pep_sock *pn = pep_sk(sk);
if (flags & MSG_PEEK)
return -EOPNOTSUPP;
skb = skb_dequeue(&pn->ctrlreq_queue);
if (skb) {
pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR,
GFP_KERNEL);
msg->msg_flags |= MSG_OOB;
goto copy;
}
if (flags & MSG_OOB)
return -EINVAL;
}
skb = skb_recv_datagram(sk, flags, noblock, &err);
lock_sock(sk);
if (skb == NULL) {
if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT)
err = -ECONNRESET;
release_sock(sk);
return err;
}
if (sk->sk_state == TCP_ESTABLISHED)
pipe_grant_credits(sk, GFP_KERNEL);
release_sock(sk);
copy:
msg->msg_flags |= MSG_EOR;
if (skb->len > len)
msg->msg_flags |= MSG_TRUNC;
else
len = skb->len;
err = skb_copy_datagram_msg(skb, 0, msg, len);
if (!err)
err = (flags & MSG_TRUNC) ? skb->len : len;
skb_free_datagram(sk, skb);
return err;
}
static void pep_sock_unhash(struct sock *sk)
{
struct pep_sock *pn = pep_sk(sk);
struct sock *skparent = NULL;
lock_sock(sk);
if (pn->listener != NULL) {
skparent = pn->listener;
pn->listener = NULL;
release_sock(sk);
pn = pep_sk(skparent);
lock_sock(skparent);
sk_del_node_init(sk);
sk = skparent;
}
/* Unhash a listening sock only when it is closed
* and all of its active connected pipes are closed. */
if (hlist_empty(&pn->hlist))
pn_sock_unhash(&pn->pn_sk.sk);
release_sock(sk);
if (skparent)
sock_put(skparent);
}
static struct proto pep_proto = {
.close = pep_sock_close,
.accept = pep_sock_accept,
.connect = pep_sock_connect,
.ioctl = pep_ioctl,
.init = pep_init,
.setsockopt = pep_setsockopt,
.getsockopt = pep_getsockopt,
.sendmsg = pep_sendmsg,
.recvmsg = pep_recvmsg,
.backlog_rcv = pep_do_rcv,
.hash = pn_sock_hash,
.unhash = pep_sock_unhash,
.get_port = pn_sock_get_port,
.obj_size = sizeof(struct pep_sock),
.owner = THIS_MODULE,
.name = "PNPIPE",
};
static struct phonet_protocol pep_pn_proto = {
.ops = &phonet_stream_ops,
.prot = &pep_proto,
.sock_type = SOCK_SEQPACKET,
};
static int __init pep_register(void)
{
return phonet_proto_register(PN_PROTO_PIPE, &pep_pn_proto);
}
static void __exit pep_unregister(void)
{
phonet_proto_unregister(PN_PROTO_PIPE, &pep_pn_proto);
}
module_init(pep_register);
module_exit(pep_unregister);
MODULE_AUTHOR("Remi Denis-Courmont, Nokia");
MODULE_DESCRIPTION("Phonet pipe protocol");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_PHONET, PN_PROTO_PIPE);