linux_dsm_epyc7002/net/mptcp/pm.c
Paolo Abeni 01cacb00b3 mptcp: add netlink-based PM
Expose a new netlink family to userspace to control the PM, setting:

 - list of local addresses to be signalled.
 - list of local addresses used to created subflows.
 - maximum number of add_addr option to react

When the msk is fully established, the PM netlink attempts to
announce the 'signal' list via the ADD_ADDR option. Since we
currently lack the ADD_ADDR echo (and related event) only the
first addr is sent.

After exhausting the 'announce' list, the PM tries to create
subflow for each addr in 'local' list, waiting for each
connection to be completed before attempting the next one.

Idea is to add an additional PM hook for ADD_ADDR echo, to allow
the PM netlink announcing multiple addresses, in sequence.

Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-03-29 22:14:49 -07:00

243 lines
5.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
*
* Copyright (c) 2019, Intel Corporation.
*/
#include <linux/kernel.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
static struct workqueue_struct *pm_wq;
/* path manager command handlers */
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
pr_debug("msk=%p, local_id=%d", msk, addr->id);
msk->pm.local = *addr;
WRITE_ONCE(msk->pm.addr_signal, true);
return 0;
}
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id)
{
return -ENOTSUPP;
}
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id)
{
return -ENOTSUPP;
}
/* path manager event handlers */
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
WRITE_ONCE(pm->server_side, server_side);
}
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
int ret;
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
pm->subflows_max, READ_ONCE(pm->accept_subflow));
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->accept_subflow))
return false;
spin_lock_bh(&pm->lock);
ret = pm->subflows < pm->subflows_max;
if (ret && ++pm->subflows == pm->subflows_max)
WRITE_ONCE(pm->accept_subflow, false);
spin_unlock_bh(&pm->lock);
return ret;
}
/* return true if the new status bit is currently cleared, that is, this event
* can be server, eventually by an already scheduled work
*/
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
enum mptcp_pm_status new_status)
{
pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status,
BIT(new_status));
if (msk->pm.status & BIT(new_status))
return false;
msk->pm.status |= BIT(new_status);
if (queue_work(pm_wq, &msk->pm.work))
sock_hold((struct sock *)msk);
return true;
}
void mptcp_pm_fully_established(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p", msk);
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->work_pending))
return;
spin_lock_bh(&pm->lock);
if (READ_ONCE(pm->work_pending))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
pr_debug("msk=%p", msk);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p", msk);
if (!READ_ONCE(pm->work_pending))
return;
spin_lock_bh(&pm->lock);
if (READ_ONCE(pm->work_pending))
mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
spin_unlock_bh(&pm->lock);
}
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id)
{
pr_debug("msk=%p", msk);
}
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
/* avoid acquiring the lock if there is no room for fouther addresses */
if (!READ_ONCE(pm->accept_addr))
return;
spin_lock_bh(&pm->lock);
/* be sure there is something to signal re-checking under PM lock */
if (READ_ONCE(pm->accept_addr) &&
mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED))
pm->remote = *addr;
spin_unlock_bh(&pm->lock);
}
/* path manager helpers */
bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_addr_info *saddr)
{
int ret = false;
spin_lock_bh(&msk->pm.lock);
/* double check after the lock is acquired */
if (!mptcp_pm_should_signal(msk))
goto out_unlock;
if (remaining < mptcp_add_addr_len(msk->pm.local.family))
goto out_unlock;
*saddr = msk->pm.local;
WRITE_ONCE(msk->pm.addr_signal, false);
ret = true;
out_unlock:
spin_unlock_bh(&msk->pm.lock);
return ret;
}
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
return mptcp_pm_nl_get_local_id(msk, skc);
}
static void pm_worker(struct work_struct *work)
{
struct mptcp_pm_data *pm = container_of(work, struct mptcp_pm_data,
work);
struct mptcp_sock *msk = container_of(pm, struct mptcp_sock, pm);
struct sock *sk = (struct sock *)msk;
lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
mptcp_pm_nl_add_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
mptcp_pm_nl_fully_established(msk);
}
if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
mptcp_pm_nl_subflow_established(msk);
}
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
sock_put(sk);
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
msk->pm.add_addr_accepted = 0;
msk->pm.local_addr_used = 0;
msk->pm.subflows = 0;
WRITE_ONCE(msk->pm.work_pending, false);
WRITE_ONCE(msk->pm.addr_signal, false);
WRITE_ONCE(msk->pm.accept_addr, false);
WRITE_ONCE(msk->pm.accept_subflow, false);
msk->pm.status = 0;
spin_lock_init(&msk->pm.lock);
INIT_WORK(&msk->pm.work, pm_worker);
mptcp_pm_nl_data_init(msk);
}
void mptcp_pm_close(struct mptcp_sock *msk)
{
if (cancel_work_sync(&msk->pm.work))
sock_put((struct sock *)msk);
}
void mptcp_pm_init(void)
{
pm_wq = alloc_workqueue("pm_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8);
if (!pm_wq)
panic("Failed to allocate workqueue");
mptcp_pm_nl_init();
}