Merge branch 'ovs-sample-action-optimization'

Andy Zhou says:

====================
net-next sample action optimization v4

The sample action can be used for translating the OpenFlow 'clone' action.
However, its implementation has not been sufficiently optimized for this
use case. This series attempts to close the gap.

Patch 3 commit message has more details on the specific optimizations
implemented.

---
v3->v4: Enhance patch 4.
        Fix two bugs pointed out by Pravin,
        Remove 'is_sample' variable.

v2->v3: Enhance patch 4, Refactor to move more common logic to clone_execute().

v1->v2: Address Pravin's comment, Refactor recirc and sample
        to share more common code
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-03-22 11:28:35 -07:00
commit b2a1674aa1
4 changed files with 261 additions and 164 deletions

View File

@ -578,10 +578,25 @@ enum ovs_sample_attr {
OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
__OVS_SAMPLE_ATTR_MAX,
#ifdef __KERNEL__
OVS_SAMPLE_ATTR_ARG /* struct sample_arg */
#endif
};
#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
#ifdef __KERNEL__
/* Kernel-internal argument of a sample action. It is added to the copied
 * flow actions as the OVS_SAMPLE_ATTR_ARG attribute, ahead of the nested
 * sample actions, and is read back when the sample action is executed.
 */
struct sample_arg {
bool exec; /* Decided when the flow actions are copied:
* true when the sample is the last action or
* when none of its nested actions may change
* the flow key. When true, the nested actions
* are run on the caller's flow key instead of
* on a clone of it.
*/
u32 probability; /* Same value as
* 'OVS_SAMPLE_ATTR_PROBABILITY'.
* U32_MAX always samples and 0 never does.
*/
};
#endif
/**
* enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
* @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2014 Nicira, Inc.
* Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -44,13 +44,10 @@
#include "conntrack.h"
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
const struct nlattr *attr, int len);
struct deferred_action {
struct sk_buff *skb;
const struct nlattr *actions;
int actions_len;
/* Store pkt_key clone when creating deferred action. */
struct sw_flow_key pkt_key;
@ -82,14 +79,31 @@ struct action_fifo {
struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};
struct recirc_keys {
struct action_flow_keys {
struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};
static struct action_fifo __percpu *action_fifos;
static struct recirc_keys __percpu *recirc_keys;
static struct action_flow_keys __percpu *flow_keys;
static DEFINE_PER_CPU(int, exec_actions_level);
/* Copy 'key_' into the per-CPU 'flow_keys' slot that belongs to the
 * current 'exec_actions_level' and return a pointer to that copy.
 *
 * Returns NULL, without copying anything, when the current level is
 * above OVS_DEFERRED_ACTION_THRESHOLD and therefore has no slot.
 */
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
	struct action_flow_keys *percpu_keys = this_cpu_ptr(flow_keys);
	int depth = this_cpu_read(exec_actions_level);
	struct sw_flow_key *slot;

	/* No pre-allocated key space left for this nesting depth. */
	if (depth > OVS_DEFERRED_ACTION_THRESHOLD)
		return NULL;

	/* Slots are indexed from zero, levels are used from one. */
	slot = &percpu_keys->key[depth - 1];
	*slot = *key_;

	return slot;
}
static void action_fifo_init(struct action_fifo *fifo)
{
fifo->head = 0;
@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
/* Return true if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct sw_flow_key *key,
const struct nlattr *attr)
const struct sw_flow_key *key,
const struct nlattr *actions,
const int actions_len)
{
struct action_fifo *fifo;
struct deferred_action *da;
@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
da->actions = attr;
da->actions = actions;
da->actions_len = actions_len;
da->pkt_key = *key;
}
@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key)
return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}
static int clone_execute(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
u32 recirc_id,
const struct nlattr *actions, int len,
bool last, bool clone_flow_key);
static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
__be16 ethertype)
{
@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}
/* When 'last' is true, sample() should always consume the 'skb'.
* Otherwise, sample() should keep 'skb' intact regardless what
* actions are executed within sample().
*/
static int sample(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
bool last)
{
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
int rem;
u32 cutlen = 0;
struct nlattr *actions;
struct nlattr *sample_arg;
int rem = nla_len(attr);
const struct sample_arg *arg;
bool clone_flow_key;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
u32 probability;
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr);
arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem);
switch (nla_type(a)) {
case OVS_SAMPLE_ATTR_PROBABILITY:
probability = nla_get_u32(a);
if (!probability || prandom_u32() > probability)
return 0;
break;
case OVS_SAMPLE_ATTR_ACTIONS:
acts_list = a;
break;
}
}
rem = nla_len(acts_list);
a = nla_data(acts_list);
/* Actions list is empty, do nothing */
if (unlikely(!rem))
if ((arg->probability != U32_MAX) &&
(!arg->probability || prandom_u32() > arg->probability)) {
if (last)
consume_skb(skb);
return 0;
/* The only known usage of sample action is having a single user-space
* action, or having a truncate action followed by a single user-space
* action. Treat this usage as a special case.
* The output_userspace() should clone the skb to be sent to the
* user space. This skb will be consumed by its caller.
*/
if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
struct ovs_action_trunc *trunc = nla_data(a);
if (skb->len > trunc->max_len)
cutlen = skb->len - trunc->max_len;
a = nla_next(a, &rem);
}
if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
nla_is_last(a, rem)))
return output_userspace(dp, skb, key, a, actions,
actions_len, cutlen);
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb)
/* Skip the sample action when out of memory. */
return 0;
if (!add_deferred_actions(skb, key, a)) {
if (net_ratelimit())
pr_warn("%s: deferred actions limit reached, dropping sample action\n",
ovs_dp_name(dp));
kfree_skb(skb);
}
return 0;
clone_flow_key = !arg->exec;
return clone_execute(dp, skb, key, 0, actions, rem, last,
clone_flow_key);
}
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb,
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
const struct nlattr *a, int rem)
const struct nlattr *a, bool last)
{
struct deferred_action *da;
int level;
u32 recirc_id;
if (!is_flow_key_valid(key)) {
int err;
@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
}
BUG_ON(!is_flow_key_valid(key));
if (!nla_is_last(a, rem)) {
/* Recirc action is the not the last action
* of the action list, need to clone the skb.
*/
skb = skb_clone(skb, GFP_ATOMIC);
/* Skip the recirc action when out of memory, but
* continue on with the rest of the action list.
*/
if (!skb)
return 0;
}
level = this_cpu_read(exec_actions_level);
if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
struct sw_flow_key *recirc_key = &rks->key[level - 1];
*recirc_key = *key;
recirc_key->recirc_id = nla_get_u32(a);
ovs_dp_process_packet(skb, recirc_key);
return 0;
}
da = add_deferred_actions(skb, key, NULL);
if (da) {
da->pkt_key.recirc_id = nla_get_u32(a);
} else {
kfree_skb(skb);
if (net_ratelimit())
pr_warn("%s: deferred action limit reached, drop recirc action\n",
ovs_dp_name(dp));
}
return 0;
recirc_id = nla_get_u32(a);
return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
}
/* Execute a list of actions against 'skb'. */
@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb, key);
break;
case OVS_ACTION_ATTR_RECIRC:
err = execute_recirc(dp, skb, key, a, rem);
if (nla_is_last(a, rem)) {
case OVS_ACTION_ATTR_RECIRC: {
bool last = nla_is_last(a, rem);
err = execute_recirc(dp, skb, key, a, last);
if (last) {
/* If this is the last action, the skb has
* been consumed or freed.
* Return immediately.
@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return err;
}
break;
}
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, key, nla_data(a));
@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = execute_masked_set_action(skb, key, nla_data(a));
break;
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, key, a, attr, len);
case OVS_ACTION_ATTR_SAMPLE: {
bool last = nla_is_last(a, rem);
err = sample(dp, skb, key, a, last);
if (last)
return err;
break;
}
case OVS_ACTION_ATTR_CT:
if (!is_flow_key_valid(key)) {
@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return 0;
}
/* Common back end of the sample and recirc actions.
 *
 * A non-NULL 'actions' (of 'len' bytes) means a sample action: the list is
 * run through do_execute_actions(). A NULL 'actions' means a recirc
 * action: 'recirc_id' is stored in the flow key and the packet is passed
 * to ovs_dp_process_packet().
 *
 * When 'last' is true the caller's 'skb' is used as is, otherwise the work
 * is done on a clone of it; if the clone cannot be allocated the action is
 * skipped.
 *
 * When 'clone_flow_key' is false the 'key' will not be changed by the
 * actions, so it is used directly. Otherwise a copy is requested from
 * clone_key(). If no copy is available the work is queued with
 * add_deferred_actions() instead of being run now; if the queue is full as
 * well, the skb is freed and a rate limited warning is logged.
 *
 * Returns the result of do_execute_actions() when sample actions are run
 * immediately, and 0 in every other case.
 */
static int clone_execute(struct datapath *dp, struct sk_buff *skb,
			 struct sw_flow_key *key, u32 recirc_id,
			 const struct nlattr *actions, int len,
			 bool last, bool clone_flow_key)
{
	struct deferred_action *deferred;
	struct sw_flow_key *exec_key;
	int err;

	if (!last)
		skb = skb_clone(skb, GFP_ATOMIC);

	/* Out of memory, skip this action. */
	if (!skb)
		return 0;

	if (clone_flow_key)
		exec_key = clone_key(key);
	else
		exec_key = key;

	if (!exec_key) {
		/* Out of 'flow_keys' space. Defer actions */
		deferred = add_deferred_actions(skb, key, actions, len);
		if (deferred) {
			/* Recirc action: the id travels in the queued key. */
			if (!actions)
				deferred->pkt_key.recirc_id = recirc_id;
			return 0;
		}

		/* Out of per CPU action FIFO space. Drop the 'skb' and
		 * log an error.
		 */
		kfree_skb(skb);
		if (net_ratelimit()) {
			if (actions) /* Sample action */
				pr_warn("%s: deferred action limit reached, drop sample action\n",
					ovs_dp_name(dp));
			else /* Recirc action */
				pr_warn("%s: deferred action limit reached, drop recirc action\n",
					ovs_dp_name(dp));
		}
		return 0;
	}

	if (!actions) {
		/* Recirc action */
		exec_key->recirc_id = recirc_id;
		ovs_dp_process_packet(skb, exec_key);
		return 0;
	}

	/* Sample action. The execution level is only raised while a cloned
	 * key is in use.
	 */
	if (clone_flow_key)
		__this_cpu_inc(exec_actions_level);

	err = do_execute_actions(dp, skb, exec_key, actions, len);

	if (clone_flow_key)
		__this_cpu_dec(exec_actions_level);

	return err;
}
static void process_deferred_actions(struct datapath *dp)
{
struct action_fifo *fifo = this_cpu_ptr(action_fifos);
@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp)
struct sk_buff *skb = da->skb;
struct sw_flow_key *key = &da->pkt_key;
const struct nlattr *actions = da->actions;
int actions_len = da->actions_len;
if (actions)
do_execute_actions(dp, skb, key, actions,
nla_len(actions));
do_execute_actions(dp, skb, key, actions, actions_len);
else
ovs_dp_process_packet(skb, key);
} while (!action_fifo_is_empty(fifo));
@ -1323,8 +1354,8 @@ int action_fifos_init(void)
if (!action_fifos)
return -ENOMEM;
recirc_keys = alloc_percpu(struct recirc_keys);
if (!recirc_keys) {
flow_keys = alloc_percpu(struct action_flow_keys);
if (!flow_keys) {
free_percpu(action_fifos);
return -ENOMEM;
}
@ -1335,5 +1366,5 @@ int action_fifos_init(void)
void action_fifos_exit(void)
{
free_percpu(action_fifos);
free_percpu(recirc_keys);
free_percpu(flow_keys);
}

View File

@ -34,8 +34,6 @@
#define DP_MAX_PORTS USHRT_MAX
#define DP_VPORT_HASH_BUCKETS 1024
#define SAMPLE_ACTION_DEPTH 3
/**
* struct dp_stats_percpu - per-cpu packet processing statistics for a given
* datapath.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2014 Nicira, Inc.
* Copyright (c) 2007-2017 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -59,6 +59,39 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
/* Scan the nested attributes of 'actions' and report whether any of them
 * has to be treated as able to change the flow.
 *
 * Only output, recirc, trunc and userspace are treated as leaving the
 * flow alone. Every other action type, including a nested sample and any
 * type not known here, makes the function return true.
 *
 * Returns false when 'actions' holds nothing but the four types above
 * (an empty list included).
 */
static bool actions_may_change_flow(const struct nlattr *actions)
{
	struct nlattr *cur;
	int left;

	nla_for_each_nested(cur, actions, left) {
		u16 type = nla_type(cur);

		switch (type) {
		case OVS_ACTION_ATTR_OUTPUT:
		case OVS_ACTION_ATTR_RECIRC:
		case OVS_ACTION_ATTR_TRUNC:
		case OVS_ACTION_ATTR_USERSPACE:
			/* Flow neutral, keep scanning. */
			continue;

		default:
			return true;
		}
	}

	return false;
}
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
{
@ -2021,18 +2054,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
__be16 eth_type, __be16 vlan_tci,
bool log, bool last)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
const struct nlattr *a;
int rem, start, err, st_acts;
int rem, start, err;
struct sample_arg arg;
memset(attrs, 0, sizeof(attrs));
nla_for_each_nested(a, attr, rem) {
@ -2056,20 +2091,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
nla_data(probability), sizeof(u32), log);
/* When both skb and flow may be changed, put the sample
* into a deferred fifo. On the other hand, if only skb
* may be modified, the actions can be executed in place.
*
* Do this analysis at the flow installation time.
* Set 'arg.exec' to true if the actions can be
* executed without being deferred.
*
* If the sample is the last action, it can always be executed
* rather than deferred.
*/
arg.exec = last || !actions_may_change_flow(actions);
arg.probability = nla_get_u32(probability);
err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
log);
if (err)
return err;
st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
if (st_acts < 0)
return st_acts;
err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
err = __ovs_nla_copy_actions(net, actions, key, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
add_nested_action_end(*sfa, st_acts);
add_nested_action_end(*sfa, start);
return 0;
@ -2406,16 +2453,13 @@ static int copy_action(const struct nlattr *from,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
if (depth >= SAMPLE_ACTION_DEPTH)
return -EOVERFLOW;
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@ -2553,13 +2597,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
break;
case OVS_ACTION_ATTR_SAMPLE:
err = validate_and_copy_sample(net, a, key, depth, sfa,
eth_type, vlan_tci, log);
case OVS_ACTION_ATTR_SAMPLE: {
bool last = nla_is_last(a, rem);
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
log, last);
if (err)
return err;
skip_copy = true;
break;
}
case OVS_ACTION_ATTR_CT:
err = ovs_ct_copy_action(net, a, key, sfa, log);
@ -2613,7 +2661,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return PTR_ERR(*sfa);
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
key->eth.vlan.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
@ -2621,39 +2669,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return err;
}
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
static int sample_action_to_attr(const struct nlattr *attr,
struct sk_buff *skb)
{
const struct nlattr *a;
struct nlattr *start;
int err = 0, rem;
struct nlattr *start, *ac_start = NULL, *sample_arg;
int err = 0, rem = nla_len(attr);
const struct sample_arg *arg;
struct nlattr *actions;
start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
if (!start)
return -EMSGSIZE;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
struct nlattr *st_sample;
sample_arg = nla_data(attr);
arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem);
switch (type) {
case OVS_SAMPLE_ATTR_PROBABILITY:
if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
sizeof(u32), nla_data(a)))
return -EMSGSIZE;
break;
case OVS_SAMPLE_ATTR_ACTIONS:
st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
if (!st_sample)
return -EMSGSIZE;
err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
if (err)
return err;
nla_nest_end(skb, st_sample);
break;
}
if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
err = -EMSGSIZE;
goto out;
}
ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
if (!ac_start) {
err = -EMSGSIZE;
goto out;
}
err = ovs_nla_put_actions(actions, rem, skb);
out:
if (err) {
nla_nest_cancel(skb, ac_start);
nla_nest_cancel(skb, start);
} else {
nla_nest_end(skb, ac_start);
nla_nest_end(skb, start);
}
nla_nest_end(skb, start);
return err;
}