Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pshelar/openvswitch

Pravin B Shelar says:

====================
Open vSwitch

A set of OVS changes for net-next/3.16.

Most of change are related to improving performance of flow setup by
minimizing critical sections.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-05-23 14:45:18 -04:00
commit 915f15ca52
6 changed files with 566 additions and 471 deletions

View File

@ -395,7 +395,9 @@ struct ovs_key_nd {
* @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
* the actions to take for packets that match the key. Always present in
* notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for
* %OVS_FLOW_CMD_SET requests.
* %OVS_FLOW_CMD_SET requests. An %OVS_FLOW_CMD_SET without
* %OVS_FLOW_ATTR_ACTIONS will not modify the actions. To clear the actions,
* an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given.
* @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
* flow. Present in notifications if the stats would be nonzero. Ignored in
* requests.

File diff suppressed because it is too large Load Diff

View File

@ -64,17 +64,11 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
{
struct flow_stats *stats;
__be16 tcp_flags = 0;
__be16 tcp_flags = flow->key.tp.flags;
int node = numa_node_id();
stats = rcu_dereference(flow->stats[node]);
if (likely(flow->key.ip.proto == IPPROTO_TCP)) {
if (likely(flow->key.eth.type == htons(ETH_P_IP)))
tcp_flags = flow->key.ipv4.tp.flags;
else if (likely(flow->key.eth.type == htons(ETH_P_IPV6)))
tcp_flags = flow->key.ipv6.tp.flags;
}
/* Check if already have node-specific stats. */
if (likely(stats)) {
spin_lock(&stats->lock);
@ -128,7 +122,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
spin_unlock(&stats->lock);
}
void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
/* Must be called with rcu_read_lock or ovs_mutex. */
void ovs_flow_stats_get(const struct sw_flow *flow,
struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
int node;
@ -138,7 +134,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
memset(ovs_stats, 0, sizeof(*ovs_stats));
for_each_node(node) {
struct flow_stats *stats = rcu_dereference(flow->stats[node]);
struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
if (stats) {
/* Local CPU may write on non-local stats, so we must
@ -155,12 +151,13 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
}
}
/* Called with ovs_mutex. */
void ovs_flow_stats_clear(struct sw_flow *flow)
{
int node;
for_each_node(node) {
struct flow_stats *stats = rcu_dereference(flow->stats[node]);
struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
if (stats) {
spin_lock_bh(&stats->lock);
@ -357,8 +354,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
/* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
key->ipv6.tp.src = htons(icmp->icmp6_type);
key->ipv6.tp.dst = htons(icmp->icmp6_code);
key->tp.src = htons(icmp->icmp6_type);
key->tp.dst = htons(icmp->icmp6_code);
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@ -520,21 +517,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
if (key->ip.proto == IPPROTO_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
key->tp.src = udp->source;
key->tp.dst = udp->dest;
}
} else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->ipv4.tp.src = sctp->source;
key->ipv4.tp.dst = sctp->dest;
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
}
} else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
@ -542,8 +539,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store
* them in 16-bit network byte order. */
key->ipv4.tp.src = htons(icmp->type);
key->ipv4.tp.dst = htons(icmp->code);
key->tp.src = htons(icmp->type);
key->tp.dst = htons(icmp->code);
}
}
@ -589,21 +586,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp.src = tcp->source;
key->ipv6.tp.dst = tcp->dest;
key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
}
} else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv6.tp.src = udp->source;
key->ipv6.tp.dst = udp->dest;
key->tp.src = udp->source;
key->tp.dst = udp->dest;
}
} else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->ipv6.tp.src = sctp->source;
key->ipv6.tp.dst = sctp->dest;
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
}
} else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {

View File

@ -47,7 +47,7 @@ struct ovs_key_ipv4_tunnel {
__be16 tun_flags;
u8 ipv4_tos;
u8 ipv4_ttl;
};
} __packed __aligned(4); /* Minimize padding. */
static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
const struct iphdr *iph, __be64 tun_id,
@ -71,7 +71,7 @@ struct sw_flow_key {
u32 priority; /* Packet QoS priority. */
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} phy;
} __packed phy; /* Safe when right after 'tun_key'. */
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
@ -84,23 +84,21 @@ struct sw_flow_key {
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
__be16 flags; /* TCP flags. */
} tp;
union {
struct {
struct {
__be32 src; /* IP source address. */
__be32 dst; /* IP destination address. */
} addr;
union {
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
__be16 flags; /* TCP flags. */
} tp;
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
} arp;
};
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
} arp;
} ipv4;
struct {
struct {
@ -108,11 +106,6 @@ struct sw_flow_key {
struct in6_addr dst; /* IPv6 destination address. */
} addr;
__be32 label; /* IPv6 flow label. */
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
__be16 flags; /* TCP flags. */
} tp;
struct {
struct in6_addr target; /* ND target address. */
u8 sll[ETH_ALEN]; /* ND source link layer address. */
@ -187,10 +180,10 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats,
void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *);
void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
unsigned long *used, __be16 *tcp_flags);
void ovs_flow_stats_clear(struct sw_flow *flow);
void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);

View File

@ -204,11 +204,11 @@ static bool match_validate(const struct sw_flow_match *match,
if (match->mask && (match->mask->key.ip.proto == 0xff))
mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
if (match->key->ipv6.tp.src ==
if (match->key->tp.src ==
htons(NDISC_NEIGHBOUR_SOLICITATION) ||
match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
key_expected |= 1 << OVS_KEY_ATTR_ND;
if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
if (match->mask && (match->mask->key.tp.src == htons(0xffff)))
mask_allowed |= 1 << OVS_KEY_ATTR_ND;
}
}
@ -630,27 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_tcp *tcp_key;
tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
SW_FLOW_KEY_PUT(match, ipv4.tp.src,
tcp_key->tcp_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
tcp_key->tcp_dst, is_mask);
} else {
SW_FLOW_KEY_PUT(match, ipv6.tp.src,
tcp_key->tcp_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
tcp_key->tcp_dst, is_mask);
}
SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_TCP);
}
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
} else {
SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
SW_FLOW_KEY_PUT(match, tp.flags,
nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
is_mask);
}
@ -661,17 +652,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_udp *udp_key;
udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
SW_FLOW_KEY_PUT(match, ipv4.tp.src,
udp_key->udp_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
udp_key->udp_dst, is_mask);
} else {
SW_FLOW_KEY_PUT(match, ipv6.tp.src,
udp_key->udp_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
udp_key->udp_dst, is_mask);
}
SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_UDP);
}
@ -679,17 +661,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_sctp *sctp_key;
sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
SW_FLOW_KEY_PUT(match, ipv4.tp.src,
sctp_key->sctp_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
sctp_key->sctp_dst, is_mask);
} else {
SW_FLOW_KEY_PUT(match, ipv6.tp.src,
sctp_key->sctp_src, is_mask);
SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
sctp_key->sctp_dst, is_mask);
}
SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
}
@ -697,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_icmp *icmp_key;
icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
SW_FLOW_KEY_PUT(match, ipv4.tp.src,
SW_FLOW_KEY_PUT(match, tp.src,
htons(icmp_key->icmp_type), is_mask);
SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmp_key->icmp_code), is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
}
@ -708,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct ovs_key_icmpv6 *icmpv6_key;
icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
SW_FLOW_KEY_PUT(match, ipv6.tp.src,
SW_FLOW_KEY_PUT(match, tp.src,
htons(icmpv6_key->icmpv6_type), is_mask);
SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
SW_FLOW_KEY_PUT(match, tp.dst,
htons(icmpv6_key->icmpv6_code), is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
}
@ -1024,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
tcp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
tcp_key->tcp_src = output->ipv4.tp.src;
tcp_key->tcp_dst = output->ipv4.tp.dst;
if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
output->ipv4.tp.flags))
goto nla_put_failure;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
tcp_key->tcp_src = output->ipv6.tp.src;
tcp_key->tcp_dst = output->ipv6.tp.dst;
if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
output->ipv6.tp.flags))
goto nla_put_failure;
}
tcp_key->tcp_src = output->tp.src;
tcp_key->tcp_dst = output->tp.dst;
if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
output->tp.flags))
goto nla_put_failure;
} else if (swkey->ip.proto == IPPROTO_UDP) {
struct ovs_key_udp *udp_key;
@ -1044,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
udp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
udp_key->udp_src = output->ipv4.tp.src;
udp_key->udp_dst = output->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
udp_key->udp_src = output->ipv6.tp.src;
udp_key->udp_dst = output->ipv6.tp.dst;
}
udp_key->udp_src = output->tp.src;
udp_key->udp_dst = output->tp.dst;
} else if (swkey->ip.proto == IPPROTO_SCTP) {
struct ovs_key_sctp *sctp_key;
@ -1058,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
sctp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
sctp_key->sctp_src = output->ipv4.tp.src;
sctp_key->sctp_dst = output->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
sctp_key->sctp_src = output->ipv6.tp.src;
sctp_key->sctp_dst = output->ipv6.tp.dst;
}
sctp_key->sctp_src = output->tp.src;
sctp_key->sctp_dst = output->tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) {
struct ovs_key_icmp *icmp_key;
@ -1073,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
icmp_key = nla_data(nla);
icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
icmp_key->icmp_type = ntohs(output->tp.src);
icmp_key->icmp_code = ntohs(output->tp.dst);
} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
swkey->ip.proto == IPPROTO_ICMPV6) {
struct ovs_key_icmpv6 *icmpv6_key;
@ -1084,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
if (!nla)
goto nla_put_failure;
icmpv6_key = nla_data(nla);
icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
icmpv6_key->icmpv6_type = ntohs(output->tp.src);
icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@ -1263,13 +1218,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
static int validate_tp_port(const struct sw_flow_key *flow_key)
{
if (flow_key->eth.type == htons(ETH_P_IP)) {
if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
return 0;
} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
return 0;
}
if ((flow_key->eth.type == htons(ETH_P_IP) ||
flow_key->eth.type == htons(ETH_P_IPV6)) &&
(flow_key->tp.src || flow_key->tp.dst))
return 0;
return -EINVAL;
}

View File

@ -139,7 +139,7 @@ static void flow_free(struct sw_flow *flow)
{
int node;
kfree((struct sf_flow_acts __force *)flow->sf_acts);
kfree((struct sw_flow_actions __force *)flow->sf_acts);
for_each_node(node)
if (flow->stats[node])
kmem_cache_free(flow_stats_cache,
@ -159,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred)
if (!flow)
return;
if (flow->mask) {
struct sw_flow_mask *mask = flow->mask;
/* ovs-lock is required to protect mask-refcount and
* mask list.
*/
ASSERT_OVSL();
BUG_ON(!mask->ref_count);
mask->ref_count--;
if (!mask->ref_count) {
list_del_rcu(&mask->list);
if (deferred)
kfree_rcu(mask, rcu);
else
kfree(mask);
}
}
if (deferred)
call_rcu(&flow->rcu, rcu_free_flow_callback);
else
@ -491,6 +472,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti)
return table_instance_rehash(ti, ti->n_buckets * 2);
}
/* Remove 'mask' from the mask list, if it is not needed any more. */
static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
{
if (mask) {
/* ovs-lock is required to protect mask-refcount and
* mask list.
*/
ASSERT_OVSL();
BUG_ON(!mask->ref_count);
mask->ref_count--;
if (!mask->ref_count) {
list_del_rcu(&mask->list);
kfree_rcu(mask, rcu);
}
}
}
/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
@ -498,6 +498,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
BUG_ON(table->count == 0);
hlist_del_rcu(&flow->hash_node[ti->node_ver]);
table->count--;
/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
* accessible as long as the RCU read lock is held.
*/
flow_mask_remove(table, flow->mask);
}
static struct sw_flow_mask *mask_alloc(void)
@ -560,6 +565,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return 0;
}
/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
struct sw_flow_mask *mask)
{