Merge branch 'tso_fix'

Vladislav Yasevich says:

====================
Fix TSO and checksum issues with non-accelerated vlan traffic.

I've recently ran across something rather interesting when testing vlans
from inside VMs.  In some scenarios I was getting awfull thruput.
Some debugging uncovered a very scary packet corruption.  I was
seeing packets that had original TSO length as IP total length
and their ip checksum was 0.  This was with e1000e driver.

A bit more debugging uncovered an assumption made by that driver
that skb->protocol will contain l3 protocol information.  This
was not the case in my setup since I manually turned off vlan
tx acceleration for the device.  This caused the driver to not
initialize the tso information correctly and resulted in
corrupt TSO frames on the wire.

I decided to do some auditing of the usage of skb->protocols
in the drivers.  Out of all the drivers, the included 8 appear
to be effected.  They all allow user to control vlan acceleration
settings, all support TSO on vlan devices, and all use
skb->protocol to decide how to encode TSO information.  Some
also have similar problems when initializing hw checksum information.
On such device, it is simple enough to reproduce the issue.
Simply turn off TX VLAN acceleration on the device, create a vlan,
and run you favorite network performance tool.

There is 1 driver I ran across that I belive will trigger a BUG
in the system when used with vlans.  That driver is tile/tilepro.c
I have not changed it in this patch set and would hope that
the maintainer has time to look at it.

V2: Fix i40ev using the wrong function name.  Full build.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-08-25 17:27:47 -07:00
commit 8dbb200fa4
8 changed files with 34 additions and 31 deletions

View File

@ -2506,7 +2506,7 @@ bnad_tso_prepare(struct bnad *bnad, struct sk_buff *skb)
* For TSO, the TCP checksum field is seeded with pseudo-header sum
* excluding the length field.
*/
if (skb->protocol == htons(ETH_P_IP)) {
if (vlan_get_protocol(skb) == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
/* Do we really need these? */
@ -2870,12 +2870,13 @@ bnad_txq_wi_prepare(struct bnad *bnad, struct bna_tcb *tcb,
}
if (skb->ip_summed == CHECKSUM_PARTIAL) {
__be16 net_proto = vlan_get_protocol(skb);
u8 proto = 0;
if (skb->protocol == htons(ETH_P_IP))
if (net_proto == htons(ETH_P_IP))
proto = ip_hdr(skb)->protocol;
#ifdef NETIF_F_IPV6_CSUM
else if (skb->protocol == htons(ETH_P_IPV6)) {
else if (net_proto == htons(ETH_P_IPV6)) {
/* nexthdr may not be TCP immediately. */
proto = ipv6_hdr(skb)->nexthdr;
}

View File

@ -1994,7 +1994,7 @@ static void xmit_common(struct sk_buff *skb, struct ehea_swqe *swqe)
{
swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT | EHEA_SWQE_CRC;
if (skb->protocol != htons(ETH_P_IP))
if (vlan_get_protocol(skb) != htons(ETH_P_IP))
return;
if (skb->ip_summed == CHECKSUM_PARTIAL)

View File

@ -2674,7 +2674,8 @@ static void e1000_set_itr(struct e1000_adapter *adapter)
#define E1000_TX_FLAGS_VLAN_SHIFT 16
static int e1000_tso(struct e1000_adapter *adapter,
struct e1000_tx_ring *tx_ring, struct sk_buff *skb)
struct e1000_tx_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
@ -2692,7 +2693,7 @@ static int e1000_tso(struct e1000_adapter *adapter,
hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
mss = skb_shinfo(skb)->gso_size;
if (skb->protocol == htons(ETH_P_IP)) {
if (protocol == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = 0;
iph->check = 0;
@ -2702,7 +2703,7 @@ static int e1000_tso(struct e1000_adapter *adapter,
0);
cmd_length = E1000_TXD_CMD_IP;
ipcse = skb_transport_offset(skb) - 1;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
} else if (skb_is_gso_v6(skb)) {
ipv6_hdr(skb)->payload_len = 0;
tcp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
@ -2745,7 +2746,8 @@ static int e1000_tso(struct e1000_adapter *adapter,
}
static bool e1000_tx_csum(struct e1000_adapter *adapter,
struct e1000_tx_ring *tx_ring, struct sk_buff *skb)
struct e1000_tx_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
@ -2756,7 +2758,7 @@ static bool e1000_tx_csum(struct e1000_adapter *adapter,
if (skb->ip_summed != CHECKSUM_PARTIAL)
return false;
switch (skb->protocol) {
switch (protocol) {
case cpu_to_be16(ETH_P_IP):
if (ip_hdr(skb)->protocol == IPPROTO_TCP)
cmd_len |= E1000_TXD_CMD_TCP;
@ -3097,6 +3099,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
int count = 0;
int tso;
unsigned int f;
__be16 protocol = vlan_get_protocol(skb);
/* This goes back to the question of how to logically map a Tx queue
* to a flow. Right now, performance is impacted slightly negatively
@ -3210,7 +3213,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
first = tx_ring->next_to_use;
tso = e1000_tso(adapter, tx_ring, skb);
tso = e1000_tso(adapter, tx_ring, skb, protocol);
if (tso < 0) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
@ -3220,10 +3223,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
if (likely(hw->mac_type != e1000_82544))
tx_ring->last_tx_tso = true;
tx_flags |= E1000_TX_FLAGS_TSO;
} else if (likely(e1000_tx_csum(adapter, tx_ring, skb)))
} else if (likely(e1000_tx_csum(adapter, tx_ring, skb, protocol)))
tx_flags |= E1000_TX_FLAGS_CSUM;
if (likely(skb->protocol == htons(ETH_P_IP)))
if (protocol == htons(ETH_P_IP))
tx_flags |= E1000_TX_FLAGS_IPV4;
if (unlikely(skb->no_fcs))

View File

@ -5164,7 +5164,8 @@ static void e1000_watchdog_task(struct work_struct *work)
#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000
#define E1000_TX_FLAGS_VLAN_SHIFT 16
static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
@ -5183,7 +5184,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
mss = skb_shinfo(skb)->gso_size;
if (skb->protocol == htons(ETH_P_IP)) {
if (protocol == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = 0;
iph->check = 0;
@ -5231,7 +5232,8 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb)
return 1;
}
static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb)
static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb,
__be16 protocol)
{
struct e1000_adapter *adapter = tx_ring->adapter;
struct e1000_context_desc *context_desc;
@ -5239,16 +5241,10 @@ static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb)
unsigned int i;
u8 css;
u32 cmd_len = E1000_TXD_CMD_DEXT;
__be16 protocol;
if (skb->ip_summed != CHECKSUM_PARTIAL)
return false;
if (skb->protocol == cpu_to_be16(ETH_P_8021Q))
protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
else
protocol = skb->protocol;
switch (protocol) {
case cpu_to_be16(ETH_P_IP):
if (ip_hdr(skb)->protocol == IPPROTO_TCP)
@ -5546,6 +5542,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
int count = 0;
int tso;
unsigned int f;
__be16 protocol = vlan_get_protocol(skb);
if (test_bit(__E1000_DOWN, &adapter->state)) {
dev_kfree_skb_any(skb);
@ -5620,7 +5617,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
first = tx_ring->next_to_use;
tso = e1000_tso(tx_ring, skb);
tso = e1000_tso(tx_ring, skb, protocol);
if (tso < 0) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
@ -5628,14 +5625,14 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
if (tso)
tx_flags |= E1000_TX_FLAGS_TSO;
else if (e1000_tx_csum(tx_ring, skb))
else if (e1000_tx_csum(tx_ring, skb, protocol))
tx_flags |= E1000_TX_FLAGS_CSUM;
/* Old method was to assume IPv4 packet by default if TSO was enabled.
* 82571 hardware supports TSO capabilities for IPv6 as well...
* no longer assume, we must.
*/
if (skb->protocol == htons(ETH_P_IP))
if (protocol == htons(ETH_P_IP))
tx_flags |= E1000_TX_FLAGS_IPV4;
if (unlikely(skb->no_fcs))

View File

@ -2295,7 +2295,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
goto out_drop;
/* obtain protocol of skb */
protocol = skb->protocol;
protocol = vlan_get_protocol(skb);
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_bi[tx_ring->next_to_use];

View File

@ -1597,7 +1597,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
goto out_drop;
/* obtain protocol of skb */
protocol = skb->protocol;
protocol = vlan_get_protocol(skb);
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_bi[tx_ring->next_to_use];

View File

@ -1371,15 +1371,16 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_PARTIAL) {
int ip_hdr_len = 0;
__be16 l3_proto = vlan_get_protocol(skb);
u8 l4_proto;
if (skb->protocol == htons(ETH_P_IP)) {
if (l3_proto == htons(ETH_P_IP)) {
struct iphdr *ip4h = ip_hdr(skb);
/* Calculate IPv4 checksum and L4 checksum */
ip_hdr_len = ip4h->ihl;
l4_proto = ip4h->protocol;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
} else if (l3_proto == htons(ETH_P_IPV6)) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);
/* Read l4_protocol from one of IPv6 extra headers */
@ -1390,7 +1391,7 @@ static u32 mvneta_skb_tx_csum(struct mvneta_port *pp, struct sk_buff *skb)
return MVNETA_TX_L4_CSUM_NOT;
return mvneta_txq_desc_csum(skb_network_offset(skb),
skb->protocol, ip_hdr_len, l4_proto);
l3_proto, ip_hdr_len, l4_proto);
}
return MVNETA_TX_L4_CSUM_NOT;

View File

@ -2556,6 +2556,7 @@ static int ql_tso(struct sk_buff *skb, struct ob_mac_tso_iocb_req *mac_iocb_ptr)
if (skb_is_gso(skb)) {
int err;
__be16 l3_proto = vlan_get_protocol(skb);
err = skb_cow_head(skb, 0);
if (err < 0)
@ -2572,7 +2573,7 @@ static int ql_tso(struct sk_buff *skb, struct ob_mac_tso_iocb_req *mac_iocb_ptr)
<< OB_MAC_TRANSPORT_HDR_SHIFT);
mac_iocb_ptr->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
mac_iocb_ptr->flags2 |= OB_MAC_TSO_IOCB_LSO;
if (likely(skb->protocol == htons(ETH_P_IP))) {
if (likely(l3_proto == htons(ETH_P_IP))) {
struct iphdr *iph = ip_hdr(skb);
iph->check = 0;
mac_iocb_ptr->flags1 |= OB_MAC_TSO_IOCB_IP4;
@ -2580,7 +2581,7 @@ static int ql_tso(struct sk_buff *skb, struct ob_mac_tso_iocb_req *mac_iocb_ptr)
iph->daddr, 0,
IPPROTO_TCP,
0);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
} else if (l3_proto == htons(ETH_P_IPV6)) {
mac_iocb_ptr->flags1 |= OB_MAC_TSO_IOCB_IP6;
tcp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,