IB/ipoib: Fix transmit queue stalling forever

Commit f56bcd80 ("IPoIB: Use separate CQ for UD send completions")
introduced a bug where the transmit queue could get stopped and never
woken up.  The problem is that send completions are only polled at the
end of the xmit function, so if the send queue fills up and the xmit
path stops the queue, then there is no way for send completions to
ever get polled, and so the transmit queue stays stopped forever.

Fix this by arming the send CQ just before posting the last send
request that fills the send queue.  Then, when the completion event
handler is called, drain the send CQ.  Since it is possible that not
enough send completions are in the CQ, verify that the net queue
has been woken up after draining the send CQ, and if not arm a timer
and drain again at the timer function.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
Eli Cohen 2008-04-30 20:02:45 -07:00 committed by Roland Dreier
parent 3ae15e1623
commit 57ce41d1d1
3 changed files with 46 additions and 6 deletions

View File

@ -334,6 +334,7 @@ struct ipoib_dev_priv {
#endif
int hca_caps;
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
};
struct ipoib_ah {
@ -404,6 +405,7 @@ extern struct workqueue_struct *ipoib_workqueue;
int ipoib_poll(struct napi_struct *napi, int budget);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ib_pd *pd, struct ib_ah_attr *attr);

View File

@ -461,6 +461,26 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
netif_rx_schedule(dev, &priv->napi);
}
static void drain_tx_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned long flags;
spin_lock_irqsave(&priv->tx_lock, flags);
while (poll_tx(priv))
; /* nothing */
if (netif_queue_stopped(dev))
mod_timer(&priv->poll_timer, jiffies + 1);
spin_unlock_irqrestore(&priv->tx_lock, flags);
}
/*
 * Completion handler for the send CQ.
 *
 * @cq:      the completion queue (not used here)
 * @dev_ptr: the struct net_device whose send CQ should be drained
 */
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
	struct net_device *netdev = dev_ptr;

	drain_tx_cq(netdev);
}
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
struct ib_ah *address, u32 qpn,
@ -555,12 +575,22 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
else
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
ipoib_warn(priv, "request notify on send CQ failed\n");
netif_stop_queue(dev);
}
if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
address->ah, qpn, tx_req, phead, hlen))) {
ipoib_warn(priv, "post_send failed\n");
++dev->stats.tx_errors;
--priv->tx_outstanding;
ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
} else {
dev->trans_start = jiffies;
@ -568,14 +598,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
++priv->tx_head;
skb_orphan(skb);
if (++priv->tx_outstanding == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
}
}
if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
poll_tx(priv);
while (poll_tx(priv))
; /* nothing */
}
static void __ipoib_reap_ah(struct net_device *dev)
@ -609,6 +636,11 @@ void ipoib_reap_ah(struct work_struct *work)
round_jiffies_relative(HZ));
}
/*
 * Timer callback: @ctx carries the struct net_device pointer as an
 * unsigned long; drain that device's send CQ.
 */
static void ipoib_ib_tx_timer_func(unsigned long ctx)
{
	struct net_device *netdev = (struct net_device *)ctx;

	drain_tx_cq(netdev);
}
int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@ -645,6 +677,10 @@ int ipoib_ib_dev_open(struct net_device *dev)
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
round_jiffies_relative(HZ));
init_timer(&priv->poll_timer);
priv->poll_timer.function = ipoib_ib_tx_timer_func;
priv->poll_timer.data = (unsigned long)dev;
set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0;
@ -810,6 +846,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
ipoib_dbg(priv, "All sends and receives done.\n");
timeout:
del_timer_sync(&priv->poll_timer);
qp_attr.qp_state = IB_QPS_RESET;
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n");

View File

@ -187,7 +187,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_mr;
}
priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
dev, ipoib_sendq_size, 0);
if (IS_ERR(priv->send_cq)) {
printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
goto out_free_recv_cq;