diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 524d11098c56..1f52d9f66e41 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -328,6 +328,17 @@ struct adapter_params { unsigned int max_ird_adapter; /* Max read depth per adapter */ }; +/* State needed to monitor the forward progress of SGE Ingress DMA activities + * and possible hangs. + */ +struct sge_idma_monitor_state { + unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */ + unsigned int idma_stalled[2]; /* synthesized stalled timers in HZ */ + unsigned int idma_state[2]; /* IDMA Hang detect state */ + unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */ + unsigned int idma_warn[2]; /* time to warning in HZ */ +}; + #include "t4fw_api.h" #define FW_VERSION(chip) ( \ @@ -630,12 +641,7 @@ struct sge { u32 fl_align; /* response queue message alignment */ u32 fl_starve_thres; /* Free List starvation threshold */ - /* State variables for detecting an SGE Ingress DMA hang */ - unsigned int idma_1s_thresh;/* SGE same State Counter 1s threshold */ - unsigned int idma_stalled[2];/* SGE synthesized stalled timers in HZ */ - unsigned int idma_state[2]; /* SGE IDMA Hang detect state */ - unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */ - + struct sge_idma_monitor_state idma_monitor; unsigned int egr_start; unsigned int egr_sz; unsigned int ingr_start; @@ -1055,7 +1061,7 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb); int t4_ofld_send(struct adapter *adap, struct sk_buff *skb); int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd); + struct sge_fl *fl, rspq_handler_t hnd, int cong); int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, unsigned int iqid); @@ -1215,6 +1221,7 @@ int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); +unsigned int t4_get_mps_bg_map(struct adapter *adapter, int idx); void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); int t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data, @@ -1310,4 +1317,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_free_mem(void *addr); +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma); +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 803d91beec6f..6c781c1b8fb8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -977,7 +977,7 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler); + uldrx_handler, 0); if (err) return err; memset(&q->stats, 0, sizeof(q->stats)); @@ -1007,7 +1007,7 @@ static int setup_sge_queues(struct adapter *adap) msi_idx = 1; /* vector 0 is for non-queue interrupts */ else { err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, - NULL, NULL); + NULL, NULL, -1); if (err) return err; msi_idx = -((int)s->intrq.abs_id + 1); @@ -1027,7 +1027,7 @@ static int setup_sge_queues(struct adapter *adap) * new/deleted queues. */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], - msi_idx, NULL, fwevtq_handler); + msi_idx, NULL, fwevtq_handler, -1); if (err) { freeout: t4_free_sge_resources(adap); return err; @@ -1044,7 +1044,9 @@ freeout: t4_free_sge_resources(adap); msi_idx++; err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx, &q->fl, - t4_ethrx_handler); + t4_ethrx_handler, + t4_get_mps_bg_map(adap, + pi->tx_chan)); if (err) goto freeout; q->rspq.idx = j; @@ -2432,6 +2434,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.max_ordird_qp = adap->params.max_ordird_qp; lli.max_ird_adapter = adap->params.max_ird_adapter; lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; + lli.nodeid = dev_to_node(adap->pdev_dev); handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 78ab4d406ce2..df34293f35e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -264,6 +264,7 @@ struct cxgb4_lld_info { unsigned int max_ordird_qp; /* Max ORD/IRD depth per RDMA QP */ unsigned int max_ird_adapter; /* Max IRD memory per adapter */ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ + int nodeid; /* device numa node id */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 0d2eddab04ef..898842df38fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -100,16 +100,6 @@ */ #define TX_QCHECK_PERIOD (HZ / 2) -/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate - * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA - * State Machines in the same state for this amount of time (in HZ) then we'll - * issue a warning about a potential hang. We'll repeat the warning as the - * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till - * the situation clears. If the situation clears, we'll note that as well. - */ -#define SGE_IDMA_WARN_THRESH (1 * HZ) -#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD) - /* * Max number of Tx descriptors to be reclaimed by the Tx timer. */ @@ -1130,7 +1120,6 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap, */ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) { - int len; u32 wr_mid; u64 cntrl, *end; int qidx, credits; @@ -1143,6 +1132,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) const struct skb_shared_info *ssi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; bool immediate = false; + int len, max_pkt_len; #ifdef CONFIG_CHELSIO_T4_FCOE int err; #endif /* CONFIG_CHELSIO_T4_FCOE */ @@ -1156,6 +1146,13 @@ out_free: dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + pi = netdev_priv(dev); adap = pi->adapter; qidx = skb_get_queue_mapping(skb); @@ -2279,7 +2276,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap) static void sge_rx_timer_cb(unsigned long data) { unsigned long m; - unsigned int i, idma_same_state_cnt[2]; + unsigned int i; struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; @@ -2300,67 +2297,16 @@ static void sge_rx_timer_cb(unsigned long data) set_bit(id, s->starving_fl); } } + /* The remainder of the SGE RX Timer Callback routine is dedicated to + * global Master PF activities like checking for chip ingress stalls, + * etc. + */ + if (!(adap->flags & MASTER_PF)) + goto done; - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13); - idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A); - idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - - for (i = 0; i < 2; i++) { - u32 debug0, debug11; - - /* If the Ingress DMA Same State Counter ("timer") is less - * than 1s, then we can reset our synthesized Stall Timer and - * continue. If we have previously emitted warnings about a - * potential stalled Ingress Queue, issue a note indicating - * that the Ingress Queue has resumed forward progress. - */ - if (idma_same_state_cnt[i] < s->idma_1s_thresh) { - if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH) - CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n", - i, s->idma_qid[i], - s->idma_stalled[i]/HZ); - s->idma_stalled[i] = 0; - continue; - } - - /* Synthesize an SGE Ingress DMA Same State Timer in the Hz - * domain. The first time we get here it'll be because we - * passed the 1s Threshold; each additional time it'll be - * because the RX Timer Callback is being fired on its regular - * schedule. - * - * If the stall is below our Potential Hung Ingress Queue - * Warning Threshold, continue. - */ - if (s->idma_stalled[i] == 0) - s->idma_stalled[i] = HZ; - else - s->idma_stalled[i] += RX_QCHECK_PERIOD; - - if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH) - continue; - - /* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */ - if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0) - continue; - - /* Read and save the SGE IDMA State and Queue ID information. - * We do this every time in case it changes across time ... - */ - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0); - debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; - - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11); - debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; - - CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n", - i, s->idma_qid[i], s->idma_state[i], - s->idma_stalled[i]/HZ, debug0, debug11); - t4_sge_decode_idma_state(adap, s->idma_state[i]); - } + t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD); +done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } @@ -2437,9 +2383,12 @@ static void __iomem *bar2_address(struct adapter *adapter, return adapter->bar2 + bar2_qoffset; } +/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0 + * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map + */ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd) + struct sge_fl *fl, rspq_handler_t hnd, int cong) { int ret, flsz = 0; struct fw_iq_cmd c; @@ -2471,8 +2420,19 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4)); c.iqsize = htons(iq->size); c.iqaddr = cpu_to_be64(iq->phys_addr); + if (cong >= 0) + c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F); if (fl) { + /* Allocate the ring for the hardware free list (with space + * for its status page) along with the associated software + * descriptor ring. The free list size needs to be a multiple + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congrestion Threshold + * (fl_starve_thres - 1). + */ + if (fl->size < s->fl_starve_thres - 1 + 2 * 8) + fl->size = s->fl_starve_thres - 1 + 2 * 8; fl->size = roundup(fl->size, 8); fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), &fl->addr, @@ -2481,10 +2441,15 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto fl_nomem; flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); - c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F | - FW_IQ_CMD_FL0FETCHRO_F | - FW_IQ_CMD_FL0DATARO_F | - FW_IQ_CMD_FL0PADEN_F); + c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_F | + FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0PADEN_F); + if (cong >= 0) + c.iqns_to_fl0congen |= + htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) | + FW_IQ_CMD_FL0CONGCIF_F | + FW_IQ_CMD_FL0CONGEN_F); c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) | FW_IQ_CMD_FL0FBMAX_V(3)); c.fl0size = htons(flsz); @@ -2532,6 +2497,41 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, &fl->bar2_qid); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } + + /* For T5 and later we attempt to set up the Congestion Manager values + * of the new RX Ethernet Queue. This should really be handled by + * firmware because it's more complex than any host driver wants to + * get involved with and it's different per chip and this is almost + * certainly wrong. Firmware would be wrong as well, but it would be + * a lot easier to fix in one place ... For now we do something very + * simple (and hopefully less wrong). + */ + if (!is_t4(adap->params.chip) && cong >= 0) { + u32 param, val; + int i; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | + FW_PARAMS_PARAM_YZ_V(iq->cntxt_id)); + if (cong == 0) { + val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X); + } else { + val = + CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X); + for (i = 0; i < 4; i++) { + if (cong & (1 << i)) + val |= + CONMCTXT_CNGCHMAP_V(1 << (i << 2)); + } + } + ret = t4_set_params(adap, adap->mbox, adap->fn, 0, 1, + ¶m, &val); + if (ret) + dev_warn(adap->pdev_dev, "Failed to set Congestion" + " Manager Context for Ingress Queue %d: %d\n", + iq->cntxt_id, -ret); + } + return 0; fl_nomem: @@ -2637,7 +2637,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, txq->q.desc = alloc_ring(adap->pdev_dev, nentries, sizeof(struct tx_desc), 0, &txq->q.phys_addr, - NULL, 0, NUMA_NO_NODE); + NULL, 0, dev_to_node(adap->pdev_dev)); if (!txq->q.desc) return -ENOMEM; @@ -3067,11 +3067,11 @@ int t4_sge_init(struct adapter *adap) egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl); s->fl_starve_thres = 2*egress_threshold + 1; + t4_idma_monitor_init(adap, &s->idma_monitor); + setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); - s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */ - s->idma_stalled[0] = 0; - s->idma_stalled[1] = 0; + spin_lock_init(&s->intrq_lock); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e8578a742f2a..6164ef3e1376 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3401,7 +3401,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) } /** - * get_mps_bg_map - return the buffer groups associated with a port + * t4_get_mps_bg_map - return the buffer groups associated with a port * @adap: the adapter * @idx: the port index * @@ -3409,7 +3409,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) * with the given port. Bit i is set if buffer group i is used by the * port. */ -static unsigned int get_mps_bg_map(struct adapter *adap, int idx) +unsigned int t4_get_mps_bg_map(struct adapter *adap, int idx) { u32 n = NUMPORTS_G(t4_read_reg(adap, MPS_CMN_CTL_A)); @@ -3460,7 +3460,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type) */ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) { - u32 bgmap = get_mps_bg_map(adap, idx); + u32 bgmap = t4_get_mps_bg_map(adap, idx); #define GET_STAT(name) \ t4_read_reg64(adap, \ @@ -5717,3 +5717,130 @@ void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr) t4_write_reg(adap, TP_DBG_LA_CONFIG_A, cfg | adap->params.tp.la_mask); } + +/* SGE Hung Ingress DMA Warning Threshold time and Warning Repeat Rate (in + * seconds). If we find one of the SGE Ingress DMA State Machines in the same + * state for more than the Warning Threshold then we'll issue a warning about + * a potential hang. We'll repeat the warning as the SGE Ingress DMA Channel + * appears to be hung every Warning Repeat second till the situation clears. + * If the situation clears, we'll note that as well. + */ +#define SGE_IDMA_WARN_THRESH 1 +#define SGE_IDMA_WARN_REPEAT 300 + +/** + * t4_idma_monitor_init - initialize SGE Ingress DMA Monitor + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * + * Initialize the state of an SGE Ingress DMA Monitor. + */ +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma) +{ + /* Initialize the state variables for detecting an SGE Ingress DMA + * hang. The SGE has internal counters which count up on each clock + * tick whenever the SGE finds its Ingress DMA State Engines in the + * same state they were on the previous clock tick. The clock used is + * the Core Clock so we have a limit on the maximum "time" they can + * record; typically a very small number of seconds. For instance, + * with a 600MHz Core Clock, we can only count up to a bit more than + * 7s. So we'll synthesize a larger counter in order to not run the + * risk of having the "timers" overflow and give us the flexibility to + * maintain a Hung SGE State Machine of our own which operates across + * a longer time frame. + */ + idma->idma_1s_thresh = core_ticks_per_usec(adapter) * 1000000; /* 1s */ + idma->idma_stalled[0] = 0; + idma->idma_stalled[1] = 0; +} + +/** + * t4_idma_monitor - monitor SGE Ingress DMA state + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * @hz: number of ticks/second + * @ticks: number of ticks since the last IDMA Monitor call + */ +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks) +{ + int i, idma_same_state_cnt[2]; + + /* Read the SGE Debug Ingress DMA Same State Count registers. These + * are counters inside the SGE which count up on each clock when the + * SGE finds its Ingress DMA State Engines in the same states they + * were in the previous clock. The counters will peg out at + * 0xffffffff without wrapping around so once they pass the 1s + * threshold they'll stay above that till the IDMA state changes. + */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 13); + idma_same_state_cnt[0] = t4_read_reg(adapter, SGE_DEBUG_DATA_HIGH_A); + idma_same_state_cnt[1] = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + + for (i = 0; i < 2; i++) { + u32 debug0, debug11; + + /* If the Ingress DMA Same State Counter ("timer") is less + * than 1s, then we can reset our synthesized Stall Timer and + * continue. If we have previously emitted warnings about a + * potential stalled Ingress Queue, issue a note indicating + * that the Ingress Queue has resumed forward progress. + */ + if (idma_same_state_cnt[i] < idma->idma_1s_thresh) { + if (idma->idma_stalled[i] >= SGE_IDMA_WARN_THRESH * hz) + dev_warn(adapter->pdev_dev, "SGE idma%d, queue %u, " + "resumed after %d seconds\n", + i, idma->idma_qid[i], + idma->idma_stalled[i] / hz); + idma->idma_stalled[i] = 0; + continue; + } + + /* Synthesize an SGE Ingress DMA Same State Timer in the Hz + * domain. The first time we get here it'll be because we + * passed the 1s Threshold; each additional time it'll be + * because the RX Timer Callback is being fired on its regular + * schedule. + * + * If the stall is below our Potential Hung Ingress Queue + * Warning Threshold, continue. + */ + if (idma->idma_stalled[i] == 0) { + idma->idma_stalled[i] = hz; + idma->idma_warn[i] = 0; + } else { + idma->idma_stalled[i] += ticks; + idma->idma_warn[i] -= ticks; + } + + if (idma->idma_stalled[i] < SGE_IDMA_WARN_THRESH * hz) + continue; + + /* We'll issue a warning every SGE_IDMA_WARN_REPEAT seconds. + */ + if (idma->idma_warn[i] > 0) + continue; + idma->idma_warn[i] = SGE_IDMA_WARN_REPEAT * hz; + + /* Read and save the SGE IDMA State and Queue ID information. + * We do this every time in case it changes across time ... + * can't be too careful ... + */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 0); + debug0 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; + + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 11); + debug11 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; + + dev_warn(adapter->pdev_dev, "SGE idma%u, queue %u, potentially stuck in " + "state %u for %d seconds (debug0=%#x, debug11=%#x)\n", + i, idma->idma_qid[i], idma->idma_state[i], + idma->idma_stalled[i] / hz, + debug0, debug11); + t4_sge_decode_idma_state(adapter, idma->idma_state[i]); + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h index 19b2dcf6acde..c4d9952f814b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h @@ -61,6 +61,15 @@ #define SGE_TIMERREGS 6 #define TIMERREG_COUNTER0_X 0 +/* Congestion Manager Definitions. + */ +#define CONMCTXT_CNGTPMODE_S 19 +#define CONMCTXT_CNGTPMODE_V(x) ((x) << CONMCTXT_CNGTPMODE_S) +#define CONMCTXT_CNGCHMAP_S 0 +#define CONMCTXT_CNGCHMAP_V(x) ((x) << CONMCTXT_CNGCHMAP_S) +#define CONMCTXT_CNGTPMODE_CHANNEL_X 2 +#define CONMCTXT_CNGTPMODE_QUEUE_X 1 + /* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues. * The User Doorbells are each 128 bytes in length with a Simple Doorbell at * offsets 8x and a Write Combining single 64-byte Egress Queue Unit diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 03fbfd1fb3df..d75fca7695eb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1123,6 +1123,7 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11, FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12, FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13, + FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20, }; enum fw_params_param_dev_diag { @@ -1377,6 +1378,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_IQFLINTCONGEN_S 27 #define FW_IQ_CMD_IQFLINTCONGEN_V(x) ((x) << FW_IQ_CMD_IQFLINTCONGEN_S) +#define FW_IQ_CMD_IQFLINTCONGEN_F FW_IQ_CMD_IQFLINTCONGEN_V(1U) #define FW_IQ_CMD_IQFLINTISCSIC_S 26 #define FW_IQ_CMD_IQFLINTISCSIC_V(x) ((x) << FW_IQ_CMD_IQFLINTISCSIC_S) @@ -1399,6 +1401,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_FL0CONGCIF_S 11 #define FW_IQ_CMD_FL0CONGCIF_V(x) ((x) << FW_IQ_CMD_FL0CONGCIF_S) +#define FW_IQ_CMD_FL0CONGCIF_F FW_IQ_CMD_FL0CONGCIF_V(1U) #define FW_IQ_CMD_FL0ONCHIP_S 10 #define FW_IQ_CMD_FL0ONCHIP_V(x) ((x) << FW_IQ_CMD_FL0ONCHIP_S) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 482f6de6817d..98cd47c373c5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1160,7 +1160,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) { u32 wr_mid; u64 cntrl, *end; - int qidx, credits; + int qidx, credits, max_pkt_len; unsigned int flits, ndesc; struct adapter *adapter; struct sge_eth_txq *txq; @@ -1183,6 +1183,13 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->len < fw_hdr_copy_len)) goto out_free; + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + /* * Figure out which TX Queue we're going to use. */ @@ -2243,8 +2250,12 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, * Allocate the ring for the hardware free list (with space * for its status page) along with the associated software * descriptor ring. The free list size needs to be a multiple - * of the Egress Queue Unit. + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congrestion Threshold + * (fl_starve_thres - 1). */ + if (fl->size < s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT) + fl->size = s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT; fl->size = roundup(fl->size, FL_PER_EQ_UNIT); fl->desc = alloc_ring(adapter->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc),