From 1343299727e99230156ddb0dbac0f84e8e0a1d1e Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 5 May 2015 14:59:51 +0530 Subject: [PATCH 1/6] cxgb4: Make sure that Freelist size is larger than Egress Congestion Threshold We need to make sure that the Free List Size, in pointers, is at least 2 Egress Queue Units (8 pointers/each) larger than the SGE's Egress Congestion Threshold (in pointers). Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 9 +++++++++ drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 6 +++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 0d2eddab04ef..517d5b4ea11b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2473,6 +2473,15 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, c.iqaddr = cpu_to_be64(iq->phys_addr); if (fl) { + /* Allocate the ring for the hardware free list (with space + * for its status page) along with the associated software + * descriptor ring. The free list size needs to be a multiple + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congrestion Threshold + * (fl_starve_thres - 1). + */ + if (fl->size < s->fl_starve_thres - 1 + 2 * 8) + fl->size = s->fl_starve_thres - 1 + 2 * 8; fl->size = roundup(fl->size, 8); fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), &fl->addr, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 482f6de6817d..cc92c6984737 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2243,8 +2243,12 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, * Allocate the ring for the hardware free list (with space * for its status page) along with the associated software * descriptor ring. The free list size needs to be a multiple - * of the Egress Queue Unit. + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congrestion Threshold + * (fl_starve_thres - 1). */ + if (fl->size < s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT) + fl->size = s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT; fl->size = roundup(fl->size, FL_PER_EQ_UNIT); fl->desc = alloc_ring(adapter->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), From 145ef8a54eb3bee917947f9bf7940093a0f2b03d Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 5 May 2015 14:59:52 +0530 Subject: [PATCH 2/6] cxgb4: Enable congestion notification from SGE for IQs and FLs. Also changed the name of t4_hw.c:get_mps_bg_map() to t4_get_mps_bg_map() and make it an exported routine with a definition in cxgb4.h. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 ++- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 ++++++---- drivers/net/ethernet/chelsio/cxgb4/sge.c | 20 ++++++++++++++----- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 6 +++--- drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 2 ++ 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 524d11098c56..b6fa9583c3f2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1055,7 +1055,7 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb); int t4_ofld_send(struct adapter *adap, struct sk_buff *skb); int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd); + struct sge_fl *fl, rspq_handler_t hnd, int cong); int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, unsigned int iqid); @@ -1215,6 +1215,7 @@ int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); +unsigned int t4_get_mps_bg_map(struct adapter *adapter, int idx); void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); int t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 803d91beec6f..12cd8e4cc63c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -977,7 +977,7 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler); + uldrx_handler, 0); if (err) return err; memset(&q->stats, 0, sizeof(q->stats)); @@ -1007,7 +1007,7 @@ static int setup_sge_queues(struct adapter *adap) msi_idx = 1; /* vector 0 is for non-queue interrupts */ else { err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, - NULL, NULL); + NULL, NULL, -1); if (err) return err; msi_idx = -((int)s->intrq.abs_id + 1); @@ -1027,7 +1027,7 @@ static int setup_sge_queues(struct adapter *adap) * new/deleted queues. 
*/ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], - msi_idx, NULL, fwevtq_handler); + msi_idx, NULL, fwevtq_handler, -1); if (err) { freeout: t4_free_sge_resources(adap); return err; @@ -1044,7 +1044,9 @@ freeout: t4_free_sge_resources(adap); msi_idx++; err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx, &q->fl, - t4_ethrx_handler); + t4_ethrx_handler, + t4_get_mps_bg_map(adap, + pi->tx_chan)); if (err) goto freeout; q->rspq.idx = j; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 517d5b4ea11b..6397d6ccad1c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2437,9 +2437,12 @@ static void __iomem *bar2_address(struct adapter *adapter, return adapter->bar2 + bar2_qoffset; } +/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0 + * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map + */ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd) + struct sge_fl *fl, rspq_handler_t hnd, int cong) { int ret, flsz = 0; struct fw_iq_cmd c; @@ -2471,6 +2474,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4)); c.iqsize = htons(iq->size); c.iqaddr = cpu_to_be64(iq->phys_addr); + if (cong >= 0) + c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F); if (fl) { /* Allocate the ring for the hardware free list (with space @@ -2490,10 +2495,15 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto fl_nomem; flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); - c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F | - FW_IQ_CMD_FL0FETCHRO_F | - FW_IQ_CMD_FL0DATARO_F | - FW_IQ_CMD_FL0PADEN_F); + c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_F | + FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0PADEN_F); + if (cong >= 0) + c.iqns_to_fl0congen |= + htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) | + FW_IQ_CMD_FL0CONGCIF_F | + FW_IQ_CMD_FL0CONGEN_F); c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) | FW_IQ_CMD_FL0FBMAX_V(3)); c.fl0size = htons(flsz); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e8578a742f2a..8ba91c328870 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3401,7 +3401,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) } /** - * get_mps_bg_map - return the buffer groups associated with a port + * t4_get_mps_bg_map - return the buffer groups associated with a port * @adap: the adapter * @idx: the port index * @@ -3409,7 +3409,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) * with the given port. Bit i is set if buffer group i is used by the * port. 
*/ -static unsigned int get_mps_bg_map(struct adapter *adap, int idx) +unsigned int t4_get_mps_bg_map(struct adapter *adap, int idx) { u32 n = NUMPORTS_G(t4_read_reg(adap, MPS_CMN_CTL_A)); @@ -3460,7 +3460,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type) */ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) { - u32 bgmap = get_mps_bg_map(adap, idx); + u32 bgmap = t4_get_mps_bg_map(adap, idx); #define GET_STAT(name) \ t4_read_reg64(adap, \ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 03fbfd1fb3df..e34efb7a858f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1377,6 +1377,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_IQFLINTCONGEN_S 27 #define FW_IQ_CMD_IQFLINTCONGEN_V(x) ((x) << FW_IQ_CMD_IQFLINTCONGEN_S) +#define FW_IQ_CMD_IQFLINTCONGEN_F FW_IQ_CMD_IQFLINTCONGEN_V(1U) #define FW_IQ_CMD_IQFLINTISCSIC_S 26 #define FW_IQ_CMD_IQFLINTISCSIC_V(x) ((x) << FW_IQ_CMD_IQFLINTISCSIC_S) @@ -1399,6 +1400,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_FL0CONGCIF_S 11 #define FW_IQ_CMD_FL0CONGCIF_V(x) ((x) << FW_IQ_CMD_FL0CONGCIF_S) +#define FW_IQ_CMD_FL0CONGCIF_F FW_IQ_CMD_FL0CONGCIF_V(1U) #define FW_IQ_CMD_FL0ONCHIP_S 10 #define FW_IQ_CMD_FL0ONCHIP_V(x) ((x) << FW_IQ_CMD_FL0ONCHIP_S) From b8b1ae990efa2e677b162e9e24de77743bfca954 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 5 May 2015 14:59:53 +0530 Subject: [PATCH 3/6] cxgb4: Pass in a Congestion Channel Map to t4_sge_alloc_rxq() Passes a Congestion Channel Map to t4_sge_alloc_rxq() for the Ethernet RX Queues based on the MPS Buffer Group Map of the TX Channel rather than just the TX Channel Map. Also, in t4_sge_alloc_rxq() for T5, setting up the Congestion Manager values of the new RX Ethernet Queue is done by firmware now. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 35 +++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/t4_values.h | 9 +++++ drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 1 + 3 files changed, 45 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 6397d6ccad1c..a9002b1a8ea9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2551,6 +2551,41 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, &fl->bar2_qid); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } + + /* For T5 and later we attempt to set up the Congestion Manager values + * of the new RX Ethernet Queue. This should really be handled by + * firmware because it's more complex than any host driver wants to + * get involved with and it's different per chip and this is almost + * certainly wrong. Firmware would be wrong as well, but it would be + * a lot easier to fix in one place ... For now we do something very + * simple (and hopefully less wrong). 
+ */ + if (!is_t4(adap->params.chip) && cong >= 0) { + u32 param, val; + int i; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | + FW_PARAMS_PARAM_YZ_V(iq->cntxt_id)); + if (cong == 0) { + val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X); + } else { + val = + CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X); + for (i = 0; i < 4; i++) { + if (cong & (1 << i)) + val |= + CONMCTXT_CNGCHMAP_V(1 << (i << 2)); + } + } + ret = t4_set_params(adap, adap->mbox, adap->fn, 0, 1, + ¶m, &val); + if (ret) + dev_warn(adap->pdev_dev, "Failed to set Congestion" + " Manager Context for Ingress Queue %d: %d\n", + iq->cntxt_id, -ret); + } + return 0; fl_nomem: diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h index 19b2dcf6acde..c4d9952f814b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h @@ -61,6 +61,15 @@ #define SGE_TIMERREGS 6 #define TIMERREG_COUNTER0_X 0 +/* Congestion Manager Definitions. + */ +#define CONMCTXT_CNGTPMODE_S 19 +#define CONMCTXT_CNGTPMODE_V(x) ((x) << CONMCTXT_CNGTPMODE_S) +#define CONMCTXT_CNGCHMAP_S 0 +#define CONMCTXT_CNGCHMAP_V(x) ((x) << CONMCTXT_CNGCHMAP_S) +#define CONMCTXT_CNGTPMODE_CHANNEL_X 2 +#define CONMCTXT_CNGTPMODE_QUEUE_X 1 + /* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues. * The User Doorbells are each 128 bytes in length with a Simple Doorbell at * offsets 8x and a Write Combining single 64-byte Egress Queue Unit diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index e34efb7a858f..d75fca7695eb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1123,6 +1123,7 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11, FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12, FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13, + FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20, }; enum fw_params_param_dev_diag { From 982b81eb24697656d5f52845793827091c3c6309 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 5 May 2015 14:59:54 +0530 Subject: [PATCH 4/6] cxgb4: Add device node to ULD info Adds device node to ULD info. Use the node info to alloc_ring() for ctrl TX queues Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 + drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 12cd8e4cc63c..6c781c1b8fb8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2434,6 +2434,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.max_ordird_qp = adap->params.max_ordird_qp; lli.max_ird_adapter = adap->params.max_ird_adapter; lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; + lli.nodeid = dev_to_node(adap->pdev_dev); handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 78ab4d406ce2..df34293f35e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -264,6 +264,7 @@ struct cxgb4_lld_info { unsigned int max_ordird_qp; /* Max ORD/IRD depth per RDMA QP */ unsigned int max_ird_adapter; /* Max IRD memory per adapter */ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ + int nodeid; /* device numa node id */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index a9002b1a8ea9..354480dab239 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2691,7 +2691,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, txq->q.desc = alloc_ring(adap->pdev_dev, nentries, sizeof(struct tx_desc), 0, &txq->q.phys_addr, - NULL, 0, NUMA_NO_NODE); + NULL, 0, dev_to_node(adap->pdev_dev)); if (!txq->q.desc) return -ENOMEM; From a3bfb6179cd1277b259f86b022f3340f3bb49cac Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 5 May 2015 14:59:55 +0530 Subject: [PATCH 5/6] cxgb4: Move SGE Ingress DMA state monitor code to a new routine Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 23 +++- drivers/net/ethernet/chelsio/cxgb4/sge.c | 85 ++------------ drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 127 +++++++++++++++++++++ 3 files changed, 156 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index b6fa9583c3f2..1f52d9f66e41 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -328,6 +328,17 @@ struct adapter_params { unsigned int max_ird_adapter; /* Max read depth per adapter */ }; +/* State needed to monitor the forward progress of SGE Ingress DMA activities + * and possible hangs. 
+ */ +struct sge_idma_monitor_state { + unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */ + unsigned int idma_stalled[2]; /* synthesized stalled timers in HZ */ + unsigned int idma_state[2]; /* IDMA Hang detect state */ + unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */ + unsigned int idma_warn[2]; /* time to warning in HZ */ +}; + #include "t4fw_api.h" #define FW_VERSION(chip) ( \ @@ -630,12 +641,7 @@ struct sge { u32 fl_align; /* response queue message alignment */ u32 fl_starve_thres; /* Free List starvation threshold */ - /* State variables for detecting an SGE Ingress DMA hang */ - unsigned int idma_1s_thresh;/* SGE same State Counter 1s threshold */ - unsigned int idma_stalled[2];/* SGE synthesized stalled timers in HZ */ - unsigned int idma_state[2]; /* SGE IDMA Hang detect state */ - unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */ - + struct sge_idma_monitor_state idma_monitor; unsigned int egr_start; unsigned int egr_sz; unsigned int ingr_start; @@ -1311,4 +1317,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_free_mem(void *addr); +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma); +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 354480dab239..ad504d0db1ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -100,16 +100,6 @@ */ #define TX_QCHECK_PERIOD (HZ / 2) -/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate - * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA - * State Machines in the same state for this amount of time (in HZ) then we'll - * issue a warning about a potential hang. We'll repeat the warning as the - * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till - * the situation clears. If the situation clears, we'll note that as well. - */ -#define SGE_IDMA_WARN_THRESH (1 * HZ) -#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD) - /* * Max number of Tx descriptors to be reclaimed by the Tx timer. */ @@ -2279,7 +2269,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap) static void sge_rx_timer_cb(unsigned long data) { unsigned long m; - unsigned int i, idma_same_state_cnt[2]; + unsigned int i; struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; @@ -2300,67 +2290,16 @@ static void sge_rx_timer_cb(unsigned long data) set_bit(id, s->starving_fl); } } + /* The remainder of the SGE RX Timer Callback routine is dedicated to + * global Master PF activities like checking for chip ingress stalls, + * etc. + */ + if (!(adap->flags & MASTER_PF)) + goto done; - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13); - idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A); - idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - - for (i = 0; i < 2; i++) { - u32 debug0, debug11; - - /* If the Ingress DMA Same State Counter ("timer") is less - * than 1s, then we can reset our synthesized Stall Timer and - * continue. If we have previously emitted warnings about a - * potential stalled Ingress Queue, issue a note indicating - * that the Ingress Queue has resumed forward progress. 
- */ - if (idma_same_state_cnt[i] < s->idma_1s_thresh) { - if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH) - CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n", - i, s->idma_qid[i], - s->idma_stalled[i]/HZ); - s->idma_stalled[i] = 0; - continue; - } - - /* Synthesize an SGE Ingress DMA Same State Timer in the Hz - * domain. The first time we get here it'll be because we - * passed the 1s Threshold; each additional time it'll be - * because the RX Timer Callback is being fired on its regular - * schedule. - * - * If the stall is below our Potential Hung Ingress Queue - * Warning Threshold, continue. - */ - if (s->idma_stalled[i] == 0) - s->idma_stalled[i] = HZ; - else - s->idma_stalled[i] += RX_QCHECK_PERIOD; - - if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH) - continue; - - /* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */ - if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0) - continue; - - /* Read and save the SGE IDMA State and Queue ID information. - * We do this every time in case it changes across time ... - */ - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0); - debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; - - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11); - debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; - - CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n", - i, s->idma_qid[i], s->idma_state[i], - s->idma_stalled[i]/HZ, debug0, debug11); - t4_sge_decode_idma_state(adap, s->idma_state[i]); - } + t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD); +done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } @@ -3121,11 +3060,11 @@ int t4_sge_init(struct adapter *adap) egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl); s->fl_starve_thres = 2*egress_threshold + 1; + t4_idma_monitor_init(adap, &s->idma_monitor); + setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); - s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */ - s->idma_stalled[0] = 0; - s->idma_stalled[1] = 0; + spin_lock_init(&s->intrq_lock); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 8ba91c328870..6164ef3e1376 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -5717,3 +5717,130 @@ void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr) t4_write_reg(adap, TP_DBG_LA_CONFIG_A, cfg | adap->params.tp.la_mask); } + +/* SGE Hung Ingress DMA Warning Threshold time and Warning Repeat Rate (in + * seconds). If we find one of the SGE Ingress DMA State Machines in the same + * state for more than the Warning Threshold then we'll issue a warning about + * a potential hang. We'll repeat the warning as the SGE Ingress DMA Channel + * appears to be hung every Warning Repeat second till the situation clears. + * If the situation clears, we'll note that as well. + */ +#define SGE_IDMA_WARN_THRESH 1 +#define SGE_IDMA_WARN_REPEAT 300 + +/** + * t4_idma_monitor_init - initialize SGE Ingress DMA Monitor + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * + * Initialize the state of an SGE Ingress DMA Monitor. + */ +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma) +{ + /* Initialize the state variables for detecting an SGE Ingress DMA + * hang. 
The SGE has internal counters which count up on each clock + * tick whenever the SGE finds its Ingress DMA State Engines in the + * same state they were on the previous clock tick. The clock used is + * the Core Clock so we have a limit on the maximum "time" they can + * record; typically a very small number of seconds. For instance, + * with a 600MHz Core Clock, we can only count up to a bit more than + * 7s. So we'll synthesize a larger counter in order to not run the + * risk of having the "timers" overflow and give us the flexibility to + * maintain a Hung SGE State Machine of our own which operates across + * a longer time frame. + */ + idma->idma_1s_thresh = core_ticks_per_usec(adapter) * 1000000; /* 1s */ + idma->idma_stalled[0] = 0; + idma->idma_stalled[1] = 0; +} + +/** + * t4_idma_monitor - monitor SGE Ingress DMA state + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * @hz: number of ticks/second + * @ticks: number of ticks since the last IDMA Monitor call + */ +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks) +{ + int i, idma_same_state_cnt[2]; + + /* Read the SGE Debug Ingress DMA Same State Count registers. These + * are counters inside the SGE which count up on each clock when the + * SGE finds its Ingress DMA State Engines in the same states they + * were in the previous clock. The counters will peg out at + * 0xffffffff without wrapping around so once they pass the 1s + * threshold they'll stay above that till the IDMA state changes. + */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 13); + idma_same_state_cnt[0] = t4_read_reg(adapter, SGE_DEBUG_DATA_HIGH_A); + idma_same_state_cnt[1] = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + + for (i = 0; i < 2; i++) { + u32 debug0, debug11; + + /* If the Ingress DMA Same State Counter ("timer") is less + * than 1s, then we can reset our synthesized Stall Timer and + * continue. If we have previously emitted warnings about a + * potential stalled Ingress Queue, issue a note indicating + * that the Ingress Queue has resumed forward progress. + */ + if (idma_same_state_cnt[i] < idma->idma_1s_thresh) { + if (idma->idma_stalled[i] >= SGE_IDMA_WARN_THRESH * hz) + dev_warn(adapter->pdev_dev, "SGE idma%d, queue %u, " + "resumed after %d seconds\n", + i, idma->idma_qid[i], + idma->idma_stalled[i] / hz); + idma->idma_stalled[i] = 0; + continue; + } + + /* Synthesize an SGE Ingress DMA Same State Timer in the Hz + * domain. The first time we get here it'll be because we + * passed the 1s Threshold; each additional time it'll be + * because the RX Timer Callback is being fired on its regular + * schedule. + * + * If the stall is below our Potential Hung Ingress Queue + * Warning Threshold, continue. + */ + if (idma->idma_stalled[i] == 0) { + idma->idma_stalled[i] = hz; + idma->idma_warn[i] = 0; + } else { + idma->idma_stalled[i] += ticks; + idma->idma_warn[i] -= ticks; + } + + if (idma->idma_stalled[i] < SGE_IDMA_WARN_THRESH * hz) + continue; + + /* We'll issue a warning every SGE_IDMA_WARN_REPEAT seconds. + */ + if (idma->idma_warn[i] > 0) + continue; + idma->idma_warn[i] = SGE_IDMA_WARN_REPEAT * hz; + + /* Read and save the SGE IDMA State and Queue ID information. + * We do this every time in case it changes across time ... + * can't be too careful ... 
+ */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 0); + debug0 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; + + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 11); + debug11 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; + + dev_warn(adapter->pdev_dev, "SGE idma%u, queue %u, potentially stuck in " + "state %u for %d seconds (debug0=%#x, debug11=%#x)\n", + i, idma->idma_qid[i], idma->idma_state[i], + idma->idma_stalled[i] / hz, + debug0, debug11); + t4_sge_decode_idma_state(adapter, idma->idma_state[i]); + } +} From 637d3e99735102f06c3eee095d2bbebf1863ca24 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 5 May 2015 14:59:56 +0530 Subject: [PATCH 6/6] cxgb4: Discard the packet if the length is greater than mtu pktgen sends raw udp packets and bypasses most of the linux networking stack. User can specify different packet sizes. Hence we need to discard the packet if the length is greater than mtu Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 9 ++++++++- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ad504d0db1ec..898842df38fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1120,7 +1120,6 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap, */ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) { - int len; u32 wr_mid; u64 cntrl, *end; int qidx, credits; @@ -1133,6 +1132,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) const struct skb_shared_info *ssi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; bool immediate = false; + int len, max_pkt_len; #ifdef CONFIG_CHELSIO_T4_FCOE int err; #endif /* CONFIG_CHELSIO_T4_FCOE */ @@ -1146,6 +1146,13 @@ out_free: dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + pi = netdev_priv(dev); adap = pi->adapter; qidx = skb_get_queue_mapping(skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index cc92c6984737..98cd47c373c5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1160,7 +1160,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) { u32 wr_mid; u64 cntrl, *end; - int qidx, credits; + int qidx, credits, max_pkt_len; unsigned int flits, ndesc; struct adapter *adapter; struct sge_eth_txq *txq; @@ -1183,6 +1183,13 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->len < fw_hdr_copy_len)) goto out_free; + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + /* * Figure out which TX Queue we're going to use. */