1 /* bnx2x_cmn.c: Broadcom Everest network driver.
3 * Copyright (c) 2007-2011 Broadcom Corporation
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation.
9 * Maintained by: Eilon Greenstein <eilong@broadcom.com>
10 * Written by: Eliezer Tamir
11 * Based on code from Michael Chan's bnx2 driver
12 * UDP CSUM errata workaround by Arik Gendelman
13 * Slowpath and fastpath rework by Vladislav Zolotarov
14 * Statistics and Link management by Yitchak Gertner
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/etherdevice.h>
21 #include <linux/if_vlan.h>
22 #include <linux/interrupt.h>
25 #include <net/ip6_checksum.h>
26 #include <linux/firmware.h>
27 #include <linux/prefetch.h>
28 #include "bnx2x_cmn.h"
29 #include "bnx2x_init.h"
35 * bnx2x_bz_fp - zero content of the fastpath structure.
38 * @index: fastpath index to be zeroed
40 * Makes sure the contents of bp->fp[index].napi are kept
43 static inline void bnx2x_bz_fp(struct bnx2x *bp, int index)
45 struct bnx2x_fastpath *fp = &bp->fp[index];
46 struct napi_struct orig_napi = fp->napi;
47 /* bzero bnx2x_fastpath contents */
48 memset(fp, 0, sizeof(*fp));
50 /* Restore the NAPI object as it has been already initialized */
56 fp->max_cos = bp->max_cos;
58 /* Special queues support only one CoS */
62 * set the tpa flag for each queue. The tpa flag determines the queue's
63 * minimal size, so it must be set prior to queue memory allocation
65 fp->disable_tpa = ((bp->flags & TPA_ENABLE_FLAG) == 0);
68 /* We don't want TPA on an FCoE L2 ring */
75 * bnx2x_move_fp - move content of the fastpath structure.
78 * @from: source FP index
79 * @to: destination FP index
81 * Makes sure the contents of bp->fp[to].napi are kept
82 * intact. This is done by first copying the napi struct from
83 * the target to the source, and then memcpy'ing the entire
84 * source onto the target
86 static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to)
88 struct bnx2x_fastpath *from_fp = &bp->fp[from];
89 struct bnx2x_fastpath *to_fp = &bp->fp[to];
91 /* Copy the NAPI object as it has been already initialized */
92 from_fp->napi = to_fp->napi;
94 /* Move bnx2x_fastpath contents */
95 memcpy(to_fp, from_fp, sizeof(*to_fp));
99 int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
101 /* free skb in the packet ring at pos idx
102 * return idx of last bd freed
104 static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
107 struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
108 struct eth_tx_start_bd *tx_start_bd;
109 struct eth_tx_bd *tx_data_bd;
110 struct sk_buff *skb = tx_buf->skb;
111 u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
114 /* prefetch skb end pointer to speed up dev_kfree_skb() */
117 DP(BNX2X_MSG_FP, "fp[%d]: pkt_idx %d buff @(%p)->skb %p\n",
118 txdata->txq_index, idx, tx_buf, skb);
121 DP(BNX2X_MSG_OFF, "free bd_idx %d\n", bd_idx);
122 tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd;
123 dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd),
124 BD_UNMAP_LEN(tx_start_bd), DMA_TO_DEVICE);
127 nbd = le16_to_cpu(tx_start_bd->nbd) - 1;
128 #ifdef BNX2X_STOP_ON_ERROR
129 if ((nbd - 1) > (MAX_SKB_FRAGS + 2)) {
130 BNX2X_ERR("BAD nbd!\n");
134 new_cons = nbd + tx_buf->first_bd;
136 /* Get the next bd */
137 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
139 /* Skip a parse bd... */
141 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
143 /* ...and the TSO split header bd since they have no mapping */
144 if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) {
146 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
152 DP(BNX2X_MSG_OFF, "free frag bd_idx %d\n", bd_idx);
153 tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd;
154 dma_unmap_page(&bp->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
155 BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
157 bd_idx = TX_BD(NEXT_TX_IDX(bd_idx));
162 dev_kfree_skb_any(skb);
163 tx_buf->first_bd = 0;
169 int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
171 struct netdev_queue *txq;
172 u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
174 #ifdef BNX2X_STOP_ON_ERROR
175 if (unlikely(bp->panic))
179 txq = netdev_get_tx_queue(bp->dev, txdata->txq_index);
180 hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
181 sw_cons = txdata->tx_pkt_cons;
183 while (sw_cons != hw_cons) {
186 pkt_cons = TX_BD(sw_cons);
188 DP(NETIF_MSG_TX_DONE, "queue[%d]: hw_cons %u sw_cons %u "
190 txdata->txq_index, hw_cons, sw_cons, pkt_cons);
192 bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);
196 txdata->tx_pkt_cons = sw_cons;
197 txdata->tx_bd_cons = bd_cons;
199 /* Need to make the tx_bd_cons update visible to start_xmit()
200 * before checking for netif_tx_queue_stopped(). Without the
201 * memory barrier, there is a small possibility that
202 * start_xmit() will miss it and cause the queue to be stopped
204 * On the other hand we need an rmb() here to ensure the proper
205 * ordering of bit testing in the following
206 * netif_tx_queue_stopped(txq) call.
210 if (unlikely(netif_tx_queue_stopped(txq))) {
211 /* Taking tx_lock() is needed to prevent re-enabling the queue
212 * while it's empty. This could happen if rx_action() gets
213 * suspended in bnx2x_tx_int() after the condition before
214 * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()):
216 * stops the queue->sees fresh tx_bd_cons->releases the queue->
217 * sends some packets consuming the whole queue again->
221 __netif_tx_lock(txq, smp_processor_id());
223 if ((netif_tx_queue_stopped(txq)) &&
224 (bp->state == BNX2X_STATE_OPEN) &&
225 (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3))
226 netif_tx_wake_queue(txq);
228 __netif_tx_unlock(txq);
233 static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
236 u16 last_max = fp->last_max_sge;
238 if (SUB_S16(idx, last_max) > 0)
239 fp->last_max_sge = idx;
242 static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
243 struct eth_fast_path_rx_cqe *fp_cqe)
245 struct bnx2x *bp = fp->bp;
246 u16 sge_len = SGE_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
247 le16_to_cpu(fp_cqe->len_on_bd)) >>
249 u16 last_max, last_elem, first_elem;
256 /* First mark all used pages */
257 for (i = 0; i < sge_len; i++)
258 BIT_VEC64_CLEAR_BIT(fp->sge_mask,
259 RX_SGE(le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[i])));
261 DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
262 sge_len - 1, le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
264 /* Here we assume that the last SGE index is the biggest */
265 prefetch((void *)(fp->sge_mask));
266 bnx2x_update_last_max_sge(fp,
267 le16_to_cpu(fp_cqe->sgl_or_raw_data.sgl[sge_len - 1]));
269 last_max = RX_SGE(fp->last_max_sge);
270 last_elem = last_max >> BIT_VEC64_ELEM_SHIFT;
271 first_elem = RX_SGE(fp->rx_sge_prod) >> BIT_VEC64_ELEM_SHIFT;
273 /* If ring is not full */
274 if (last_elem + 1 != first_elem)
277 /* Now update the prod */
278 for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
279 if (likely(fp->sge_mask[i]))
282 fp->sge_mask[i] = BIT_VEC64_ELEM_ONE_MASK;
283 delta += BIT_VEC64_ELEM_SZ;
287 fp->rx_sge_prod += delta;
288 /* clear page-end entries */
289 bnx2x_clear_sge_mask_next_elems(fp);
292 DP(NETIF_MSG_RX_STATUS,
293 "fp->last_max_sge = %d fp->rx_sge_prod = %d\n",
294 fp->last_max_sge, fp->rx_sge_prod);
297 static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
298 struct sk_buff *skb, u16 cons, u16 prod,
299 struct eth_fast_path_rx_cqe *cqe)
301 struct bnx2x *bp = fp->bp;
302 struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
303 struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
304 struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
306 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
307 struct sw_rx_bd *first_buf = &tpa_info->first_buf;
309 /* print error if current state != stop */
310 if (tpa_info->tpa_state != BNX2X_TPA_STOP)
311 BNX2X_ERR("start of bin not in stop [%d]\n", queue);
313 /* Try to map an empty skb from the aggregation info */
314 mapping = dma_map_single(&bp->pdev->dev,
315 first_buf->skb->data,
316 fp->rx_buf_size, DMA_FROM_DEVICE);
318 * ...if it fails - move the skb from the consumer to the producer
319 * and set the current aggregation state as ERROR to drop it
320 * when TPA_STOP arrives.
323 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
324 /* Move the BD from the consumer to the producer */
325 bnx2x_reuse_rx_skb(fp, cons, prod);
326 tpa_info->tpa_state = BNX2X_TPA_ERROR;
330 /* move empty skb from pool to prod */
331 prod_rx_buf->skb = first_buf->skb;
332 dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
333 /* point prod_bd to new skb */
334 prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
335 prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
337 /* move partial skb from cons to pool (don't unmap yet) */
338 *first_buf = *cons_rx_buf;
340 /* mark bin state as START */
341 tpa_info->parsing_flags =
342 le16_to_cpu(cqe->pars_flags.flags);
343 tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
344 tpa_info->tpa_state = BNX2X_TPA_START;
345 tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
346 tpa_info->placement_offset = cqe->placement_offset;
348 #ifdef BNX2X_STOP_ON_ERROR
349 fp->tpa_queue_used |= (1 << queue);
350 #ifdef _ASM_GENERIC_INT_L64_H
351 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
353 DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
359 /* Timestamp option length allowed for TPA aggregation:
361 * nop nop kind length echo val
363 #define TPA_TSTAMP_OPT_LEN 12
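/* These 12 bytes break down as two 1-byte NOPs (padding), a 1-byte option
 * kind, a 1-byte option length and two 4-byte timestamp fields (value and
 * echo reply): 1 + 1 + 1 + 1 + 4 + 4 = 12.
 */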
365 * bnx2x_set_lro_mss - calculate the approximate value of the MSS
368 * @parsing_flags: parsing flags from the START CQE
369 * @len_on_bd: total length of the first packet for the
372 * Approximate value of the MSS for this aggregation, calculated using
373 * its first packet.
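 *
 * For example, an untagged IPv4 aggregation that carries TCP timestamps
 * gives hdrs_len = ETH_HLEN (14) + sizeof(struct iphdr) (20) +
 * sizeof(struct tcphdr) (20) + TPA_TSTAMP_OPT_LEN (12) = 66, so the
 * estimated MSS is len_on_bd - 66.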
375 static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
379 * TPA aggregation won't have either IP options or TCP options
380 * other than timestamp or IPv6 extension headers.
382 u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
384 if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
385 PRS_FLAG_OVERETH_IPV6)
386 hdrs_len += sizeof(struct ipv6hdr);
388 hdrs_len += sizeof(struct iphdr);
391 /* Check if there was a TCP timestamp; if there was, it will
392 * always be 12 bytes long: nop nop kind length echo val.
394 * Otherwise FW would close the aggregation.
396 if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
397 hdrs_len += TPA_TSTAMP_OPT_LEN;
399 return len_on_bd - hdrs_len;
402 static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
403 u16 queue, struct sk_buff *skb,
404 struct eth_end_agg_rx_cqe *cqe,
407 struct sw_rx_page *rx_pg, old_rx_pg;
408 u32 i, frag_len, frag_size, pages;
411 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
412 u16 len_on_bd = tpa_info->len_on_bd;
414 frag_size = le16_to_cpu(cqe->pkt_len) - len_on_bd;
415 pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT;
417 /* This is needed in order to enable forwarding support */
419 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
420 tpa_info->parsing_flags, len_on_bd);
422 #ifdef BNX2X_STOP_ON_ERROR
423 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
424 BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
426 BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);
432 /* Run through the SGL and compose the fragmented skb */
433 for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
434 u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));
436 /* FW gives the indices of the SGE as if the ring is an array
437 (meaning that "next" element will consume 2 indices) */
438 frag_len = min(frag_size, (u32)(SGE_PAGE_SIZE*PAGES_PER_SGE));
439 rx_pg = &fp->rx_page_ring[sge_idx];
442 /* If we fail to allocate a substitute page, we simply stop
443 where we are and drop the whole packet */
444 err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
446 fp->eth_q_stats.rx_skb_alloc_failed++;
450 /* Unmap the page as we are going to pass it to the stack */
451 dma_unmap_page(&bp->pdev->dev,
452 dma_unmap_addr(&old_rx_pg, mapping),
453 SGE_PAGE_SIZE*PAGES_PER_SGE, DMA_FROM_DEVICE);
455 /* Add one frag and update the appropriate fields in the skb */
456 skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
458 skb->data_len += frag_len;
459 skb->truesize += SGE_PAGE_SIZE * PAGES_PER_SGE;
460 skb->len += frag_len;
462 frag_size -= frag_len;
468 static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
469 u16 queue, struct eth_end_agg_rx_cqe *cqe,
472 struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
473 struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
474 u8 pad = tpa_info->placement_offset;
475 u16 len = tpa_info->len_on_bd;
476 struct sk_buff *skb = rx_buf->skb;
478 struct sk_buff *new_skb;
479 u8 old_tpa_state = tpa_info->tpa_state;
481 tpa_info->tpa_state = BNX2X_TPA_STOP;
483 /* If there was an error during the handling of the TPA_START -
484 * drop this aggregation.
486 if (old_tpa_state == BNX2X_TPA_ERROR)
489 /* Try to allocate the new skb */
490 new_skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
492 /* Unmap skb in the pool anyway, as we are going to change
493 pool entry status to BNX2X_TPA_STOP even if new skb allocation
495 dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
496 fp->rx_buf_size, DMA_FROM_DEVICE);
498 if (likely(new_skb)) {
500 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
502 #ifdef BNX2X_STOP_ON_ERROR
503 if (pad + len > fp->rx_buf_size) {
504 BNX2X_ERR("skb_put is about to fail... "
505 "pad %d len %d rx_buf_size %d\n",
506 pad, len, fp->rx_buf_size);
512 skb_reserve(skb, pad);
515 skb->protocol = eth_type_trans(skb, bp->dev);
516 skb->ip_summed = CHECKSUM_UNNECESSARY;
518 if (!bnx2x_fill_frag_skb(bp, fp, queue, skb, cqe, cqe_idx)) {
519 if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
520 __vlan_hwaccel_put_tag(skb, tpa_info->vlan_tag);
521 napi_gro_receive(&fp->napi, skb);
523 DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
524 " - dropping packet!\n");
525 dev_kfree_skb_any(skb);
529 /* put new skb in bin */
530 rx_buf->skb = new_skb;
536 /* drop the packet and keep the buffer in the bin */
537 DP(NETIF_MSG_RX_STATUS,
538 "Failed to allocate or map a new skb - dropping packet!\n");
539 fp->eth_q_stats.rx_skb_alloc_failed++;
542 /* Set Toeplitz hash value in the skb using the value from the
543 * CQE (calculated by HW).
545 static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
548 /* Set Toeplitz hash from CQE */
549 if ((bp->dev->features & NETIF_F_RXHASH) &&
550 (cqe->fast_path_cqe.status_flags &
551 ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
553 le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
556 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
558 struct bnx2x *bp = fp->bp;
559 u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
560 u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
563 #ifdef BNX2X_STOP_ON_ERROR
564 if (unlikely(bp->panic))
568 /* CQ "next element" is of the same size as a regular element,
569 that's why it's ok here */
570 hw_comp_cons = le16_to_cpu(*fp->rx_cons_sb);
571 if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT)
574 bd_cons = fp->rx_bd_cons;
575 bd_prod = fp->rx_bd_prod;
576 bd_prod_fw = bd_prod;
577 sw_comp_cons = fp->rx_comp_cons;
578 sw_comp_prod = fp->rx_comp_prod;
580 /* Memory barrier necessary as speculative reads of the rx
581 * buffer can be ahead of the index in the status block
585 DP(NETIF_MSG_RX_STATUS,
586 "queue[%d]: hw_comp_cons %u sw_comp_cons %u\n",
587 fp->index, hw_comp_cons, sw_comp_cons);
589 while (sw_comp_cons != hw_comp_cons) {
590 struct sw_rx_bd *rx_buf = NULL;
592 union eth_rx_cqe *cqe;
593 struct eth_fast_path_rx_cqe *cqe_fp;
595 enum eth_rx_cqe_type cqe_fp_type;
598 #ifdef BNX2X_STOP_ON_ERROR
599 if (unlikely(bp->panic))
603 comp_ring_cons = RCQ_BD(sw_comp_cons);
604 bd_prod = RX_BD(bd_prod);
605 bd_cons = RX_BD(bd_cons);
607 /* Prefetch the page containing the BD descriptor
608 at producer's index. It will be needed when new skb is
610 prefetch((void *)(PAGE_ALIGN((unsigned long)
611 (&fp->rx_desc_ring[bd_prod])) -
614 cqe = &fp->rx_comp_ring[comp_ring_cons];
615 cqe_fp = &cqe->fast_path_cqe;
616 cqe_fp_flags = cqe_fp->type_error_flags;
617 cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE;
619 DP(NETIF_MSG_RX_STATUS, "CQE type %x err %x status %x"
620 " queue %x vlan %x len %u\n", CQE_TYPE(cqe_fp_flags),
621 cqe_fp_flags, cqe_fp->status_flags,
622 le32_to_cpu(cqe_fp->rss_hash_result),
623 le16_to_cpu(cqe_fp->vlan_tag), le16_to_cpu(cqe_fp->pkt_len));
625 /* is this a slowpath msg? */
626 if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
627 bnx2x_sp_event(fp, cqe);
630 /* this is an rx packet */
632 rx_buf = &fp->rx_buf_ring[bd_cons];
636 if (!CQE_TYPE_FAST(cqe_fp_type)) {
637 #ifdef BNX2X_STOP_ON_ERROR
639 if (fp->disable_tpa &&
640 (CQE_TYPE_START(cqe_fp_type) ||
641 CQE_TYPE_STOP(cqe_fp_type)))
642 BNX2X_ERR("START/STOP packet while "
643 "disable_tpa type %x\n",
644 CQE_TYPE(cqe_fp_type));
647 if (CQE_TYPE_START(cqe_fp_type)) {
648 u16 queue = cqe_fp->queue_index;
649 DP(NETIF_MSG_RX_STATUS,
650 "calling tpa_start on queue %d\n",
653 bnx2x_tpa_start(fp, queue, skb,
657 /* Set Toeplitz hash for LRO skb */
658 bnx2x_set_skb_rxhash(bp, cqe, skb);
664 cqe->end_agg_cqe.queue_index;
665 DP(NETIF_MSG_RX_STATUS,
666 "calling tpa_stop on queue %d\n",
669 bnx2x_tpa_stop(bp, fp, queue,
672 #ifdef BNX2X_STOP_ON_ERROR
677 bnx2x_update_sge_prod(fp, cqe_fp);
682 len = le16_to_cpu(cqe_fp->pkt_len);
683 pad = cqe_fp->placement_offset;
684 dma_sync_single_for_cpu(&bp->pdev->dev,
685 dma_unmap_addr(rx_buf, mapping),
686 pad + RX_COPY_THRESH,
688 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
690 /* is this an error packet? */
691 if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
693 "ERROR flags %x rx packet %u\n",
694 cqe_fp_flags, sw_comp_cons);
695 fp->eth_q_stats.rx_err_discard_pkt++;
699 /* Since we don't have a jumbo ring
700 * copy small packets if mtu > 1500
702 if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
703 (len <= RX_COPY_THRESH)) {
704 struct sk_buff *new_skb;
706 new_skb = netdev_alloc_skb(bp->dev, len + pad);
707 if (new_skb == NULL) {
709 "ERROR packet dropped "
710 "because of alloc failure\n");
711 fp->eth_q_stats.rx_skb_alloc_failed++;
716 skb_copy_from_linear_data_offset(skb, pad,
717 new_skb->data + pad, len);
718 skb_reserve(new_skb, pad);
719 skb_put(new_skb, len);
721 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
726 if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
727 dma_unmap_single(&bp->pdev->dev,
728 dma_unmap_addr(rx_buf, mapping),
731 skb_reserve(skb, pad);
736 "ERROR packet dropped because "
737 "of alloc failure\n");
738 fp->eth_q_stats.rx_skb_alloc_failed++;
740 bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
744 skb->protocol = eth_type_trans(skb, bp->dev);
746 /* Set Toeplitz hash for a non-LRO skb */
747 bnx2x_set_skb_rxhash(bp, cqe, skb);
749 skb_checksum_none_assert(skb);
751 if (bp->dev->features & NETIF_F_RXCSUM) {
753 if (likely(BNX2X_RX_CSUM_OK(cqe)))
754 skb->ip_summed = CHECKSUM_UNNECESSARY;
756 fp->eth_q_stats.hw_csum_err++;
760 skb_record_rx_queue(skb, fp->rx_queue);
762 if (le16_to_cpu(cqe_fp->pars_flags.flags) &
764 __vlan_hwaccel_put_tag(skb,
765 le16_to_cpu(cqe_fp->vlan_tag));
766 napi_gro_receive(&fp->napi, skb);
772 bd_cons = NEXT_RX_IDX(bd_cons);
773 bd_prod = NEXT_RX_IDX(bd_prod);
774 bd_prod_fw = NEXT_RX_IDX(bd_prod_fw);
777 sw_comp_prod = NEXT_RCQ_IDX(sw_comp_prod);
778 sw_comp_cons = NEXT_RCQ_IDX(sw_comp_cons);
780 if (rx_pkt == budget)
784 fp->rx_bd_cons = bd_cons;
785 fp->rx_bd_prod = bd_prod_fw;
786 fp->rx_comp_cons = sw_comp_cons;
787 fp->rx_comp_prod = sw_comp_prod;
789 /* Update producers */
790 bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
793 fp->rx_pkt += rx_pkt;
799 static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
801 struct bnx2x_fastpath *fp = fp_cookie;
802 struct bnx2x *bp = fp->bp;
805 DP(BNX2X_MSG_FP, "got an MSI-X interrupt on IDX:SB "
806 "[fp %d fw_sd %d igusb %d]\n",
807 fp->index, fp->fw_sb_id, fp->igu_sb_id);
808 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_DISABLE, 0);
810 #ifdef BNX2X_STOP_ON_ERROR
811 if (unlikely(bp->panic))
815 /* Handle Rx and Tx according to MSI-X vector */
816 prefetch(fp->rx_cons_sb);
818 for_each_cos_in_tx_queue(fp, cos)
819 prefetch(fp->txdata[cos].tx_cons_sb);
821 prefetch(&fp->sb_running_index[SM_RX_ID]);
822 napi_schedule(&bnx2x_fp(bp, fp->index, napi));
827 /* HW Lock for shared dual port PHYs */
828 void bnx2x_acquire_phy_lock(struct bnx2x *bp)
830 mutex_lock(&bp->port.phy_mutex);
832 if (bp->port.need_hw_lock)
833 bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
836 void bnx2x_release_phy_lock(struct bnx2x *bp)
838 if (bp->port.need_hw_lock)
839 bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_MDIO);
841 mutex_unlock(&bp->port.phy_mutex);
844 /* calculates MF speed according to the current line speed and MF configuration */
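/* Illustrative numbers only, following the arithmetic below: on the
 * percentage-based branch a maxCfg of 50 on a 10000 Mbps link gives
 * (10000 * 50) / 100 = 5000 Mbps, while the other branch treats maxCfg as
 * units of 100 Mbps, so maxCfg = 25 caps the reported speed at 2500 Mbps.
 */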
845 u16 bnx2x_get_mf_speed(struct bnx2x *bp)
847 u16 line_speed = bp->link_vars.line_speed;
849 u16 maxCfg = bnx2x_extract_max_cfg(bp,
850 bp->mf_config[BP_VN(bp)]);
852 /* Calculate the current MAX line speed limit for the MF
856 line_speed = (line_speed * maxCfg) / 100;
858 u16 vn_max_rate = maxCfg * 100;
860 if (vn_max_rate < line_speed)
861 line_speed = vn_max_rate;
869 * bnx2x_fill_report_data - fill link report data to report
872 * @data: link state to update
874 * It uses non-atomic bit operations because it is called under the mutex.
876 static inline void bnx2x_fill_report_data(struct bnx2x *bp,
877 struct bnx2x_link_report_data *data)
879 u16 line_speed = bnx2x_get_mf_speed(bp);
881 memset(data, 0, sizeof(*data));
883 /* Fill the report data: effective line speed */
884 data->line_speed = line_speed;
887 if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS))
888 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
889 &data->link_report_flags);
892 if (bp->link_vars.duplex == DUPLEX_FULL)
893 __set_bit(BNX2X_LINK_REPORT_FD, &data->link_report_flags);
895 /* Rx Flow Control is ON */
896 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX)
897 __set_bit(BNX2X_LINK_REPORT_RX_FC_ON, &data->link_report_flags);
899 /* Tx Flow Control is ON */
900 if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
901 __set_bit(BNX2X_LINK_REPORT_TX_FC_ON, &data->link_report_flags);
905 * bnx2x_link_report - report link status to OS.
909 * Calls the __bnx2x_link_report() under the same locking scheme
910 * as a link/PHY state managing code to ensure a consistent link
914 void bnx2x_link_report(struct bnx2x *bp)
916 bnx2x_acquire_phy_lock(bp);
917 __bnx2x_link_report(bp);
918 bnx2x_release_phy_lock(bp);
922 * __bnx2x_link_report - report link status to OS.
926 * Non-atomic implementation.
927 * Should be called under the phy_lock.
929 void __bnx2x_link_report(struct bnx2x *bp)
931 struct bnx2x_link_report_data cur_data;
935 bnx2x_read_mf_cfg(bp);
937 /* Read the current link report info */
938 bnx2x_fill_report_data(bp, &cur_data);
940 /* Don't report link down or exactly the same link status twice */
941 if (!memcmp(&cur_data, &bp->last_reported_link, sizeof(cur_data)) ||
942 (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
943 &bp->last_reported_link.link_report_flags) &&
944 test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
945 &cur_data.link_report_flags)))
950 /* We are going to report new link parameters now -
951 * remember the current data for the next time.
953 memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data));
955 if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
956 &cur_data.link_report_flags)) {
957 netif_carrier_off(bp->dev);
958 netdev_err(bp->dev, "NIC Link is Down\n");
964 netif_carrier_on(bp->dev);
966 if (test_and_clear_bit(BNX2X_LINK_REPORT_FD,
967 &cur_data.link_report_flags))
972 /* Handle the FC at the end so that only these flags could possibly be
973 * set. This way we may easily check if there is no FC
976 if (cur_data.link_report_flags) {
977 if (test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
978 &cur_data.link_report_flags)) {
979 if (test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
980 &cur_data.link_report_flags))
981 flow = "ON - receive & transmit";
983 flow = "ON - receive";
985 flow = "ON - transmit";
990 netdev_info(bp->dev, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n",
991 cur_data.line_speed, duplex, flow);
995 void bnx2x_init_rx_rings(struct bnx2x *bp)
997 int func = BP_FUNC(bp);
1001 /* Allocate TPA resources */
1002 for_each_rx_queue(bp, j) {
1003 struct bnx2x_fastpath *fp = &bp->fp[j];
1006 "mtu %d rx_buf_size %d\n", bp->dev->mtu, fp->rx_buf_size);
1008 if (!fp->disable_tpa) {
1009 /* Fill the per-aggregation pool */
1010 for (i = 0; i < MAX_AGG_QS(bp); i++) {
1011 struct bnx2x_agg_info *tpa_info =
1013 struct sw_rx_bd *first_buf =
1014 &tpa_info->first_buf;
1016 first_buf->skb = netdev_alloc_skb(bp->dev,
1018 if (!first_buf->skb) {
1019 BNX2X_ERR("Failed to allocate TPA "
1020 "skb pool for queue[%d] - "
1021 "disabling TPA on this "
1023 bnx2x_free_tpa_pool(bp, fp, i);
1024 fp->disable_tpa = 1;
1027 dma_unmap_addr_set(first_buf, mapping, 0);
1028 tpa_info->tpa_state = BNX2X_TPA_STOP;
1031 /* "next page" elements initialization */
1032 bnx2x_set_next_page_sgl(fp);
1034 /* set SGEs bit mask */
1035 bnx2x_init_sge_ring_bit_mask(fp);
1037 /* Allocate SGEs and initialize the ring elements */
1038 for (i = 0, ring_prod = 0;
1039 i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
1041 if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
1042 BNX2X_ERR("was only able to allocate "
1044 BNX2X_ERR("disabling TPA for "
1046 /* Cleanup already allocated elements */
1047 bnx2x_free_rx_sge_range(bp, fp,
1049 bnx2x_free_tpa_pool(bp, fp,
1051 fp->disable_tpa = 1;
1055 ring_prod = NEXT_SGE_IDX(ring_prod);
1058 fp->rx_sge_prod = ring_prod;
1062 for_each_rx_queue(bp, j) {
1063 struct bnx2x_fastpath *fp = &bp->fp[j];
1067 /* Activate BD ring */
1069 * this will generate an interrupt (to the TSTORM)
1070 * must only be done after chip is initialized
1072 bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
1078 if (CHIP_IS_E1(bp)) {
1079 REG_WR(bp, BAR_USTRORM_INTMEM +
1080 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
1081 U64_LO(fp->rx_comp_mapping));
1082 REG_WR(bp, BAR_USTRORM_INTMEM +
1083 USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
1084 U64_HI(fp->rx_comp_mapping));
1089 static void bnx2x_free_tx_skbs(struct bnx2x *bp)
1094 for_each_tx_queue(bp, i) {
1095 struct bnx2x_fastpath *fp = &bp->fp[i];
1096 for_each_cos_in_tx_queue(fp, cos) {
1097 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
1099 u16 sw_prod = txdata->tx_pkt_prod;
1100 u16 sw_cons = txdata->tx_pkt_cons;
1102 while (sw_cons != sw_prod) {
1103 bnx2x_free_tx_pkt(bp, txdata, TX_BD(sw_cons));
1110 static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp)
1112 struct bnx2x *bp = fp->bp;
1115 /* ring wasn't allocated */
1116 if (fp->rx_buf_ring == NULL)
1119 for (i = 0; i < NUM_RX_BD; i++) {
1120 struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
1121 struct sk_buff *skb = rx_buf->skb;
1125 dma_unmap_single(&bp->pdev->dev,
1126 dma_unmap_addr(rx_buf, mapping),
1127 fp->rx_buf_size, DMA_FROM_DEVICE);
1134 static void bnx2x_free_rx_skbs(struct bnx2x *bp)
1138 for_each_rx_queue(bp, j) {
1139 struct bnx2x_fastpath *fp = &bp->fp[j];
1141 bnx2x_free_rx_bds(fp);
1143 if (!fp->disable_tpa)
1144 bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp));
1148 void bnx2x_free_skbs(struct bnx2x *bp)
1150 bnx2x_free_tx_skbs(bp);
1151 bnx2x_free_rx_skbs(bp);
1154 void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1156 /* load old values */
1157 u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1159 if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1160 /* leave all but MAX value */
1161 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1163 /* set new MAX value */
1164 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1165 & FUNC_MF_CFG_MAX_BW_MASK;
1167 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
1172 * bnx2x_free_msix_irqs - free previously requested MSI-X IRQ vectors
1174 * @bp: driver handle
1175 * @nvecs: number of vectors to be released
1177 static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
1181 if (nvecs == offset)
1183 free_irq(bp->msix_table[offset].vector, bp->dev);
1184 DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
1185 bp->msix_table[offset].vector);
1188 if (nvecs == offset)
1193 for_each_eth_queue(bp, i) {
1194 if (nvecs == offset)
1196 DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d "
1197 "irq\n", i, bp->msix_table[offset].vector);
1199 free_irq(bp->msix_table[offset++].vector, &bp->fp[i]);
1203 void bnx2x_free_irq(struct bnx2x *bp)
1205 if (bp->flags & USING_MSIX_FLAG)
1206 bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) +
1208 else if (bp->flags & USING_MSI_FLAG)
1209 free_irq(bp->pdev->irq, bp->dev);
1211 free_irq(bp->pdev->irq, bp->dev);
1214 int bnx2x_enable_msix(struct bnx2x *bp)
1216 int msix_vec = 0, i, rc, req_cnt;
1218 bp->msix_table[msix_vec].entry = msix_vec;
1219 DP(NETIF_MSG_IFUP, "msix_table[0].entry = %d (slowpath)\n",
1220 bp->msix_table[0].entry);
1224 bp->msix_table[msix_vec].entry = msix_vec;
1225 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d (CNIC)\n",
1226 bp->msix_table[msix_vec].entry, bp->msix_table[msix_vec].entry);
1229 /* We need separate vectors for ETH queues only (not FCoE) */
1230 for_each_eth_queue(bp, i) {
1231 bp->msix_table[msix_vec].entry = msix_vec;
1232 DP(NETIF_MSG_IFUP, "msix_table[%d].entry = %d "
1233 "(fastpath #%u)\n", msix_vec, msix_vec, i);
1237 req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1;
1239 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt);
1242 * reconfigure number of tx/rx queues according to available
1245 if (rc >= BNX2X_MIN_MSIX_VEC_CNT) {
1246 /* how many fewer vectors will we have? */
1247 int diff = req_cnt - rc;
1250 "Trying to use less MSI-X vectors: %d\n", rc);
1252 rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], rc);
1256 "MSI-X is not attainable rc %d\n", rc);
1260 * decrease number of queues by number of unallocated entries
1262 bp->num_queues -= diff;
1264 DP(NETIF_MSG_IFUP, "New queue configuration set: %d\n",
1267 /* fall back to INTx if not enough memory */
1269 bp->flags |= DISABLE_MSI_FLAG;
1270 DP(NETIF_MSG_IFUP, "MSI-X is not attainable rc %d\n", rc);
1274 bp->flags |= USING_MSIX_FLAG;
1279 static int bnx2x_req_msix_irqs(struct bnx2x *bp)
1281 int i, rc, offset = 0;
1283 rc = request_irq(bp->msix_table[offset++].vector,
1284 bnx2x_msix_sp_int, 0,
1285 bp->dev->name, bp->dev);
1287 BNX2X_ERR("request sp irq failed\n");
1294 for_each_eth_queue(bp, i) {
1295 struct bnx2x_fastpath *fp = &bp->fp[i];
1296 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
1299 rc = request_irq(bp->msix_table[offset].vector,
1300 bnx2x_msix_fp_int, 0, fp->name, fp);
1302 BNX2X_ERR("request fp #%d irq (%d) failed rc %d\n", i,
1303 bp->msix_table[offset].vector, rc);
1304 bnx2x_free_msix_irqs(bp, offset);
1311 i = BNX2X_NUM_ETH_QUEUES(bp);
1312 offset = 1 + CNIC_PRESENT;
1313 netdev_info(bp->dev, "using MSI-X IRQs: sp %d fp[%d] %d"
1315 bp->msix_table[0].vector,
1316 0, bp->msix_table[offset].vector,
1317 i - 1, bp->msix_table[offset + i - 1].vector);
1322 int bnx2x_enable_msi(struct bnx2x *bp)
1326 rc = pci_enable_msi(bp->pdev);
1328 DP(NETIF_MSG_IFUP, "MSI is not attainable\n");
1331 bp->flags |= USING_MSI_FLAG;
1336 static int bnx2x_req_irq(struct bnx2x *bp)
1338 unsigned long flags;
1341 if (bp->flags & USING_MSI_FLAG)
1344 flags = IRQF_SHARED;
1346 rc = request_irq(bp->pdev->irq, bnx2x_interrupt, flags,
1347 bp->dev->name, bp->dev);
1351 static inline int bnx2x_setup_irqs(struct bnx2x *bp)
1354 if (bp->flags & USING_MSIX_FLAG) {
1355 rc = bnx2x_req_msix_irqs(bp);
1360 rc = bnx2x_req_irq(bp);
1362 BNX2X_ERR("IRQ request failed rc %d, aborting\n", rc);
1365 if (bp->flags & USING_MSI_FLAG) {
1366 bp->dev->irq = bp->pdev->irq;
1367 netdev_info(bp->dev, "using MSI IRQ %d\n",
1375 static inline void bnx2x_napi_enable(struct bnx2x *bp)
1379 for_each_rx_queue(bp, i)
1380 napi_enable(&bnx2x_fp(bp, i, napi));
1383 static inline void bnx2x_napi_disable(struct bnx2x *bp)
1387 for_each_rx_queue(bp, i)
1388 napi_disable(&bnx2x_fp(bp, i, napi));
1391 void bnx2x_netif_start(struct bnx2x *bp)
1393 if (netif_running(bp->dev)) {
1394 bnx2x_napi_enable(bp);
1395 bnx2x_int_enable(bp);
1396 if (bp->state == BNX2X_STATE_OPEN)
1397 netif_tx_wake_all_queues(bp->dev);
1401 void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
1403 bnx2x_int_disable_sync(bp, disable_hw);
1404 bnx2x_napi_disable(bp);
1407 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
1409 struct bnx2x *bp = netdev_priv(dev);
1413 struct ethhdr *hdr = (struct ethhdr *)skb->data;
1414 u16 ether_type = ntohs(hdr->h_proto);
1416 /* Skip VLAN tag if present */
1417 if (ether_type == ETH_P_8021Q) {
1418 struct vlan_ethhdr *vhdr =
1419 (struct vlan_ethhdr *)skb->data;
1421 ether_type = ntohs(vhdr->h_vlan_encapsulated_proto);
1424 /* If ethertype is FCoE or FIP - use FCoE ring */
1425 if ((ether_type == ETH_P_FCOE) || (ether_type == ETH_P_FIP))
1426 return bnx2x_fcoe_tx(bp, txq_index);
1429 /* select a non-FCoE queue */
1430 return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp));
1433 void bnx2x_set_num_queues(struct bnx2x *bp)
1435 switch (bp->multi_mode) {
1436 case ETH_RSS_MODE_DISABLED:
1439 case ETH_RSS_MODE_REGULAR:
1440 bp->num_queues = bnx2x_calc_num_queues(bp);
1448 /* Add special queues */
1449 bp->num_queues += NON_ETH_CONTEXT_USE;
1453 * bnx2x_set_real_num_queues - configure netdev->real_num_[tx,rx]_queues
1455 * @bp: Driver handle
1457 * We currently support at most 16 Tx queues for each CoS, thus we will
1458 * allocate a multiple of 16 for ETH L2 rings according to the value of the
1461 * If there is an FCoE L2 queue the appropriate Tx queue will have the next
1462 * index after all ETH L2 indices.
1464 * If the actual number of Tx queues (for each CoS) is less than 16 then there
1465 * will be holes at the end of each group of 16 ETH L2 indices (0..15,
1466 * 16..31, ...) with indices that are not coupled with any real Tx queue.
1468 * The proper configuration of skb->queue_mapping is handled by
1469 * bnx2x_select_queue() and __skb_tx_hash().
1471 * bnx2x_setup_tc() takes care of the proper TC mappings so that __skb_tx_hash()
1472 * will return a proper Tx index if TC is enabled (netdev->num_tc > 0).
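 *
 * For example (illustrative figures), with bp->max_cos = 3 and 8 ETH L2
 * rings, tx below is 3 * 16 = 48 netdev Tx queues; the ETH L2 rings use
 * indices 0..7, 16..23 and 32..39, and the remaining indices in each
 * group of 16 are the holes described above.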
1474 static inline int bnx2x_set_real_num_queues(struct bnx2x *bp)
1478 tx = MAX_TXQS_PER_COS * bp->max_cos;
1479 rx = BNX2X_NUM_ETH_QUEUES(bp);
1481 /* account for fcoe queue */
1489 rc = netif_set_real_num_tx_queues(bp->dev, tx);
1491 BNX2X_ERR("Failed to set real number of Tx queues: %d\n", rc);
1494 rc = netif_set_real_num_rx_queues(bp->dev, rx);
1496 BNX2X_ERR("Failed to set real number of Rx queues: %d\n", rc);
1500 DP(NETIF_MSG_DRV, "Setting real num queues to (tx, rx) (%d, %d)\n",
1506 static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)
1510 for_each_queue(bp, i) {
1511 struct bnx2x_fastpath *fp = &bp->fp[i];
1513 /* Always use a mini-jumbo MTU for the FCoE L2 ring */
1516 * Although there are no IP frames expected to arrive to
1517 * this ring we still want to add an
1518 * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer
1522 BNX2X_FCOE_MINI_JUMBO_MTU + ETH_OVREHEAD +
1523 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1526 bp->dev->mtu + ETH_OVREHEAD +
1527 BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
1531 static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
1534 u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
1535 u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);
1538 * Prepare the initial contents of the indirection table if RSS is
1541 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1542 for (i = 0; i < sizeof(ind_table); i++)
1544 bp->fp->cl_id + (i % num_eth_queues);
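/* For example (made-up client id), with 4 ETH queues and a leading
 * client id of 17 the table repeats 17, 18, 19, 20, 17, 18, ... so the
 * RSS hash buckets are spread across the ETH clients.
 */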
1548 * For 57710 and 57711 SEARCHER configuration (rss_keys) is
1549 * per-port, so if explicit configuration is needed, do it only
1552 * For 57712 and newer on the other hand it's a per-function
1555 return bnx2x_config_rss_pf(bp, ind_table,
1556 bp->port.pmf || !CHIP_IS_E1x(bp));
1559 int bnx2x_config_rss_pf(struct bnx2x *bp, u8 *ind_table, bool config_hash)
1561 struct bnx2x_config_rss_params params = {0};
1564 /* Although RSS is meaningless when there is a single HW queue we
1565 * still need it enabled in order to have HW Rx hash generated.
1567 * if (!is_eth_multi(bp))
1568 * bp->multi_mode = ETH_RSS_MODE_DISABLED;
1571 params.rss_obj = &bp->rss_conf_obj;
1573 __set_bit(RAMROD_COMP_WAIT, ¶ms.ramrod_flags);
1576 switch (bp->multi_mode) {
1577 case ETH_RSS_MODE_DISABLED:
1578 __set_bit(BNX2X_RSS_MODE_DISABLED, ¶ms.rss_flags);
1580 case ETH_RSS_MODE_REGULAR:
1581 __set_bit(BNX2X_RSS_MODE_REGULAR, ¶ms.rss_flags);
1583 case ETH_RSS_MODE_VLAN_PRI:
1584 __set_bit(BNX2X_RSS_MODE_VLAN_PRI, ¶ms.rss_flags);
1586 case ETH_RSS_MODE_E1HOV_PRI:
1587 __set_bit(BNX2X_RSS_MODE_E1HOV_PRI, ¶ms.rss_flags);
1589 case ETH_RSS_MODE_IP_DSCP:
1590 __set_bit(BNX2X_RSS_MODE_IP_DSCP, ¶ms.rss_flags);
1593 BNX2X_ERR("Unknown multi_mode: %d\n", bp->multi_mode);
1597 /* If RSS is enabled */
1598 if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
1599 /* RSS configuration */
1600 __set_bit(BNX2X_RSS_IPV4, ¶ms.rss_flags);
1601 __set_bit(BNX2X_RSS_IPV4_TCP, ¶ms.rss_flags);
1602 __set_bit(BNX2X_RSS_IPV6, ¶ms.rss_flags);
1603 __set_bit(BNX2X_RSS_IPV6_TCP, ¶ms.rss_flags);
1606 params.rss_result_mask = MULTI_MASK;
1608 memcpy(params.ind_table, ind_table, sizeof(params.ind_table));
1612 for (i = 0; i < sizeof(params.rss_key) / 4; i++)
1613 params.rss_key[i] = random32();
1615 __set_bit(BNX2X_RSS_SET_SRCH, ¶ms.rss_flags);
1619 return bnx2x_config_rss(bp, ¶ms);
1622 static inline int bnx2x_init_hw(struct bnx2x *bp, u32 load_code)
1624 struct bnx2x_func_state_params func_params = {0};
1626 /* Prepare parameters for function state transitions */
1627 __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
1629 func_params.f_obj = &bp->func_obj;
1630 func_params.cmd = BNX2X_F_CMD_HW_INIT;
1632 func_params.params.hw_init.load_phase = load_code;
1634 return bnx2x_func_state_change(bp, &func_params);
1638 * Cleans the objects that have internal lists without sending
1639 * ramrods. Should be run when interrupts are disabled.
1641 static void bnx2x_squeeze_objects(struct bnx2x *bp)
1644 unsigned long ramrod_flags = 0, vlan_mac_flags = 0;
1645 struct bnx2x_mcast_ramrod_params rparam = {0};
1646 struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj;
1648 /***************** Cleanup MACs' object first *************************/
1650 /* Wait for completion of requested */
1651 __set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
1652 /* Perform a dry cleanup */
1653 __set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
1655 /* Clean ETH primary MAC */
1656 __set_bit(BNX2X_ETH_MAC, &vlan_mac_flags);
1657 rc = mac_obj->delete_all(bp, &bp->fp->mac_obj, &vlan_mac_flags,
1660 BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc);
1662 /* Cleanup UC list */
1664 __set_bit(BNX2X_UC_LIST_MAC, &vlan_mac_flags);
1665 rc = mac_obj->delete_all(bp, mac_obj, &vlan_mac_flags,
1668 BNX2X_ERR("Failed to clean UC list MACs: %d\n", rc);
1670 /***************** Now clean mcast object *****************************/
1671 rparam.mcast_obj = &bp->mcast_obj;
1672 __set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);
1674 /* Add a DEL command... */
1675 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
1677 BNX2X_ERR("Failed to add a new DEL command to a multi-cast "
1678 "object: %d\n", rc);
1680 /* ...and wait until all pending commands are cleared */
1681 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1684 BNX2X_ERR("Failed to clean multi-cast object: %d\n",
1689 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
1693 #ifndef BNX2X_STOP_ON_ERROR
1694 #define LOAD_ERROR_EXIT(bp, label) \
1696 (bp)->state = BNX2X_STATE_ERROR; \
1700 #define LOAD_ERROR_EXIT(bp, label) \
1702 (bp)->state = BNX2X_STATE_ERROR; \
1708 /* must be called with rtnl_lock */
1709 int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1711 int port = BP_PORT(bp);
1715 #ifdef BNX2X_STOP_ON_ERROR
1716 if (unlikely(bp->panic))
1720 bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
1722 /* Set the initial link reported state to link down */
1723 bnx2x_acquire_phy_lock(bp);
1724 memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link));
1725 __set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
1726 &bp->last_reported_link.link_report_flags);
1727 bnx2x_release_phy_lock(bp);
1729 /* must be called before memory allocation and HW init */
1730 bnx2x_ilt_set_info(bp);
1733 * Zero fastpath structures preserving invariants like napi, which are
1734 * allocated only once, fp index, max_cos, bp pointer.
1735 * Also set fp->disable_tpa.
1737 for_each_queue(bp, i)
1741 /* Set the receive queues buffer size */
1742 bnx2x_set_rx_buf_size(bp);
1744 if (bnx2x_alloc_mem(bp))
1747 /* As long as bnx2x_alloc_mem() may possibly update
1748 * bp->num_queues, bnx2x_set_real_num_queues() should always
1751 rc = bnx2x_set_real_num_queues(bp);
1753 BNX2X_ERR("Unable to set real_num_queues\n");
1754 LOAD_ERROR_EXIT(bp, load_error0);
1757 /* configure multi cos mappings in kernel.
1758 * this configuration may be overridden by a multi class queue discipline
1759 * or by a dcbx negotiation result.
1761 bnx2x_setup_tc(bp->dev, bp->max_cos);
1763 bnx2x_napi_enable(bp);
1765 /* Send LOAD_REQUEST command to MCP
1766 * Returns the type of LOAD command:
1767 * if it is the first port to be initialized
1768 * common blocks should be initialized, otherwise - not
1770 if (!BP_NOMCP(bp)) {
1771 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
1773 BNX2X_ERR("MCP response failure, aborting\n");
1775 LOAD_ERROR_EXIT(bp, load_error1);
1777 if (load_code == FW_MSG_CODE_DRV_LOAD_REFUSED) {
1778 rc = -EBUSY; /* other port in diagnostic mode */
1779 LOAD_ERROR_EXIT(bp, load_error1);
1783 int path = BP_PATH(bp);
1785 DP(NETIF_MSG_IFUP, "NO MCP - load counts[%d] %d, %d, %d\n",
1786 path, load_count[path][0], load_count[path][1],
1787 load_count[path][2]);
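/* With no MCP to arbitrate the load, the driver emulates the ordering
 * itself via load_count[]: the first function to load on the path does
 * the COMMON init, the first one on each port does the PORT init, and
 * every later function performs a FUNCTION-only load.
 */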
1788 load_count[path][0]++;
1789 load_count[path][1 + port]++;
1790 DP(NETIF_MSG_IFUP, "NO MCP - new load counts[%d] %d, %d, %d\n",
1791 path, load_count[path][0], load_count[path][1],
1792 load_count[path][2]);
1793 if (load_count[path][0] == 1)
1794 load_code = FW_MSG_CODE_DRV_LOAD_COMMON;
1795 else if (load_count[path][1 + port] == 1)
1796 load_code = FW_MSG_CODE_DRV_LOAD_PORT;
1798 load_code = FW_MSG_CODE_DRV_LOAD_FUNCTION;
1801 if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1802 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
1803 (load_code == FW_MSG_CODE_DRV_LOAD_PORT)) {
1806 * We need the barrier to ensure the ordering between the
1807 * writing to bp->port.pmf here and reading it from the
1808 * bnx2x_periodic_task().
1811 queue_delayed_work(bnx2x_wq, &bp->period_task, 0);
1815 DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);
1817 /* Init Function state controlling object */
1818 bnx2x__init_func_obj(bp);
1821 rc = bnx2x_init_hw(bp, load_code);
1823 BNX2X_ERR("HW init failed, aborting\n");
1824 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1825 LOAD_ERROR_EXIT(bp, load_error2);
1828 /* Connect to IRQs */
1829 rc = bnx2x_setup_irqs(bp);
1831 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1832 LOAD_ERROR_EXIT(bp, load_error2);
1835 /* Setup NIC internals and enable interrupts */
1836 bnx2x_nic_init(bp, load_code);
1838 /* Init per-function objects */
1839 bnx2x_init_bp_objs(bp);
1841 if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
1842 (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
1843 (bp->common.shmem2_base)) {
1844 if (SHMEM2_HAS(bp, dcc_support))
1845 SHMEM2_WR(bp, dcc_support,
1846 (SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
1847 SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV));
1850 bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
1851 rc = bnx2x_func_start(bp);
1853 BNX2X_ERR("Function start failed!\n");
1854 bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1855 LOAD_ERROR_EXIT(bp, load_error3);
1858 /* Send LOAD_DONE command to MCP */
1859 if (!BP_NOMCP(bp)) {
1860 load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
1862 BNX2X_ERR("MCP response failure, aborting\n");
1864 LOAD_ERROR_EXIT(bp, load_error3);
1868 rc = bnx2x_setup_leading(bp);
1870 BNX2X_ERR("Setup leading failed!\n");
1871 LOAD_ERROR_EXIT(bp, load_error3);
1875 /* Enable Timer scan */
1876 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1);
1879 for_each_nondefault_queue(bp, i) {
1880 rc = bnx2x_setup_queue(bp, &bp->fp[i], 0);
1882 LOAD_ERROR_EXIT(bp, load_error4);
1885 rc = bnx2x_init_rss_pf(bp);
1887 LOAD_ERROR_EXIT(bp, load_error4);
1889 /* Now that Clients are configured we are ready to work */
1890 bp->state = BNX2X_STATE_OPEN;
1892 /* Configure a ucast MAC */
1893 rc = bnx2x_set_eth_mac(bp, true);
1895 LOAD_ERROR_EXIT(bp, load_error4);
1897 if (bp->pending_max) {
1898 bnx2x_update_max_mf_config(bp, bp->pending_max);
1899 bp->pending_max = 0;
1903 bnx2x_initial_phy_init(bp, load_mode);
1905 /* Start fast path */
1907 /* Initialize Rx filter. */
1908 netif_addr_lock_bh(bp->dev);
1909 bnx2x_set_rx_mode(bp->dev);
1910 netif_addr_unlock_bh(bp->dev);
1913 switch (load_mode) {
1915 /* Tx queue should only be re-enabled */
1916 netif_tx_wake_all_queues(bp->dev);
1920 netif_tx_start_all_queues(bp->dev);
1921 smp_mb__after_clear_bit();
1925 bp->state = BNX2X_STATE_DIAG;
1933 bnx2x_update_drv_flags(bp, DRV_FLAGS_DCB_CONFIGURED, 0);
1935 bnx2x__link_status_update(bp);
1937 /* start the timer */
1938 mod_timer(&bp->timer, jiffies + bp->current_interval);
1941 /* re-read iscsi info */
1942 bnx2x_get_iscsi_info(bp);
1943 bnx2x_setup_cnic_irq_info(bp);
1944 if (bp->state == BNX2X_STATE_OPEN)
1945 bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);
1947 bnx2x_inc_load_cnt(bp);
1949 /* Wait for all pending SP commands to complete */
1950 if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) {
1951 BNX2X_ERR("Timeout waiting for SP elements to complete\n");
1952 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
1956 bnx2x_dcbx_init(bp);
1959 #ifndef BNX2X_STOP_ON_ERROR
1962 /* Disable Timer scan */
1963 REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0);
1966 bnx2x_int_disable_sync(bp, 1);
1968 /* Clean queueable objects */
1969 bnx2x_squeeze_objects(bp);
1971 /* Free SKBs, SGEs, TPA pool and driver internals */
1972 bnx2x_free_skbs(bp);
1973 for_each_rx_queue(bp, i)
1974 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
1979 if (!BP_NOMCP(bp)) {
1980 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_REQ_WOL_MCP, 0);
1981 bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0);
1986 bnx2x_napi_disable(bp);
1991 #endif /* ! BNX2X_STOP_ON_ERROR */
1994 /* must be called with rtnl_lock */
1995 int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
1998 bool global = false;
2000 if ((bp->state == BNX2X_STATE_CLOSED) ||
2001 (bp->state == BNX2X_STATE_ERROR)) {
2002 /* We can get here if the driver has been unloaded
2003 * during parity error recovery and is either waiting for a
2004 * leader to complete or for other functions to unload and
2005 * then ifdown has been issued. In this case we want to
2006 * unload and let other functions complete a recovery
2009 bp->recovery_state = BNX2X_RECOVERY_DONE;
2011 bnx2x_release_leader_lock(bp);
2014 DP(NETIF_MSG_HW, "Releasing a leadership...\n");
2020 * It's important to set bp->state to a value different from
2021 * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int()
2022 * may restart the Tx from the NAPI context (see bnx2x_tx_int()).
2024 bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
2028 bnx2x_tx_disable(bp);
2031 bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);
2034 bp->rx_mode = BNX2X_RX_MODE_NONE;
2036 del_timer_sync(&bp->timer);
2038 /* Set ALWAYS_ALIVE bit in shmem */
2039 bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
2041 bnx2x_drv_pulse(bp);
2043 bnx2x_stats_handle(bp, STATS_EVENT_STOP);
2045 /* Cleanup the chip if needed */
2046 if (unload_mode != UNLOAD_RECOVERY)
2047 bnx2x_chip_cleanup(bp, unload_mode);
2049 /* Send the UNLOAD_REQUEST to the MCP */
2050 bnx2x_send_unload_req(bp, unload_mode);
2053 * Prevent transactions to host from the functions on the
2054 * engine that doesn't reset global blocks in case of global
2055 * attention once global blocks are reset and gates are opened
2056 * (the engine which leader will perform the recovery
2059 if (!CHIP_IS_E1x(bp))
2060 bnx2x_pf_disable(bp);
2062 /* Disable HW interrupts, NAPI */
2063 bnx2x_netif_stop(bp, 1);
2068 /* Report UNLOAD_DONE to MCP */
2069 bnx2x_send_unload_done(bp);
2073 * At this stage no more interrupts will arrive so we may safely clean
2074 * the queueable objects here in case they failed to get cleaned so far.
2076 bnx2x_squeeze_objects(bp);
2078 /* There should be no more pending SP commands at this stage */
2083 /* Free SKBs, SGEs, TPA pool and driver internals */
2084 bnx2x_free_skbs(bp);
2085 for_each_rx_queue(bp, i)
2086 bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
2090 bp->state = BNX2X_STATE_CLOSED;
2092 /* Check if there are pending parity attentions. If there are - set
2093 * RECOVERY_IN_PROGRESS.
2095 if (bnx2x_chk_parity_attn(bp, &global, false)) {
2096 bnx2x_set_reset_in_progress(bp);
2098 /* Set RESET_IS_GLOBAL if needed */
2100 bnx2x_set_reset_global(bp);
2104 /* The last driver must disable a "close the gate" if there is no
2105 * parity attention or "process kill" pending.
2107 if (!bnx2x_dec_load_cnt(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp)))
2108 bnx2x_disable_close_the_gate(bp);
2113 int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
2117 /* If there is no power capability, silently succeed */
2119 DP(NETIF_MSG_HW, "No power capability. Breaking.\n");
2123 pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr);
2127 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2128 ((pmcsr & ~PCI_PM_CTRL_STATE_MASK) |
2129 PCI_PM_CTRL_PME_STATUS));
2131 if (pmcsr & PCI_PM_CTRL_STATE_MASK)
2132 /* delay required during transition out of D3hot */
2137 /* If there are other clients above, don't
2138 shut down the power */
2139 if (atomic_read(&bp->pdev->enable_cnt) != 1)
2141 /* Don't shut down the power for emulation and FPGA */
2142 if (CHIP_REV_IS_SLOW(bp))
2145 pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
2149 pmcsr |= PCI_PM_CTRL_PME_ENABLE;
2151 pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
2154 /* No more memory access after this point until
2155 * device is brought back to D0.
2166 * net_device service functions
2168 int bnx2x_poll(struct napi_struct *napi, int budget)
2172 struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
2174 struct bnx2x *bp = fp->bp;
2177 #ifdef BNX2X_STOP_ON_ERROR
2178 if (unlikely(bp->panic)) {
2179 napi_complete(napi);
2184 for_each_cos_in_tx_queue(fp, cos)
2185 if (bnx2x_tx_queue_has_work(&fp->txdata[cos]))
2186 bnx2x_tx_int(bp, &fp->txdata[cos]);
2189 if (bnx2x_has_rx_work(fp)) {
2190 work_done += bnx2x_rx_int(fp, budget - work_done);
2192 /* must not complete if we consumed full budget */
2193 if (work_done >= budget)
2197 /* Fall out from the NAPI loop if needed */
2198 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2200 /* No need to update SB for FCoE L2 ring as long as
2201 * it's connected to the default SB and the SB
2202 * has been updated when NAPI was scheduled.
2204 if (IS_FCOE_FP(fp)) {
2205 napi_complete(napi);
2210 bnx2x_update_fpsb_idx(fp);
2211 /* bnx2x_has_rx_work() reads the status block,
2212 * thus we need to ensure that status block indices
2213 * have been actually read (bnx2x_update_fpsb_idx)
2214 * prior to this check (bnx2x_has_rx_work) so that
2215 * we won't write the "newer" value of the status block
2216 * to IGU (if there was a DMA right after
2217 * bnx2x_has_rx_work and if there is no rmb, the memory
2218 * reading (bnx2x_update_fpsb_idx) may be postponed
2219 * to right before bnx2x_ack_sb). In this case there
2220 * will never be another interrupt until there is
2221 * another update of the status block, while there
2222 * is still unhandled work.
2226 if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
2227 napi_complete(napi);
2228 /* Re-enable interrupts */
2230 "Update index to %d\n", fp->fp_hc_idx);
2231 bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
2232 le16_to_cpu(fp->fp_hc_idx),
2242 /* we split the first BD into headers and data BDs
2243 * to ease the pain of our fellow microcode engineers
2244 * we use one mapping for both BDs
2245 * So far this has only been observed to happen
2246 * in Other Operating Systems(TM)
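 *
 * Roughly: the start BD is trimmed to carry only the hlen header bytes,
 * and a new data BD is pointed hlen bytes into the same DMA mapping to
 * carry the remaining (old_len - hlen) bytes. The data BD is flagged
 * BNX2X_TSO_SPLIT_BD so bnx2x_free_tx_pkt() knows it has no mapping of
 * its own to unmap.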
2248 static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
2249 struct bnx2x_fp_txdata *txdata,
2250 struct sw_tx_bd *tx_buf,
2251 struct eth_tx_start_bd **tx_bd, u16 hlen,
2252 u16 bd_prod, int nbd)
2254 struct eth_tx_start_bd *h_tx_bd = *tx_bd;
2255 struct eth_tx_bd *d_tx_bd;
2257 int old_len = le16_to_cpu(h_tx_bd->nbytes);
2259 /* first fix first BD */
2260 h_tx_bd->nbd = cpu_to_le16(nbd);
2261 h_tx_bd->nbytes = cpu_to_le16(hlen);
2263 DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d "
2264 "(%x:%x) nbd %d\n", h_tx_bd->nbytes, h_tx_bd->addr_hi,
2265 h_tx_bd->addr_lo, h_tx_bd->nbd);
2267 /* now get a new data BD
2268 * (after the pbd) and fill it */
2269 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2270 d_tx_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2272 mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
2273 le32_to_cpu(h_tx_bd->addr_lo)) + hlen;
2275 d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2276 d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2277 d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
2279 /* this marks the BD as one that has no individual mapping */
2280 tx_buf->flags |= BNX2X_TSO_SPLIT_BD;
2282 DP(NETIF_MSG_TX_QUEUED,
2283 "TSO split data size is %d (%x:%x)\n",
2284 d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);
2287 *tx_bd = (struct eth_tx_start_bd *)d_tx_bd;
2292 static inline u16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
2295 csum = (u16) ~csum_fold(csum_sub(csum,
2296 csum_partial(t_header - fix, fix, 0)));
2299 csum = (u16) ~csum_fold(csum_add(csum,
2300 csum_partial(t_header, -fix, 0)));
2302 return swab16(csum);
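/* Roughly, for bnx2x_csum_fix() above: a positive fix means the partial
 * checksum also covered fix bytes before the transport header, so their
 * contribution is folded out; a negative fix means the checksum started
 * -fix bytes into the header, so those bytes are folded back in. The
 * result is byte-swapped before being written into the parsing BD.
 */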
2305 static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
2309 if (skb->ip_summed != CHECKSUM_PARTIAL)
2313 if (vlan_get_protocol(skb) == htons(ETH_P_IPV6)) {
2315 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
2316 rc |= XMIT_CSUM_TCP;
2320 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
2321 rc |= XMIT_CSUM_TCP;
2325 if (skb_is_gso_v6(skb))
2326 rc |= XMIT_GSO_V6 | XMIT_CSUM_TCP | XMIT_CSUM_V6;
2327 else if (skb_is_gso(skb))
2328 rc |= XMIT_GSO_V4 | XMIT_CSUM_V4 | XMIT_CSUM_TCP;
2333 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2334 /* check if packet requires linearization (packet is too fragmented)
2335 no need to check fragmentation if page size > 8K (there will be no
2336 violation of FW restrictions) */
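/* The check below slides a window of (MAX_FETCH_BD - 3) consecutive
 * fragments over the skb (the 3 accounts for the headers BD, the PBD and
 * the last BD): for an LSO packet every such window must carry at least
 * gso_size bytes, and a non-LSO packet with MAX_FETCH_BD - 3 or more
 * fragments is always linearized.
 */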
2337 static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
2342 int first_bd_sz = 0;
2344 /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
2345 if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {
2347 if (xmit_type & XMIT_GSO) {
2348 unsigned short lso_mss = skb_shinfo(skb)->gso_size;
2349 /* Check if LSO packet needs to be copied:
2350 3 = 1 (for headers BD) + 2 (for PBD and last BD) */
2351 int wnd_size = MAX_FETCH_BD - 3;
2352 /* Number of windows to check */
2353 int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
2358 /* Headers length */
2359 hlen = (int)(skb_transport_header(skb) - skb->data) +
2362 /* Amount of data (w/o headers) on linear part of SKB*/
2363 first_bd_sz = skb_headlen(skb) - hlen;
2365 wnd_sum = first_bd_sz;
2367 /* Calculate the first sum - it's special */
2368 for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
2370 skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]);
2372 /* If there was data in the linear part of the skb - check it */
2373 if (first_bd_sz > 0) {
2374 if (unlikely(wnd_sum < lso_mss)) {
2379 wnd_sum -= first_bd_sz;
2382 /* Others are easier: run through the frag list and
2383 check all windows */
2384 for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
2386 skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1]);
2388 if (unlikely(wnd_sum < lso_mss)) {
2393 skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx]);
2396 /* in non-LSO too fragmented packet should always
2403 if (unlikely(to_copy))
2404 DP(NETIF_MSG_TX_QUEUED,
2405 "Linearization IS REQUIRED for %s packet. "
2406 "num_frags %d hlen %d first_bd_sz %d\n",
2407 (xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
2408 skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
2414 static inline void bnx2x_set_pbd_gso_e2(struct sk_buff *skb, u32 *parsing_data,
2417 *parsing_data |= (skb_shinfo(skb)->gso_size <<
2418 ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT) &
2419 ETH_TX_PARSE_BD_E2_LSO_MSS;
2420 if ((xmit_type & XMIT_GSO_V6) &&
2421 (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
2422 *parsing_data |= ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR;
2426 * bnx2x_set_pbd_gso - update PBD in GSO case.
2430 * @xmit_type: xmit flags
2432 static inline void bnx2x_set_pbd_gso(struct sk_buff *skb,
2433 struct eth_tx_parse_bd_e1x *pbd,
2436 pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
2437 pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
2438 pbd->tcp_flags = pbd_tcp_flags(skb);
2440 if (xmit_type & XMIT_GSO_V4) {
2441 pbd->ip_id = swab16(ip_hdr(skb)->id);
2442 pbd->tcp_pseudo_csum =
2443 swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
2445 0, IPPROTO_TCP, 0));
2448 pbd->tcp_pseudo_csum =
2449 swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
2450 &ipv6_hdr(skb)->daddr,
2451 0, IPPROTO_TCP, 0));
2453 pbd->global_data |= ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN;
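/* For LSO the pseudo-header checksum above is computed with a zero length
 * (note the 0 passed to csum_tcpudp_magic()/csum_ipv6_magic()); the
 * PSEUDO_CS_WITHOUT_LEN flag tells the FW that the length is missing so it
 * can add each segment's own length when it rebuilds the TCP checksum.
 */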
2457 /** bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length
2459 * @bp: driver handle
2461 * @parsing_data: data to be updated
2462 * @xmit_type: xmit flags */
2466 static inline u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
2467 u32 *parsing_data, u32 xmit_type)
2470 ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) <<
2471 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT) &
2472 ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W;
2474 if (xmit_type & XMIT_CSUM_TCP) {
2475 *parsing_data |= ((tcp_hdrlen(skb) / 4) <<
2476 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT) &
2477 ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW;
2479 return skb_transport_header(skb) + tcp_hdrlen(skb) - skb->data;
2481 /* We support checksum offload for TCP and UDP only.
2482 * No need to pass the UDP header length - it's a constant. */
2484 return skb_transport_header(skb) +
2485 sizeof(struct udphdr) - skb->data;
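/* In both branches the value returned by bnx2x_set_pbd_csum_e2() is the
 * total header length in bytes (L2 + L3 + L4); the caller keeps it in
 * 'hlen' and later uses it to decide whether a TSO packet needs a
 * header/data split BD.
 */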
2488 static inline void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2489 struct eth_tx_start_bd *tx_start_bd, u32 xmit_type)
2491 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_L4_CSUM;
2493 if (xmit_type & XMIT_CSUM_V4)
2494 tx_start_bd->bd_flags.as_bitfield |=
2495 ETH_TX_BD_FLAGS_IP_CSUM;
2497 tx_start_bd->bd_flags.as_bitfield |=
2498 ETH_TX_BD_FLAGS_IPV6;
2500 if (!(xmit_type & XMIT_CSUM_TCP))
2501 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IS_UDP;
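/* The start BD only carries coarse checksum flags (L4 csum, IPv4 csum or
 * IPv6, and whether L4 is UDP); the exact header offsets and lengths are
 * programmed into the parse BD by the bnx2x_set_pbd_csum*() helpers.
 */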
2505 /** bnx2x_set_pbd_csum - update PBD with checksum and return header length
2507 * @bp: driver handle
2509 * @pbd: parse BD to be updated
2510 * @xmit_type: xmit flags */
2512 static inline u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb,
2513 struct eth_tx_parse_bd_e1x *pbd,
2516 u8 hlen = (skb_network_header(skb) - skb->data) >> 1;
2518 /* for now NS flag is not used in Linux */
2520 (hlen | ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
2521 ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
2523 pbd->ip_hlen_w = (skb_transport_header(skb) -
2524 skb_network_header(skb)) >> 1;
2526 hlen += pbd->ip_hlen_w;
2528 /* We support checksum offload for TCP and UDP only */
2529 if (xmit_type & XMIT_CSUM_TCP)
2530 hlen += tcp_hdrlen(skb) / 2;
2532 hlen += sizeof(struct udphdr) / 2;
2534 pbd->total_hlen_w = cpu_to_le16(hlen);
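/* All the *hlen_w fields above are counted in 16-bit words, hence the
 * ">> 1" and "/ 2" conversions. For example, a 14-byte Ethernet header, a
 * 20-byte IPv4 header and a 20-byte TCP header give total_hlen_w = 27.
 */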
2537 if (xmit_type & XMIT_CSUM_TCP) {
2538 pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
2541 s8 fix = SKB_CS_OFF(skb); /* signed! */
2543 DP(NETIF_MSG_TX_QUEUED,
2544 "hlen %d fix %d csum before fix %x\n",
2545 le16_to_cpu(pbd->total_hlen_w), fix, SKB_CS(skb));
2547 /* HW bug: fixup the CSUM */
2548 pbd->tcp_pseudo_csum =
2549 bnx2x_csum_fix(skb_transport_header(skb),
2552 DP(NETIF_MSG_TX_QUEUED, "csum after fix %x\n",
2553 pbd->tcp_pseudo_csum);
2559 /* called with netif_tx_lock
2560 * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
2561 * netif_wake_queue()
2563 netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
2565 struct bnx2x *bp = netdev_priv(dev);
2567 struct bnx2x_fastpath *fp;
2568 struct netdev_queue *txq;
2569 struct bnx2x_fp_txdata *txdata;
2570 struct sw_tx_bd *tx_buf;
2571 struct eth_tx_start_bd *tx_start_bd, *first_bd;
2572 struct eth_tx_bd *tx_data_bd, *total_pkt_bd = NULL;
2573 struct eth_tx_parse_bd_e1x *pbd_e1x = NULL;
2574 struct eth_tx_parse_bd_e2 *pbd_e2 = NULL;
2575 u32 pbd_e2_parsing_data = 0;
2576 u16 pkt_prod, bd_prod;
2577 int nbd, txq_index, fp_index, txdata_index;
2579 u32 xmit_type = bnx2x_xmit_type(bp, skb);
2582 __le16 pkt_size = 0;
2584 u8 mac_type = UNICAST_ADDRESS;
2586 #ifdef BNX2X_STOP_ON_ERROR
2587 if (unlikely(bp->panic))
2588 return NETDEV_TX_BUSY;
2591 txq_index = skb_get_queue_mapping(skb);
2592 txq = netdev_get_tx_queue(dev, txq_index);
2594 BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT);
2596 /* decode the fastpath index and the cos index from the txq */
2597 fp_index = TXQ_TO_FP(txq_index);
2598 txdata_index = TXQ_TO_COS(txq_index);
2602 /* Override the above for the FCoE queue:
2603 * - FCoE fp entry is right after the ETH entries.
2604 * - FCoE L2 queue uses bp->txdata[0] only. */
2606 if (unlikely(!NO_FCOE(bp) && (txq_index ==
2607 bnx2x_fcoe_tx(bp, txq_index)))) {
2608 fp_index = FCOE_IDX;
2613 /* enable this debug print to view the transmission queue being used
2614 DP(BNX2X_MSG_FP, "indices: txq %d, fp %d, txdata %d\n",
2615 txq_index, fp_index, txdata_index); */
2617 /* locate the fastpath and the txdata */
2618 fp = &bp->fp[fp_index];
2619 txdata = &fp->txdata[txdata_index];
2621 /* enable this debug print to view the transmission details
2622 DP(BNX2X_MSG_FP,"transmitting packet cid %d fp index %d txdata_index %d"
2623 " tx_data ptr %p fp pointer %p\n",
2624 txdata->cid, fp_index, txdata_index, txdata, fp); */
2626 if (unlikely(bnx2x_tx_avail(bp, txdata) <
2627 (skb_shinfo(skb)->nr_frags + 3))) {
2628 fp->eth_q_stats.driver_xoff++;
2629 netif_tx_stop_queue(txq);
2630 BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
2631 return NETDEV_TX_BUSY;
2634 DP(NETIF_MSG_TX_QUEUED, "queue[%d]: SKB: summed %x protocol %x "
2635 "protocol(%x,%x) gso type %x xmit_type %x\n",
2636 txq_index, skb->ip_summed, skb->protocol, ipv6_hdr(skb)->nexthdr,
2637 ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
2639 eth = (struct ethhdr *)skb->data;
2641 /* set flag according to packet type (UNICAST_ADDRESS is default)*/
2642 if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
2643 if (is_broadcast_ether_addr(eth->h_dest))
2644 mac_type = BROADCAST_ADDRESS;
2646 mac_type = MULTICAST_ADDRESS;
2649 #if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
2650 /* First, check if we need to linearize the skb (due to FW
2651 restrictions). No need to check fragmentation if page size > 8K
2652 (there will be no violation of FW restrictions) */
2653 if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
2654 /* Statistics of linearization */
2656 if (skb_linearize(skb) != 0) {
2657 DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - "
2658 "silently dropping this SKB\n");
2659 dev_kfree_skb_any(skb);
2660 return NETDEV_TX_OK;
2664 /* Map skb linear data for DMA */
2665 mapping = dma_map_single(&bp->pdev->dev, skb->data,
2666 skb_headlen(skb), DMA_TO_DEVICE);
2667 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2668 DP(NETIF_MSG_TX_QUEUED, "SKB mapping failed - "
2669 "silently dropping this SKB\n");
2670 dev_kfree_skb_any(skb);
2671 return NETDEV_TX_OK;
2674 /* Please read carefully. First we use one BD which we mark as start,
2675 then we have a parsing info BD (used for TSO or xsum),
2676 and only then we have the rest of the TSO BDs.
2677 (don't forget to mark the last one as last,
2678 and to unmap only AFTER you write to the BD ...)
2679 And above all, all pbd sizes are in words - NOT DWORDS! */
2682 /* get current pkt produced now - advance it just before sending packet
2683 * since mapping of pages may fail and cause packet to be dropped */
2685 pkt_prod = txdata->tx_pkt_prod;
2686 bd_prod = TX_BD(txdata->tx_bd_prod);
2688 /* get a tx_buf and first BD
2689 * tx_start_bd may be changed during SPLIT,
2690 * but first_bd will always stay first */
2692 tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
2693 tx_start_bd = &txdata->tx_desc_ring[bd_prod].start_bd;
2694 first_bd = tx_start_bd;
2696 tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
2697 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE,
2701 SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1);
2703 /* remember the first BD of the packet */
2704 tx_buf->first_bd = txdata->tx_bd_prod;
2708 DP(NETIF_MSG_TX_QUEUED,
2709 "sending pkt %u @%p next_idx %u bd %u @%p\n",
2710 pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, tx_start_bd);
2712 if (vlan_tx_tag_present(skb)) {
2713 tx_start_bd->vlan_or_ethertype =
2714 cpu_to_le16(vlan_tx_tag_get(skb));
2715 tx_start_bd->bd_flags.as_bitfield |=
2716 (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
2718 tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
2720 /* turn on parsing and get a BD */
2721 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2723 if (xmit_type & XMIT_CSUM)
2724 bnx2x_set_sbd_csum(bp, skb, tx_start_bd, xmit_type);
2726 if (!CHIP_IS_E1x(bp)) {
2727 pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
2728 memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
2729 /* Set PBD in checksum offload case */
2730 if (xmit_type & XMIT_CSUM)
2731 hlen = bnx2x_set_pbd_csum_e2(bp, skb,
2732 &pbd_e2_parsing_data,
2736 /* fill in the MAC addresses in the PBD - for local switching */
2739 bnx2x_set_fw_mac_addr(&pbd_e2->src_mac_addr_hi,
2740 &pbd_e2->src_mac_addr_mid,
2741 &pbd_e2->src_mac_addr_lo,
2743 bnx2x_set_fw_mac_addr(&pbd_e2->dst_mac_addr_hi,
2744 &pbd_e2->dst_mac_addr_mid,
2745 &pbd_e2->dst_mac_addr_lo,
2749 pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
2750 memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x));
2751 /* Set PBD in checksum offload case */
2752 if (xmit_type & XMIT_CSUM)
2753 hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
2757 /* Setup the data pointer of the first BD of the packet */
2758 tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2759 tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2760 nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */
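/* nbd starts at 2 (start BD + parse BD); each mapped fragment and a
 * possible TSO header/data split BD add one more before the total is
 * written into first_bd->nbd, while the trailing "next page" BD (if the
 * packet wraps a BD-ring page) is counted only in the doorbell producer.
 */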
2761 tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
2762 pkt_size = tx_start_bd->nbytes;
2764 DP(NETIF_MSG_TX_QUEUED, "first bd @%p addr (%x:%x) nbd %d"
2765 " nbytes %d flags %x vlan %x\n",
2766 tx_start_bd, tx_start_bd->addr_hi, tx_start_bd->addr_lo,
2767 le16_to_cpu(tx_start_bd->nbd), le16_to_cpu(tx_start_bd->nbytes),
2768 tx_start_bd->bd_flags.as_bitfield,
2769 le16_to_cpu(tx_start_bd->vlan_or_ethertype));
2771 if (xmit_type & XMIT_GSO) {
2773 DP(NETIF_MSG_TX_QUEUED,
2774 "TSO packet len %d hlen %d total len %d tso size %d\n",
2775 skb->len, hlen, skb_headlen(skb),
2776 skb_shinfo(skb)->gso_size);
2778 tx_start_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
2780 if (unlikely(skb_headlen(skb) > hlen))
2781 bd_prod = bnx2x_tx_split(bp, txdata, tx_buf,
2784 if (!CHIP_IS_E1x(bp))
2785 bnx2x_set_pbd_gso_e2(skb, &pbd_e2_parsing_data,
2788 bnx2x_set_pbd_gso(skb, pbd_e1x, xmit_type);
2791 /* Set the PBD's parsing_data field if not zero
2792 * (for the chips newer than 57711). */
2794 if (pbd_e2_parsing_data)
2795 pbd_e2->parsing_data = cpu_to_le32(pbd_e2_parsing_data);
2797 tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
2799 /* Handle fragmented skb */
2800 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2801 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2803 mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0,
2804 skb_frag_size(frag), DMA_TO_DEVICE);
2805 if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
2807 DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
2808 "dropping packet...\n");
2810 /* we need to unmap all buffers already mapped for this SKB;
2812 * first_bd->nbd needs to be properly updated
2813 * before the call to bnx2x_free_tx_pkt */
2815 first_bd->nbd = cpu_to_le16(nbd);
2816 bnx2x_free_tx_pkt(bp, txdata,
2817 TX_BD(txdata->tx_pkt_prod));
2818 return NETDEV_TX_OK;
2821 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2822 tx_data_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2823 if (total_pkt_bd == NULL)
2824 total_pkt_bd = &txdata->tx_desc_ring[bd_prod].reg_bd;
2826 tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
2827 tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
2828 tx_data_bd->nbytes = cpu_to_le16(skb_frag_size(frag));
2829 le16_add_cpu(&pkt_size, skb_frag_size(frag));
2832 DP(NETIF_MSG_TX_QUEUED,
2833 "frag %d bd @%p addr (%x:%x) nbytes %d\n",
2834 i, tx_data_bd, tx_data_bd->addr_hi, tx_data_bd->addr_lo,
2835 le16_to_cpu(tx_data_bd->nbytes));
2838 DP(NETIF_MSG_TX_QUEUED, "last bd @%p\n", tx_data_bd);
2840 /* update with actual num BDs */
2841 first_bd->nbd = cpu_to_le16(nbd);
2843 bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
2845 /* now send a tx doorbell, counting the next BD
2846 * if the packet contains or ends with it */
2848 if (TX_BD_POFF(bd_prod) < nbd)
2851 /* total_pkt_bytes should be set on the first data BD if
2852 * it's not an LSO packet and there is more than one
2853 * data BD. In this case pkt_size is limited by an MTU value.
2854 * However we prefer to set it for an LSO packet (while we don't
2855 * have to) in order to save some CPU cycles in the non-LSO
2856 * case, which we care much more about. */
2858 if (total_pkt_bd != NULL)
2859 total_pkt_bd->total_pkt_bytes = pkt_size;
2862 DP(NETIF_MSG_TX_QUEUED,
2863 "PBD (E1X) @%p ip_data %x ip_hlen %u ip_id %u lso_mss %u"
2864 " tcp_flags %x xsum %x seq %u hlen %u\n",
2865 pbd_e1x, pbd_e1x->global_data, pbd_e1x->ip_hlen_w,
2866 pbd_e1x->ip_id, pbd_e1x->lso_mss, pbd_e1x->tcp_flags,
2867 pbd_e1x->tcp_pseudo_csum, pbd_e1x->tcp_send_seq,
2868 le16_to_cpu(pbd_e1x->total_hlen_w));
2870 DP(NETIF_MSG_TX_QUEUED,
2871 "PBD (E2) @%p dst %x %x %x src %x %x %x parsing_data %x\n",
2872 pbd_e2, pbd_e2->dst_mac_addr_hi, pbd_e2->dst_mac_addr_mid,
2873 pbd_e2->dst_mac_addr_lo, pbd_e2->src_mac_addr_hi,
2874 pbd_e2->src_mac_addr_mid, pbd_e2->src_mac_addr_lo,
2875 pbd_e2->parsing_data);
2876 DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod);
2878 txdata->tx_pkt_prod++;
2880 /* Make sure that the BD data is updated before updating the producer
2881 * since FW might read the BD right after the producer is updated.
2882 * This is only applicable for weak-ordered memory model archs such
2883 * as IA-64. The following barrier is also mandatory since FW
2884 * assumes packets must have BDs. */
2888 txdata->tx_db.data.prod += nbd;
2891 DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
2895 txdata->tx_bd_prod += nbd;
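/* Three producers are kept in sync here: tx_pkt_prod counts packets,
 * tx_bd_prod counts BDs consumed on the ring, and tx_db holds the doorbell
 * value written to the chip; they must all advance consistently for
 * bnx2x_tx_int() to reclaim the packet later.
 */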
2897 if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 3)) {
2898 netif_tx_stop_queue(txq);
2900 /* paired memory barrier is in bnx2x_tx_int(), we have to keep
2901 * ordering of set_bit() in netif_tx_stop_queue() and the read of the tx consumer index below */
2905 fp->eth_q_stats.driver_xoff++;
2906 if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3)
2907 netif_tx_wake_queue(txq);
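/* Classic stop/recheck/wake pattern: the queue is stopped first and the
 * available space is then re-read, so a completion that raced with the
 * stop (see the paired barrier in bnx2x_tx_int()) cannot leave the queue
 * stopped forever.
 */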
2911 return NETDEV_TX_OK;
2915 /** bnx2x_setup_tc - routine to configure net_device for multi tc
2917 * @netdev: net device to configure
2918 * @tc: number of traffic classes to enable
2920 * callback connected to the ndo_setup_tc function pointer */
2922 int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
2924 int cos, prio, count, offset;
2925 struct bnx2x *bp = netdev_priv(dev);
2927 /* setup tc must be called under rtnl lock */
2930 /* no traffic classes requested. aborting */
2932 netdev_reset_tc(dev);
2936 /* requested to support too many traffic classes */
2937 if (num_tc > bp->max_cos) {
2938 DP(NETIF_MSG_TX_ERR, "support for too many traffic classes"
2939 " requested: %d. max supported is %d\n",
2940 num_tc, bp->max_cos);
2944 /* declare amount of supported traffic classes */
2945 if (netdev_set_num_tc(dev, num_tc)) {
2946 DP(NETIF_MSG_TX_ERR, "failed to declare %d traffic classes\n",
2951 /* configure priority to traffic class mapping */
2952 for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++) {
2953 netdev_set_prio_tc_map(dev, prio, bp->prio_to_cos[prio]);
2954 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n",
2955 prio, bp->prio_to_cos[prio]);
2959 /* Use this configuration to differentiate tc0 from other COSes.
2960 This can be used for ets or pfc, and save the effort of setting
2961 up a multi-class queue disc or negotiating DCBX with a switch.
2962 netdev_set_prio_tc_map(dev, 0, 0);
2963 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", 0, 0);
2964 for (prio = 1; prio < 16; prio++) {
2965 netdev_set_prio_tc_map(dev, prio, 1);
2966 DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", prio, 1);
} */
2969 /* configure traffic class to transmission queue mapping */
2970 for (cos = 0; cos < bp->max_cos; cos++) {
2971 count = BNX2X_NUM_ETH_QUEUES(bp);
2972 offset = cos * MAX_TXQS_PER_COS;
2973 netdev_set_tc_queue(dev, cos, count, offset);
2974 DP(BNX2X_MSG_SP, "mapping tc %d to offset %d count %d\n",
2975 cos, offset, count);
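/* With this mapping traffic class 'cos' owns the txq range starting at
 * cos * MAX_TXQS_PER_COS and spanning BNX2X_NUM_ETH_QUEUES(bp) queues.
 * As an illustration only, with 4 ETH queues and MAX_TXQS_PER_COS of 16,
 * tc1 would map to txqs 16..19.
 */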
2981 /* called with rtnl_lock */
2982 int bnx2x_change_mac_addr(struct net_device *dev, void *p)
2984 struct sockaddr *addr = p;
2985 struct bnx2x *bp = netdev_priv(dev);
2988 if (!is_valid_ether_addr((u8 *)(addr->sa_data)))
2991 if (netif_running(dev)) {
2992 rc = bnx2x_set_eth_mac(bp, false);
2997 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
2999 if (netif_running(dev))
3000 rc = bnx2x_set_eth_mac(bp, true);
3005 static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index)
3007 union host_hc_status_block *sb = &bnx2x_fp(bp, fp_index, status_blk);
3008 struct bnx2x_fastpath *fp = &bp->fp[fp_index];
3013 if (IS_FCOE_IDX(fp_index)) {
3014 memset(sb, 0, sizeof(union host_hc_status_block));
3015 fp->status_blk_mapping = 0;
3020 if (!CHIP_IS_E1x(bp))
3021 BNX2X_PCI_FREE(sb->e2_sb,
3022 bnx2x_fp(bp, fp_index,
3023 status_blk_mapping),
3024 sizeof(struct host_hc_status_block_e2));
3026 BNX2X_PCI_FREE(sb->e1x_sb,
3027 bnx2x_fp(bp, fp_index,
3028 status_blk_mapping),
3029 sizeof(struct host_hc_status_block_e1x));
3034 if (!skip_rx_queue(bp, fp_index)) {
3035 bnx2x_free_rx_bds(fp);
3037 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3038 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_buf_ring));
3039 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_desc_ring),
3040 bnx2x_fp(bp, fp_index, rx_desc_mapping),
3041 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3043 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_comp_ring),
3044 bnx2x_fp(bp, fp_index, rx_comp_mapping),
3045 sizeof(struct eth_fast_path_rx_cqe) *
3049 BNX2X_FREE(bnx2x_fp(bp, fp_index, rx_page_ring));
3050 BNX2X_PCI_FREE(bnx2x_fp(bp, fp_index, rx_sge_ring),
3051 bnx2x_fp(bp, fp_index, rx_sge_mapping),
3052 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3056 if (!skip_tx_queue(bp, fp_index)) {
3057 /* fastpath tx rings: tx_buf tx_desc */
3058 for_each_cos_in_tx_queue(fp, cos) {
3059 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3062 "freeing tx memory of fp %d cos %d cid %d\n",
3063 fp_index, cos, txdata->cid);
3065 BNX2X_FREE(txdata->tx_buf_ring);
3066 BNX2X_PCI_FREE(txdata->tx_desc_ring,
3067 txdata->tx_desc_mapping,
3068 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3071 /* end of fastpath */
3074 void bnx2x_free_fp_mem(struct bnx2x *bp)
3077 for_each_queue(bp, i)
3078 bnx2x_free_fp_mem_at(bp, i);
3081 static inline void set_sb_shortcuts(struct bnx2x *bp, int index)
3083 union host_hc_status_block status_blk = bnx2x_fp(bp, index, status_blk);
3084 if (!CHIP_IS_E1x(bp)) {
3085 bnx2x_fp(bp, index, sb_index_values) =
3086 (__le16 *)status_blk.e2_sb->sb.index_values;
3087 bnx2x_fp(bp, index, sb_running_index) =
3088 (__le16 *)status_blk.e2_sb->sb.running_index;
3090 bnx2x_fp(bp, index, sb_index_values) =
3091 (__le16 *)status_blk.e1x_sb->sb.index_values;
3092 bnx2x_fp(bp, index, sb_running_index) =
3093 (__le16 *)status_blk.e1x_sb->sb.running_index;
3097 static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
3099 union host_hc_status_block *sb;
3100 struct bnx2x_fastpath *fp = &bp->fp[index];
3103 int rx_ring_size = 0;
3105 /* if rx_ring_size specified - use it */
3106 if (!bp->rx_ring_size) {
3108 rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
3110 /* allocate at least number of buffers required by FW */
3111 rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
3112 MIN_RX_SIZE_TPA, rx_ring_size);
3114 bp->rx_ring_size = rx_ring_size;
3116 rx_ring_size = bp->rx_ring_size;
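/* When no ring size was requested by the user, each RX queue gets an equal
 * share of MAX_RX_AVAIL, clamped from below by the FW minimum
 * (MIN_RX_SIZE_TPA or MIN_RX_SIZE_NONTPA depending on whether TPA is
 * disabled), and the chosen value is cached in bp->rx_ring_size for the
 * remaining queues.
 */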
3119 sb = &bnx2x_fp(bp, index, status_blk);
3121 if (!IS_FCOE_IDX(index)) {
3124 if (!CHIP_IS_E1x(bp))
3125 BNX2X_PCI_ALLOC(sb->e2_sb,
3126 &bnx2x_fp(bp, index, status_blk_mapping),
3127 sizeof(struct host_hc_status_block_e2));
3129 BNX2X_PCI_ALLOC(sb->e1x_sb,
3130 &bnx2x_fp(bp, index, status_blk_mapping),
3131 sizeof(struct host_hc_status_block_e1x));
3136 /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to
3137 * set shortcuts for it.
3139 if (!IS_FCOE_IDX(index))
3140 set_sb_shortcuts(bp, index);
3143 if (!skip_tx_queue(bp, index)) {
3144 /* fastpath tx rings: tx_buf tx_desc */
3145 for_each_cos_in_tx_queue(fp, cos) {
3146 struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
3148 DP(BNX2X_MSG_SP, "allocating tx memory of "
3152 BNX2X_ALLOC(txdata->tx_buf_ring,
3153 sizeof(struct sw_tx_bd) * NUM_TX_BD);
3154 BNX2X_PCI_ALLOC(txdata->tx_desc_ring,
3155 &txdata->tx_desc_mapping,
3156 sizeof(union eth_tx_bd_types) * NUM_TX_BD);
3161 if (!skip_rx_queue(bp, index)) {
3162 /* fastpath rx rings: rx_buf rx_desc rx_comp */
3163 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_buf_ring),
3164 sizeof(struct sw_rx_bd) * NUM_RX_BD);
3165 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_desc_ring),
3166 &bnx2x_fp(bp, index, rx_desc_mapping),
3167 sizeof(struct eth_rx_bd) * NUM_RX_BD);
3169 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_comp_ring),
3170 &bnx2x_fp(bp, index, rx_comp_mapping),
3171 sizeof(struct eth_fast_path_rx_cqe) *
3175 BNX2X_ALLOC(bnx2x_fp(bp, index, rx_page_ring),
3176 sizeof(struct sw_rx_page) * NUM_RX_SGE);
3177 BNX2X_PCI_ALLOC(bnx2x_fp(bp, index, rx_sge_ring),
3178 &bnx2x_fp(bp, index, rx_sge_mapping),
3179 BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
3181 bnx2x_set_next_page_rx_bd(fp);
3184 bnx2x_set_next_page_rx_cq(fp);
3187 ring_size = bnx2x_alloc_rx_bds(fp, rx_ring_size);
3188 if (ring_size < rx_ring_size)
3194 /* handles low memory cases */
3196 BNX2X_ERR("Unable to allocate full memory for queue %d (size %d)\n",
3198 /* FW will drop all packets if queue is not big enough,
3199 * In these cases we disable the queue
3200 * Min size is different for OOO, TPA and non-TPA queues */
3202 if (ring_size < (fp->disable_tpa ?
3203 MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) {
3204 /* release memory allocated for this queue */
3205 bnx2x_free_fp_mem_at(bp, index);
3211 int bnx2x_alloc_fp_mem(struct bnx2x *bp)
3216 /* 1. Allocate FP for leading - fatal if error
3217 * 2. {CNIC} Allocate FCoE FP - fatal if error
3218 * 3. {CNIC} Allocate OOO + FWD - disable OOO if error
3219 * 4. Allocate RSS - fix number of queues if error */
3223 if (bnx2x_alloc_fp_mem_at(bp, 0))
3229 if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX))
3230 /* we will fail the load process instead of marking NO_FCOE_FLAG */
3237 for_each_nondefault_eth_queue(bp, i)
3238 if (bnx2x_alloc_fp_mem_at(bp, i))
3241 /* handle memory failures */
3242 if (i != BNX2X_NUM_ETH_QUEUES(bp)) {
3243 int delta = BNX2X_NUM_ETH_QUEUES(bp) - i;
3248 /* move non-eth FPs next to the last eth FP;
3249 * must be done in that order:
3250 * FCOE_IDX < FWD_IDX < OOO_IDX */
3253 /* move the FCoE fp even if NO_FCOE_FLAG is on */
3254 bnx2x_move_fp(bp, FCOE_IDX, FCOE_IDX - delta);
3256 bp->num_queues -= delta;
3257 BNX2X_ERR("Adjusted num of queues from %d to %d\n",
3258 bp->num_queues + delta, bp->num_queues);
3264 void bnx2x_free_mem_bp(struct bnx2x *bp)
3267 kfree(bp->msix_table);
3271 int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp)
3273 struct bnx2x_fastpath *fp;
3274 struct msix_entry *tbl;
3275 struct bnx2x_ilt *ilt;
3276 int msix_table_size = 0;
3279 /* The biggest MSI-X table we might need is the maximum number of fast
3280 * path IGU SBs plus the default SB (for the PF). */
3282 msix_table_size = bp->igu_sb_cnt + 1;
3284 /* fp array: RSS plus CNIC related L2 queues */
3285 fp = kzalloc((BNX2X_MAX_RSS_COUNT(bp) + NON_ETH_CONTEXT_USE) *
3286 sizeof(*fp), GFP_KERNEL);
3292 tbl = kzalloc(msix_table_size * sizeof(*tbl), GFP_KERNEL);
3295 bp->msix_table = tbl;
3298 ilt = kzalloc(sizeof(*ilt), GFP_KERNEL);
3305 bnx2x_free_mem_bp(bp);
3310 int bnx2x_reload_if_running(struct net_device *dev)
3312 struct bnx2x *bp = netdev_priv(dev);
3314 if (unlikely(!netif_running(dev)))
3317 bnx2x_nic_unload(bp, UNLOAD_NORMAL);
3318 return bnx2x_nic_load(bp, LOAD_NORMAL);
3321 int bnx2x_get_cur_phy_idx(struct bnx2x *bp)
3323 u32 sel_phy_idx = 0;
3324 if (bp->link_params.num_phys <= 1)
3327 if (bp->link_vars.link_up) {
3328 sel_phy_idx = EXT_PHY1;
3329 /* In case link is SERDES, check if the EXT_PHY2 is the one */
3330 if ((bp->link_vars.link_status & LINK_STATUS_SERDES_LINK) &&
3331 (bp->link_params.phy[EXT_PHY2].supported & SUPPORTED_FIBRE))
3332 sel_phy_idx = EXT_PHY2;
3335 switch (bnx2x_phy_selection(&bp->link_params)) {
3336 case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT:
3337 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY:
3338 case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
3339 sel_phy_idx = EXT_PHY1;
3341 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY:
3342 case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
3343 sel_phy_idx = EXT_PHY2;
3351 int bnx2x_get_link_cfg_idx(struct bnx2x *bp)
3353 u32 sel_phy_idx = bnx2x_get_cur_phy_idx(bp);
3355 /* The selected active PHY is always reported after swapping (in case PHY
3356 * swapping is enabled), so when swapping is enabled we need to reverse the configuration. */
3360 if (bp->link_params.multi_phy_config &
3361 PORT_HW_CFG_PHY_SWAPPED_ENABLED) {
3362 if (sel_phy_idx == EXT_PHY1)
3363 sel_phy_idx = EXT_PHY2;
3364 else if (sel_phy_idx == EXT_PHY2)
3365 sel_phy_idx = EXT_PHY1;
3367 return LINK_CONFIG_IDX(sel_phy_idx);
3370 #if defined(NETDEV_FCOE_WWNN) && defined(BCM_CNIC)
3371 int bnx2x_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
3373 struct bnx2x *bp = netdev_priv(dev);
3374 struct cnic_eth_dev *cp = &bp->cnic_eth_dev;
3377 case NETDEV_FCOE_WWNN:
3378 *wwn = HILO_U64(cp->fcoe_wwn_node_name_hi,
3379 cp->fcoe_wwn_node_name_lo);
3381 case NETDEV_FCOE_WWPN:
3382 *wwn = HILO_U64(cp->fcoe_wwn_port_name_hi,
3383 cp->fcoe_wwn_port_name_lo);
3393 /* called with rtnl_lock */
3394 int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
3396 struct bnx2x *bp = netdev_priv(dev);
3398 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3399 pr_err("Handling parity error recovery. Try again later\n");
3403 if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) ||
3404 ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE))
3407 /* This does not race with packet allocation
3408 * because the actual alloc size is
3409 * only updated as part of load
3413 return bnx2x_reload_if_running(dev);
3416 u32 bnx2x_fix_features(struct net_device *dev, u32 features)
3418 struct bnx2x *bp = netdev_priv(dev);
3420 /* TPA requires Rx CSUM offloading */
3421 if (!(features & NETIF_F_RXCSUM) || bp->disable_tpa)
3422 features &= ~NETIF_F_LRO;
3427 int bnx2x_set_features(struct net_device *dev, u32 features)
3429 struct bnx2x *bp = netdev_priv(dev);
3430 u32 flags = bp->flags;
3431 bool bnx2x_reload = false;
3433 if (features & NETIF_F_LRO)
3434 flags |= TPA_ENABLE_FLAG;
3436 flags &= ~TPA_ENABLE_FLAG;
3438 if (features & NETIF_F_LOOPBACK) {
3439 if (bp->link_params.loopback_mode != LOOPBACK_BMAC) {
3440 bp->link_params.loopback_mode = LOOPBACK_BMAC;
3441 bnx2x_reload = true;
3444 if (bp->link_params.loopback_mode != LOOPBACK_NONE) {
3445 bp->link_params.loopback_mode = LOOPBACK_NONE;
3446 bnx2x_reload = true;
3450 if (flags ^ bp->flags) {
3452 bnx2x_reload = true;
3456 if (bp->recovery_state == BNX2X_RECOVERY_DONE)
3457 return bnx2x_reload_if_running(dev);
3458 /* else: bnx2x_nic_load() will be called at end of recovery */
3464 void bnx2x_tx_timeout(struct net_device *dev)
3466 struct bnx2x *bp = netdev_priv(dev);
3468 #ifdef BNX2X_STOP_ON_ERROR
3473 smp_mb__before_clear_bit();
3474 set_bit(BNX2X_SP_RTNL_TX_TIMEOUT, &bp->sp_rtnl_state);
3475 smp_mb__after_clear_bit();
3477 /* This allows the netif to be shutdown gracefully before resetting */
3478 schedule_delayed_work(&bp->sp_rtnl_task, 0);
3481 int bnx2x_suspend(struct pci_dev *pdev, pm_message_t state)
3483 struct net_device *dev = pci_get_drvdata(pdev);
3487 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3490 bp = netdev_priv(dev);
3494 pci_save_state(pdev);
3496 if (!netif_running(dev)) {
3501 netif_device_detach(dev);
3503 bnx2x_nic_unload(bp, UNLOAD_CLOSE);
3505 bnx2x_set_power_state(bp, pci_choose_state(pdev, state));
3512 int bnx2x_resume(struct pci_dev *pdev)
3514 struct net_device *dev = pci_get_drvdata(pdev);
3519 dev_err(&pdev->dev, "BAD net device from bnx2x_init_one\n");
3522 bp = netdev_priv(dev);
3524 if (bp->recovery_state != BNX2X_RECOVERY_DONE) {
3525 pr_err("Handling parity error recovery. Try again later\n");
3531 pci_restore_state(pdev);
3533 if (!netif_running(dev)) {
3538 bnx2x_set_power_state(bp, PCI_D0);
3539 netif_device_attach(dev);
3541 /* Since the chip was reset, clear the FW sequence number */
3543 rc = bnx2x_nic_load(bp, LOAD_OPEN);
3551 void bnx2x_set_ctx_validation(struct bnx2x *bp, struct eth_context *cxt,
3554 /* ustorm cxt validation */
3555 cxt->ustorm_ag_context.cdu_usage =
3556 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3557 CDU_REGION_NUMBER_UCM_AG, ETH_CONNECTION_TYPE);
3558 /* xcontext validation */
3559 cxt->xstorm_ag_context.cdu_reserved =
3560 CDU_RSRVD_VALUE_TYPE_A(HW_CID(bp, cid),
3561 CDU_REGION_NUMBER_XCM_AG, ETH_CONNECTION_TYPE);
3564 static inline void storm_memset_hc_timeout(struct bnx2x *bp, u8 port,
3565 u8 fw_sb_id, u8 sb_index,
3569 u32 addr = BAR_CSTRORM_INTMEM +
3570 CSTORM_STATUS_BLOCK_DATA_TIMEOUT_OFFSET(fw_sb_id, sb_index);
3571 REG_WR8(bp, addr, ticks);
3572 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d ticks %d\n",
3573 port, fw_sb_id, sb_index, ticks);
3576 static inline void storm_memset_hc_disable(struct bnx2x *bp, u8 port,
3577 u16 fw_sb_id, u8 sb_index,
3580 u32 enable_flag = disable ? 0 : (1 << HC_INDEX_DATA_HC_ENABLED_SHIFT);
3581 u32 addr = BAR_CSTRORM_INTMEM +
3582 CSTORM_STATUS_BLOCK_DATA_FLAGS_OFFSET(fw_sb_id, sb_index);
3583 u16 flags = REG_RD16(bp, addr);
3585 flags &= ~HC_INDEX_DATA_HC_ENABLED;
3586 flags |= enable_flag;
3587 REG_WR16(bp, addr, flags);
3588 DP(NETIF_MSG_HW, "port %x fw_sb_id %d sb_index %d disable %d\n",
3589 port, fw_sb_id, sb_index, disable);
3592 void bnx2x_update_coalesce_sb_index(struct bnx2x *bp, u8 fw_sb_id,
3593 u8 sb_index, u8 disable, u16 usec)
3595 int port = BP_PORT(bp);
3596 u8 ticks = usec / BNX2X_BTR;
3598 storm_memset_hc_timeout(bp, port, fw_sb_id, sb_index, ticks);
3600 disable = disable ? 1 : (usec ? 0 : 1);
3601 storm_memset_hc_disable(bp, port, fw_sb_id, sb_index, disable);
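/* 'usec' is converted into HC timer ticks of BNX2X_BTR microseconds each;
 * for example, assuming BNX2X_BTR is 4, usec = 100 yields ticks = 25. A
 * zero 'usec' also forces the index to be disabled even if the caller
 * asked for it to remain enabled.
 */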