/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION "2.1.0-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};


static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}
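/*
 * Illustrative output (editor's note, hypothetical values, not from the
 * original source): a ganged per-queue register such as RDH prints as one
 * line, e.g. "RDH[0-3]        00000010 00000000 00000000 00000000", while a
 * plain register from the table above prints as "CTRL            581f0241".
 */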
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	int n = 0;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_buffer *buffer_info;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	int i = 0;

	if (!netif_msg_hw(adapter))
		return;
	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}
	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN    |
	 *   +--------------------------------------------------------------+
	 *   63     46 45   40 39 38 36 35 32 31  24             15        0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng ntw timestamp bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
			buffer_info = &tx_ring->buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X %3X %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
				i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}
	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
	 *   | Checksum   Ident  |   |           |    | Type | Type |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			buffer_info = &rx_ring->buffer_info[i];
			rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						rx_ring->rx_buffer_len, true);
					if (rx_ring->rx_buffer_len
						< IGB_RXBUFFER_1024)
						print_hex_dump(KERN_INFO, "",
						  DUMP_PREFIX_ADDRESS,
						  16, 1,
						  phys_to_virt(
						    buffer_info->page_dma +
						    buffer_info->page_offset),
						  PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type == e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
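/*
 * Editor's sketch of the assembly done above (illustration only): on 82580
 * parts the returned stamp is
 *
 *     stamp = (SYSTIMR >> 8)
 *           | ((u64)SYSTIML << IGB_82580_TSYNC_SHIFT)
 *           | ((u64)SYSTIMH << (IGB_82580_TSYNC_SHIFT + 32));
 *
 * while every other mac type uses shift = 0, i.e. the stamp is simply the
 * 64-bit value SYSTIMH:SYSTIML.
 */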
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
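/*
 * Worked example of the macro above (editor's note, derived purely from the
 * expression): i = 0, 1, 2, 3, ... maps to queue 0, 8, 1, 9, ... so even
 * indices fill queues 0..7 in order and odd indices fill queues 8..15.
 * This matches the VF layout described in igb_cache_ring_register(), where
 * VF n owns queues n and n + 8.
 */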
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
				                               Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
		adapter->tx_ring[i] = ring;
	}

	for (i = 0; i < adapter->num_rx_queues; i++) {
		ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
		adapter->rx_ring[i] = ring;
	}

	igb_cache_ring_register(adapter);

	return 0;

err:
	igb_free_queues(adapter);

	return -ENOMEM;
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	u32 msixbm = 0;
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	u32 ivar, index;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;

	if (q_vector->rx_ring)
		rx_queue = q_vector->rx_ring->reg_idx;
	if (q_vector->tx_ring)
		tx_queue = q_vector->tx_ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/* 82576 uses a table-based method for assigning vectors.
		   Each queue has a single entry in the table to which we write
		   a vector number along with a "valid" bit.  Sadly, the layout
		   of the table is somewhat counterintuitive. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue < 8) {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			} else {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue & 0x7);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue < 8) {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			} else {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/* 82580 uses the same table-based approach as 82576 but has
		   fewer entries; as a result we carry over for queues greater
		   than 4. */
		if (rx_queue > IGB_N0_QUEUE) {
			index = (rx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (rx_queue & 0x1) {
				/* vector goes into third byte of register */
				ivar = ivar & 0xFF00FFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
			} else {
				/* vector goes into low byte of register */
				ivar = ivar & 0xFFFFFF00;
				ivar |= msix_vector | E1000_IVAR_VALID;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		if (tx_queue > IGB_N0_QUEUE) {
			index = (tx_queue >> 1);
			ivar = array_rd32(E1000_IVAR0, index);
			if (tx_queue & 0x1) {
				/* vector goes into high byte of register */
				ivar = ivar & 0x00FFFFFF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
			} else {
				/* vector goes into second byte of register */
				ivar = ivar & 0xFFFF00FF;
				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
			}
			array_wr32(E1000_IVAR0, index, ivar);
		}
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
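/*
 * Illustrative IVAR example (editor's note, not from the original source):
 * on 82576, mapping rx queue 9 to MSI-X vector 3 selects IVAR0 entry
 * 9 & 0x7 = 1, and because the queue number is >= 8 the value
 * (3 | E1000_IVAR_VALID) lands in byte 2 (bits 23:16) of that entry;
 * rx queue 1 would instead use byte 0 of the same entry.
 */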
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		                      E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		                E1000_GPIE_PBA | E1000_GPIE_EIAME |
		                E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx_ring && q_vector->tx_ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else if (q_vector->tx_ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx_ring->queue_index);
		else if (q_vector->rx_ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx_ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
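/*
 * Vector accounting example (editor's illustration, derived from the logic
 * above): with rss_queues = 4, no VFs and queue pairing disabled, this
 * function requests 4 rx + 4 tx + 1 link/other = 9 MSI-X vectors; with
 * IGB_FLAG_QUEUE_PAIRS set, each vector serves an rx/tx pair and only
 * 4 + 1 = 5 vectors are requested.
 */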
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}

	return 0;

err_out:
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx_ring = adapter->rx_ring[ring_idx];
	q_vector->rx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx_ring = adapter->tx_ring[ring_idx];
	q_vector->tx_ring->q_vector = q_vector;
	q_vector->itr_val = adapter->tx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
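/*
 * Mapping example (editor's illustration): with 4 rx and 4 tx queues and
 * only 4 q_vectors (paired mode), vector i carries both rx_ring[i] and
 * tx_ring[i]; with 8 q_vectors each ring gets a vector of its own, rx
 * rings first, then tx rings.
 */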
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			free_irq(adapter->msix_entries[vector++].vector,
			         q_vector);
		}
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers, so we only clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !vlan_group_get_device(adapter->vlgrp, old_vid)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset*/
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
			((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
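	/*
	 * Worked example (editor's illustration only, not from the original
	 * source): with pba = 34 (KB, the 82575 default above) and
	 * max_frame_size = 1522, hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
	 * = min(31334, 31772) = 31334 bytes, so high_water = 31334 & 0xFFF0
	 * = 31328 and low_water = 31312.
	 */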
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags = 0;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
		        reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame_adv,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_multicast_list	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_register	= igb_vlan_rx_register,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u32 part_num;

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}

	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
	                                   IORESOURCE_MEM),
	                                   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
	                           IGB_ABS_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;

	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	netdev->features = NETIF_F_SG |
			   NETIF_F_IP_CSUM |
			   NETIF_F_HW_VLAN_TX |
			   NETIF_F_HW_VLAN_RX |
			   NETIF_F_HW_VLAN_FILTER;

	netdev->features |= NETIF_F_IPV6_CSUM;
	netdev->features |= NETIF_F_TSO;
	netdev->features |= NETIF_F_TSO6;
	netdev->features |= NETIF_F_GRO;

	netdev->vlan_features |= NETIF_F_TSO;
	netdev->vlan_features |= NETIF_F_TSO6;
	netdev->vlan_features |= NETIF_F_IP_CSUM;
	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
	netdev->vlan_features |= NETIF_F_SG;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576)
		netdev->features |= NETIF_F_SCTP_CSUM;

	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	hw->mac.ops.reset_hw(hw);

	/* make sure the NVM is good */
	if (igb_validate_nvm_checksum(hw) < 0) {
		dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	setup_timer(&adapter->watchdog_timer, igb_watchdog,
	            (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
	            (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	igb_validate_mdi_setting(hw);

	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */

	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type == e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
		                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
		                 &eeprom_data);
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & eeprom_apme_mask)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef CONFIG_IGB_DCA
	if (dca_add_requester(&pdev->dev) == 0) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
	}

#endif
	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
		                                            "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
		   "unknown"),
		 netdev->dev_addr);

	igb_read_part_num(hw, &part_num);
	dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
		(part_num >> 8), (part_num & 0xff));

	dev_info(&pdev->dev,
		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
		adapter->msix_entries ? "MSI-X" :
		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
		adapter->num_rx_queues, adapter->num_tx_queues);

	return 0;

err_register:
	igb_release_hw_control(adapter);
err_eeprom:
	if (!igb_check_reset_block(hw))
		igb_reset_phy(hw);

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
	igb_clear_interrupt_scheme(adapter);
	iounmap(hw->hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}
2038 * igb_remove - Device Removal Routine
2039 * @pdev: PCI device information struct
2041 * igb_remove is called by the PCI subsystem to alert the driver
2042 * that it should release a PCI device. The could be caused by a
2043 * Hot-Plug event, or because the driver is going to be removed from
2046 static void __devexit igb_remove(struct pci_dev *pdev)
2048 struct net_device *netdev = pci_get_drvdata(pdev);
2049 struct igb_adapter *adapter = netdev_priv(netdev);
2050 struct e1000_hw *hw = &adapter->hw;
2052 /* flush_scheduled_work() may reschedule our watchdog task, so
2053 * explicitly disable watchdog tasks from being rescheduled */
2054 set_bit(__IGB_DOWN, &adapter->state);
2055 del_timer_sync(&adapter->watchdog_timer);
2056 del_timer_sync(&adapter->phy_info_timer);
2058 flush_scheduled_work();
2060 #ifdef CONFIG_IGB_DCA
2061 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2062 dev_info(&pdev->dev, "DCA disabled\n");
2063 dca_remove_requester(&pdev->dev);
2064 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2065 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2069 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2070 * would have already happened in close and is redundant. */
2071 igb_release_hw_control(adapter);
2073 unregister_netdev(netdev);
2075 igb_clear_interrupt_scheme(adapter);
2077 #ifdef CONFIG_PCI_IOV
2078 /* reclaim resources allocated to VFs */
2079 if (adapter->vf_data) {
2080 /* disable iov and allow time for transactions to clear */
2081 pci_disable_sriov(pdev);
2084 kfree(adapter->vf_data);
2085 adapter->vf_data = NULL;
2086 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2088 dev_info(&pdev->dev, "IOV Disabled\n");
2092 iounmap(hw->hw_addr);
2093 if (hw->flash_address)
2094 iounmap(hw->flash_address);
2095 pci_release_selected_regions(pdev,
2096 pci_select_bars(pdev, IORESOURCE_MEM));
2098 free_netdev(netdev);
2100 pci_disable_pcie_error_reporting(pdev);
2102 pci_disable_device(pdev);
2106 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2107 * @adapter: board private structure to initialize
2109 * This function initializes the vf specific data storage and then attempts to
2110 * allocate the VFs. The reason for this ordering is that it is much
2111 * more expensive, time-wise, to disable SR-IOV than it is to allocate
2112 * and free the memory for the VFs.
2114 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2116 #ifdef CONFIG_PCI_IOV
2117 struct pci_dev *pdev = adapter->pdev;
2119 if (adapter->vfs_allocated_count) {
2120 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2121 sizeof(struct vf_data_storage),
2123 /* if allocation failed then we do not support SR-IOV */
2124 if (!adapter->vf_data) {
2125 adapter->vfs_allocated_count = 0;
2126 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2131 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2132 kfree(adapter->vf_data);
2133 adapter->vf_data = NULL;
2134 #endif /* CONFIG_PCI_IOV */
2135 adapter->vfs_allocated_count = 0;
2136 #ifdef CONFIG_PCI_IOV
2138 unsigned char mac_addr[ETH_ALEN];
2140 dev_info(&pdev->dev, "%d vfs allocated\n",
2141 adapter->vfs_allocated_count);
2142 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2143 random_ether_addr(mac_addr);
2144 igb_set_vf_mac(adapter, i, mac_addr);
2147 #endif /* CONFIG_PCI_IOV */
2152 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2153 * @adapter: board private structure to initialize
2155 * igb_init_hw_timer initializes the function pointers and values for the
2156 * timer found in hardware.
2158 static void igb_init_hw_timer(struct igb_adapter *adapter)
2160 struct e1000_hw *hw = &adapter->hw;
2162 switch (hw->mac.type) {
2165 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2166 adapter->cycles.read = igb_read_clock;
2167 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2168 adapter->cycles.mult = 1;
2170 * The 82580 timesync advances the system timer by 8ns every 8ns,
2171 * and the value cannot be scaled with a shift. Instead we need to shift
2172 * the registers to generate a 64-bit timer value. As a result
2173 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2174 * 24 in order to generate a larger value for synchronization.
2176 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2177 /* disable system timer temporarily by setting bit 31 */
2178 wr32(E1000_TSAUXC, 0x80000000);
2181 /* Set registers so that rollover occurs soon to test this. */
2182 wr32(E1000_SYSTIMR, 0x00000000);
2183 wr32(E1000_SYSTIML, 0x80000000);
2184 wr32(E1000_SYSTIMH, 0x000000FF);
2187 /* enable system timer by clearing bit 31 */
2188 wr32(E1000_TSAUXC, 0x0);
2191 timecounter_init(&adapter->clock,
2192 &adapter->cycles,
2193 ktime_to_ns(ktime_get_real()));
2195 * Synchronize our NIC clock against the system wall clock. NIC
2196 * time stamp reading requires ~3us per sample; each sample
2197 * was pretty stable even under load, so we only require 10
2198 * samples for each offset comparison.
2200 memset(&adapter->compare, 0, sizeof(adapter->compare));
2201 adapter->compare.source = &adapter->clock;
2202 adapter->compare.target = ktime_get_real;
2203 adapter->compare.num_samples = 10;
2204 timecompare_update(&adapter->compare, 0);
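		/*
		 * Rough sketch (an assumption based on the comment above,
		 * not code from this file): the driver's clock read callback
		 * assembles the shifted registers into one 64-bit cycle
		 * value along the lines of
		 *
		 *	u64 stamp;
		 *	stamp  = rd32(E1000_SYSTIMR) >> 8;
		 *	stamp |= (u64)rd32(E1000_SYSTIML) << IGB_82580_TSYNC_SHIFT;
		 *	stamp |= (u64)rd32(E1000_SYSTIMH) << (IGB_82580_TSYNC_SHIFT + 32);
		 *
		 * With mult = 1 and shift = 24 the timecounter then recovers
		 * nanoseconds as stamp >> 24, i.e. SYSTIML/H taken at face
		 * value.
		 */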
2208 * Initialize hardware timer: we keep it running just in case
2209 * some program needs it later on.
2211 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2212 adapter->cycles.read = igb_read_clock;
2213 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2214 adapter->cycles.mult = 1;
2216 * Scale the NIC clock cycle by a large factor so that
2217 * relatively small clock corrections can be added or
2218 * subtracted at each clock tick. The drawbacks of a large
2219 * factor are a) that the clock register overflows more quickly
2220 * (not such a big deal) and b) that the increment per tick has
2221 * to fit into 24 bits. As a result we need to use a shift of
2222 * 19 so we can fit a value of 16 into the TIMINCA register.
2224 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2225 wr32(E1000_TIMINCA,
2226 (1 << E1000_TIMINCA_16NS_SHIFT) |
2227 (16 << IGB_82576_TSYNC_SHIFT));
2229 /* Set registers so that rollover occurs soon to test this. */
2230 wr32(E1000_SYSTIML, 0x00000000);
2231 wr32(E1000_SYSTIMH, 0xFF800000);
2234 timecounter_init(&adapter->clock,
2235 &adapter->cycles,
2236 ktime_to_ns(ktime_get_real()));
2238 * Synchronize our NIC clock against the system wall clock. NIC
2239 * time stamp reading requires ~3us per sample; each sample
2240 * was pretty stable even under load, so we only require 10
2241 * samples for each offset comparison.
2243 memset(&adapter->compare, 0, sizeof(adapter->compare));
2244 adapter->compare.source = &adapter->clock;
2245 adapter->compare.target = ktime_get_real;
2246 adapter->compare.num_samples = 10;
2247 timecompare_update(&adapter->compare, 0);
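		/*
		 * Worked example (not part of the original source): the
		 * TIMINCA write above adds 16 << 19 = 0x00800000 to SYSTIM
		 * on every 16ns clock tick, which just fits the 24-bit
		 * increment field. SYSTIM therefore counts ns << 19, and
		 * with mult = 1, shift = 19 the timecounter converts a raw
		 * delta back to wall time as
		 *
		 *	ns = (systim_delta * 1) >> 19;
		 */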
2250 /* 82575 does not support timesync */
2258 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2259 * @adapter: board private structure to initialize
2261 * igb_sw_init initializes the Adapter private data structure.
2262 * Fields are initialized based on PCI device information and
2263 * OS network device settings (MTU size).
2265 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2267 struct e1000_hw *hw = &adapter->hw;
2268 struct net_device *netdev = adapter->netdev;
2269 struct pci_dev *pdev = adapter->pdev;
2271 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2273 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2274 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2275 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2276 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2278 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2279 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2281 spin_lock_init(&adapter->stats64_lock);
2282 #ifdef CONFIG_PCI_IOV
2283 if (hw->mac.type == e1000_82576)
2284 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2286 #endif /* CONFIG_PCI_IOV */
2287 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2290 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2291 * then we should combine the queues into a queue pair in order to
2292 * conserve interrupts due to limited supply
2294 if ((adapter->rss_queues > 4) ||
2295 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2296 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2298 /* This call may decrease the number of queues */
2299 if (igb_init_interrupt_scheme(adapter)) {
2300 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2304 igb_init_hw_timer(adapter);
2305 igb_probe_vfs(adapter);
2307 /* Explicitly disable IRQ since the NIC can be in any state. */
2308 igb_irq_disable(adapter);
2310 set_bit(__IGB_DOWN, &adapter->state);
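/*
 * Interrupt budget behind IGB_FLAG_QUEUE_PAIRS above (the vector count is
 * an assumption for illustration, not taken from this file): if the MAC
 * exposes about 10 MSI-X vectors, 8 unpaired RSS queues would need
 * 8 Rx + 8 Tx + 1 "other" = 17 vectors. Pairing a Tx and an Rx ring on one
 * q_vector cuts that to 8 + 1 = 9, while 4 + 4 + 1 = 9 explains why up to
 * 4 queues can stay unpaired.
 */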
2315 * igb_open - Called when a network interface is made active
2316 * @netdev: network interface device structure
2318 * Returns 0 on success, negative value on failure
2320 * The open entry point is called when a network interface is made
2321 * active by the system (IFF_UP). At this point all resources needed
2322 * for transmit and receive operations are allocated, the interrupt
2323 * handler is registered with the OS, the watchdog timer is started,
2324 * and the stack is notified that the interface is ready.
2326 static int igb_open(struct net_device *netdev)
2328 struct igb_adapter *adapter = netdev_priv(netdev);
2329 struct e1000_hw *hw = &adapter->hw;
2333 /* disallow open during test */
2334 if (test_bit(__IGB_TESTING, &adapter->state))
2337 netif_carrier_off(netdev);
2339 /* allocate transmit descriptors */
2340 err = igb_setup_all_tx_resources(adapter);
2344 /* allocate receive descriptors */
2345 err = igb_setup_all_rx_resources(adapter);
2349 igb_power_up_link(adapter);
2351 /* before we allocate an interrupt, we must be ready to handle it.
2352 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2353 * as soon as we call request_irq, so we have to set up our
2354 * clean_rx handler before we do so. */
2355 igb_configure(adapter);
2357 err = igb_request_irq(adapter);
2361 /* From here on the code is the same as igb_up() */
2362 clear_bit(__IGB_DOWN, &adapter->state);
2364 for (i = 0; i < adapter->num_q_vectors; i++) {
2365 struct igb_q_vector *q_vector = adapter->q_vector[i];
2366 napi_enable(&q_vector->napi);
2369 /* Clear any pending interrupts. */
2372 igb_irq_enable(adapter);
2374 /* notify VFs that reset has been completed */
2375 if (adapter->vfs_allocated_count) {
2376 u32 reg_data = rd32(E1000_CTRL_EXT);
2377 reg_data |= E1000_CTRL_EXT_PFRSTD;
2378 wr32(E1000_CTRL_EXT, reg_data);
2381 netif_tx_start_all_queues(netdev);
2383 /* start the watchdog. */
2384 hw->mac.get_link_status = 1;
2385 schedule_work(&adapter->watchdog_task);
2390 igb_release_hw_control(adapter);
2391 igb_power_down_link(adapter);
2392 igb_free_all_rx_resources(adapter);
2394 igb_free_all_tx_resources(adapter);
2402 * igb_close - Disables a network interface
2403 * @netdev: network interface device structure
2405 * Returns 0, this is not allowed to fail
2407 * The close entry point is called when an interface is de-activated
2408 * by the OS. The hardware is still under the driver's control, but
2409 * needs to be disabled. A global MAC reset is issued to stop the
2410 * hardware, and all transmit and receive resources are freed.
2412 static int igb_close(struct net_device *netdev)
2414 struct igb_adapter *adapter = netdev_priv(netdev);
2416 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2419 igb_free_irq(adapter);
2421 igb_free_all_tx_resources(adapter);
2422 igb_free_all_rx_resources(adapter);
2428 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2429 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2431 * Return 0 on success, negative on failure
2433 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2435 struct device *dev = tx_ring->dev;
2438 size = sizeof(struct igb_buffer) * tx_ring->count;
2439 tx_ring->buffer_info = vzalloc(size);
2440 if (!tx_ring->buffer_info)
2443 /* round up to nearest 4K */
2444 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2445 tx_ring->size = ALIGN(tx_ring->size, 4096);
2447 tx_ring->desc = dma_alloc_coherent(dev,
2455 tx_ring->next_to_use = 0;
2456 tx_ring->next_to_clean = 0;
2460 vfree(tx_ring->buffer_info);
2462 "Unable to allocate memory for the transmit descriptor ring\n");
2467 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2468 * (Descriptors) for all queues
2469 * @adapter: board private structure
2471 * Return 0 on success, negative on failure
2473 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2475 struct pci_dev *pdev = adapter->pdev;
2478 for (i = 0; i < adapter->num_tx_queues; i++) {
2479 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2482 "Allocation for Tx Queue %u failed\n", i);
2483 for (i--; i >= 0; i--)
2484 igb_free_tx_resources(adapter->tx_ring[i]);
2489 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2490 int r_idx = i % adapter->num_tx_queues;
2491 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
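/*
 * Illustration (not part of the original source): the modulo mapping above
 * folds the absolute queue range onto the rings actually allocated. With
 * num_tx_queues = 4:
 *
 *	table index: 0 1 2 3 4 5 6 7
 *	tx ring:     0 1 2 3 0 1 2 3
 *
 * so any queue_mapping value resolves to a valid ring.
 */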
2497 * igb_setup_tctl - configure the transmit control registers
2498 * @adapter: Board private structure
2500 void igb_setup_tctl(struct igb_adapter *adapter)
2502 struct e1000_hw *hw = &adapter->hw;
2505 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2506 wr32(E1000_TXDCTL(0), 0);
2508 /* Program the Transmit Control Register */
2509 tctl = rd32(E1000_TCTL);
2510 tctl &= ~E1000_TCTL_CT;
2511 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2512 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2514 igb_config_collision_dist(hw);
2516 /* Enable transmits */
2517 tctl |= E1000_TCTL_EN;
2519 wr32(E1000_TCTL, tctl);
2523 * igb_configure_tx_ring - Configure transmit ring after Reset
2524 * @adapter: board private structure
2525 * @ring: tx ring to configure
2527 * Configure a transmit ring after a reset.
2529 void igb_configure_tx_ring(struct igb_adapter *adapter,
2530 struct igb_ring *ring)
2532 struct e1000_hw *hw = &adapter->hw;
2534 u64 tdba = ring->dma;
2535 int reg_idx = ring->reg_idx;
2537 /* disable the queue */
2538 txdctl = rd32(E1000_TXDCTL(reg_idx));
2539 wr32(E1000_TXDCTL(reg_idx),
2540 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2544 wr32(E1000_TDLEN(reg_idx),
2545 ring->count * sizeof(union e1000_adv_tx_desc));
2546 wr32(E1000_TDBAL(reg_idx),
2547 tdba & 0x00000000ffffffffULL);
2548 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2550 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2551 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2552 writel(0, ring->head);
2553 writel(0, ring->tail);
2555 txdctl |= IGB_TX_PTHRESH;
2556 txdctl |= IGB_TX_HTHRESH << 8;
2557 txdctl |= IGB_TX_WTHRESH << 16;
2559 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2560 wr32(E1000_TXDCTL(reg_idx), txdctl);
2564 * igb_configure_tx - Configure transmit Unit after Reset
2565 * @adapter: board private structure
2567 * Configure the Tx unit of the MAC after a reset.
2569 static void igb_configure_tx(struct igb_adapter *adapter)
2573 for (i = 0; i < adapter->num_tx_queues; i++)
2574 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2578 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2579 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2581 * Returns 0 on success, negative on failure
2583 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2585 struct device *dev = rx_ring->dev;
2588 size = sizeof(struct igb_buffer) * rx_ring->count;
2589 rx_ring->buffer_info = vzalloc(size);
2590 if (!rx_ring->buffer_info)
2593 desc_len = sizeof(union e1000_adv_rx_desc);
2595 /* Round up to nearest 4K */
2596 rx_ring->size = rx_ring->count * desc_len;
2597 rx_ring->size = ALIGN(rx_ring->size, 4096);
2599 rx_ring->desc = dma_alloc_coherent(dev,
2607 rx_ring->next_to_clean = 0;
2608 rx_ring->next_to_use = 0;
2613 vfree(rx_ring->buffer_info);
2614 rx_ring->buffer_info = NULL;
2615 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2616 " ring\n");
2621 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2622 * (Descriptors) for all queues
2623 * @adapter: board private structure
2625 * Return 0 on success, negative on failure
2627 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2629 struct pci_dev *pdev = adapter->pdev;
2632 for (i = 0; i < adapter->num_rx_queues; i++) {
2633 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2636 "Allocation for Rx Queue %u failed\n", i);
2637 for (i--; i >= 0; i--)
2638 igb_free_rx_resources(adapter->rx_ring[i]);
2647 * igb_setup_mrqc - configure the multiple receive queue control registers
2648 * @adapter: Board private structure
2650 static void igb_setup_mrqc(struct igb_adapter *adapter)
2652 struct e1000_hw *hw = &adapter->hw;
2654 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2659 static const u8 rsshash[40] = {
2660 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2661 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2662 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2663 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2665 /* Fill out hash function seeds */
2666 for (j = 0; j < 10; j++) {
2667 u32 rsskey = rsshash[(j * 4)];
2668 rsskey |= rsshash[(j * 4) + 1] << 8;
2669 rsskey |= rsshash[(j * 4) + 2] << 16;
2670 rsskey |= rsshash[(j * 4) + 3] << 24;
2671 array_wr32(E1000_RSSRK(0), j, rsskey);
2674 num_rx_queues = adapter->rss_queues;
2676 if (adapter->vfs_allocated_count) {
2677 /* 82575 and 82576 support 2 RSS queues for VMDq */
2678 switch (hw->mac.type) {
2695 if (hw->mac.type == e1000_82575)
2699 for (j = 0; j < (32 * 4); j++) {
2700 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2702 reta.bytes[j & 3] |= num_rx_queues << shift2;
2704 wr32(E1000_RETA(j >> 2), reta.dword);
2708 * Disable raw packet checksumming so that RSS hash is placed in
2709 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2710 * offloads as they are enabled by default
2712 rxcsum = rd32(E1000_RXCSUM);
2713 rxcsum |= E1000_RXCSUM_PCSD;
2715 if (adapter->hw.mac.type >= e1000_82576)
2716 /* Enable Receive Checksum Offload for SCTP */
2717 rxcsum |= E1000_RXCSUM_CRCOFL;
2719 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2720 wr32(E1000_RXCSUM, rxcsum);
2722 /* If VMDq is enabled then we set the appropriate mode for that, else
2723 * we default to RSS so that an RSS hash is calculated per packet even
2724 * if we are only using one queue */
2725 if (adapter->vfs_allocated_count) {
2726 if (hw->mac.type > e1000_82575) {
2727 /* Set the default pool for the PF's first queue */
2728 u32 vtctl = rd32(E1000_VT_CTL);
2729 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2730 E1000_VT_CTL_DISABLE_DEF_POOL);
2731 vtctl |= adapter->vfs_allocated_count <<
2732 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2733 wr32(E1000_VT_CTL, vtctl);
2735 if (adapter->rss_queues > 1)
2736 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2738 mrqc = E1000_MRQC_ENABLE_VMDQ;
2740 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2742 igb_vmm_control(adapter);
2745 * Generate RSS hash based on TCP port numbers and/or
2746 * IPv4/v6 src and dst addresses since UDP cannot be
2747 * hashed reliably due to IP fragmentation
2749 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2750 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2751 E1000_MRQC_RSS_FIELD_IPV6 |
2752 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2753 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2755 wr32(E1000_MRQC, mrqc);
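/*
 * Worked example (not part of the original source): with
 * num_rx_queues = 4 and shift = 0 (82576, no VFs), the RETA loop above
 * fills the 128-entry redirection table with the repeating pattern
 * 0, 1, 2, 3, packing four one-byte entries per 32-bit register:
 *
 *	j = 0..3 -> bytes {0, 1, 2, 3} -> E1000_RETA(0)
 *	j = 4..7 -> bytes {0, 1, 2, 3} -> E1000_RETA(1)
 *	...
 */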
2759 * igb_setup_rctl - configure the receive control registers
2760 * @adapter: Board private structure
2762 void igb_setup_rctl(struct igb_adapter *adapter)
2764 struct e1000_hw *hw = &adapter->hw;
2767 rctl = rd32(E1000_RCTL);
2769 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2770 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2772 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2773 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2776 * enable stripping of CRC. It's unlikely this will break BMC
2777 * redirection as it did with e1000. Newer features require
2778 * that the HW strips the CRC.
2780 rctl |= E1000_RCTL_SECRC;
2782 /* disable store bad packets and clear size bits. */
2783 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2785 /* enable LPE to prevent packets larger than max_frame_size */
2786 rctl |= E1000_RCTL_LPE;
2788 /* disable queue 0 to prevent tail write w/o re-config */
2789 wr32(E1000_RXDCTL(0), 0);
2791 /* Attention!!! For SR-IOV PF driver operations you must enable
2792 * queue drop for all VF and PF queues to prevent head-of-line blocking
2793 * if an untrusted VF does not provide descriptors to hardware.
2795 if (adapter->vfs_allocated_count) {
2796 /* set all queue drop enable bits */
2797 wr32(E1000_QDE, ALL_QUEUES);
2800 wr32(E1000_RCTL, rctl);
2803 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2806 struct e1000_hw *hw = &adapter->hw;
2809 /* if it isn't the PF, check whether VFs are enabled and
2810 * increase the size to support VLAN tags */
2811 if (vfn < adapter->vfs_allocated_count &&
2812 adapter->vf_data[vfn].vlans_enabled)
2813 size += VLAN_TAG_SIZE;
2815 vmolr = rd32(E1000_VMOLR(vfn));
2816 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2817 vmolr |= size | E1000_VMOLR_LPE;
2818 wr32(E1000_VMOLR(vfn), vmolr);
2824 * igb_rlpml_set - set maximum receive packet size
2825 * @adapter: board private structure
2827 * Configure maximum receivable packet size.
2829 static void igb_rlpml_set(struct igb_adapter *adapter)
2831 u32 max_frame_size = adapter->max_frame_size;
2832 struct e1000_hw *hw = &adapter->hw;
2833 u16 pf_id = adapter->vfs_allocated_count;
2836 max_frame_size += VLAN_TAG_SIZE;
2838 /* if VFs are enabled, we set RLPML to the largest possible request
2839 * size and set the VMOLR RLPML to the size we need */
2841 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2842 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2845 wr32(E1000_RLPML, max_frame_size);
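/*
 * Worked example (not part of the original source): at the standard MTU
 * of 1500, igb_sw_init computes max_frame_size = 1500 + 14 (ETH_HLEN) +
 * 4 (ETH_FCS_LEN) = 1518, and an active VLAN adds VLAN_TAG_SIZE for an
 * RLPML of 1522. With VFs enabled, RLPML is opened up to
 * MAX_JUMBO_FRAME_SIZE and the per-pool VMOLR limit set above does the
 * real policing.
 */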
2848 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2851 struct e1000_hw *hw = &adapter->hw;
2855 * This register exists only on 82576 and newer, so if the MAC is older
2856 * we should exit and do nothing
2858 if (hw->mac.type < e1000_82576)
2861 vmolr = rd32(E1000_VMOLR(vfn));
2862 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2863 if (aupe)
2864 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2865 else
2866 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2868 /* clear all bits that might not be set */
2869 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2871 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2872 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2874 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2875 * multicast
2877 if (vfn <= adapter->vfs_allocated_count)
2878 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2880 wr32(E1000_VMOLR(vfn), vmolr);
2884 * igb_configure_rx_ring - Configure a receive ring after Reset
2885 * @adapter: board private structure
2886 * @ring: receive ring to be configured
2888 * Configure the Rx unit of the MAC after a reset.
2890 void igb_configure_rx_ring(struct igb_adapter *adapter,
2891 struct igb_ring *ring)
2893 struct e1000_hw *hw = &adapter->hw;
2894 u64 rdba = ring->dma;
2895 int reg_idx = ring->reg_idx;
2898 /* disable the queue */
2899 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2900 wr32(E1000_RXDCTL(reg_idx),
2901 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2903 /* Set DMA base address registers */
2904 wr32(E1000_RDBAL(reg_idx),
2905 rdba & 0x00000000ffffffffULL);
2906 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2907 wr32(E1000_RDLEN(reg_idx),
2908 ring->count * sizeof(union e1000_adv_rx_desc));
2910 /* initialize head and tail */
2911 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2912 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2913 writel(0, ring->head);
2914 writel(0, ring->tail);
2916 /* set descriptor configuration */
2917 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2918 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2919 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2920 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2921 srrctl |= IGB_RXBUFFER_16384 >>
2922 E1000_SRRCTL_BSIZEPKT_SHIFT;
2924 srrctl |= (PAGE_SIZE / 2) >>
2925 E1000_SRRCTL_BSIZEPKT_SHIFT;
2927 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2929 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2930 E1000_SRRCTL_BSIZEPKT_SHIFT;
2931 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2933 if (hw->mac.type == e1000_82580)
2934 srrctl |= E1000_SRRCTL_TIMESTAMP;
2935 /* Only set Drop Enable if we are supporting multiple queues */
2936 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2937 srrctl |= E1000_SRRCTL_DROP_EN;
2939 wr32(E1000_SRRCTL(reg_idx), srrctl);
2941 /* set filtering for VMDQ pools */
2942 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2944 /* enable receive descriptor fetching */
2945 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2946 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2947 rxdctl &= 0xFFF00000;
2948 rxdctl |= IGB_RX_PTHRESH;
2949 rxdctl |= IGB_RX_HTHRESH << 8;
2950 rxdctl |= IGB_RX_WTHRESH << 16;
2951 wr32(E1000_RXDCTL(reg_idx), rxdctl);
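/*
 * Illustration (the field granularities are assumptions consistent with
 * the ALIGN() calls above, not taken from this file): in the one-buffer
 * case with rx_buffer_len = 2048, the packet size field is programmed in
 * 1KB units,
 *
 *	srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;	   i.e. 2
 *
 * while the header-split path encodes the header size in 64-byte units
 * and spends half a page on the packet buffer.
 */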
2955 * igb_configure_rx - Configure receive Unit after Reset
2956 * @adapter: board private structure
2958 * Configure the Rx unit of the MAC after a reset.
2960 static void igb_configure_rx(struct igb_adapter *adapter)
2964 /* set UTA to appropriate mode */
2965 igb_set_uta(adapter);
2967 /* set the correct pool for the PF default MAC address in entry 0 */
2968 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2969 adapter->vfs_allocated_count);
2971 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2972 * the Base and Length of the Rx Descriptor Ring */
2973 for (i = 0; i < adapter->num_rx_queues; i++)
2974 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2978 * igb_free_tx_resources - Free Tx Resources per Queue
2979 * @tx_ring: Tx descriptor ring for a specific queue
2981 * Free all transmit software resources
2983 void igb_free_tx_resources(struct igb_ring *tx_ring)
2985 igb_clean_tx_ring(tx_ring);
2987 vfree(tx_ring->buffer_info);
2988 tx_ring->buffer_info = NULL;
2990 /* if not set, then don't free */
2994 dma_free_coherent(tx_ring->dev, tx_ring->size,
2995 tx_ring->desc, tx_ring->dma);
2997 tx_ring->desc = NULL;
3001 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3002 * @adapter: board private structure
3004 * Free all transmit software resources
3006 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3010 for (i = 0; i < adapter->num_tx_queues; i++)
3011 igb_free_tx_resources(adapter->tx_ring[i]);
3014 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3015 struct igb_buffer *buffer_info)
3017 if (buffer_info->dma) {
3018 if (buffer_info->mapped_as_page)
3019 dma_unmap_page(tx_ring->dev,
3021 buffer_info->length,
3024 dma_unmap_single(tx_ring->dev,
3026 buffer_info->length,
3028 buffer_info->dma = 0;
3030 if (buffer_info->skb) {
3031 dev_kfree_skb_any(buffer_info->skb);
3032 buffer_info->skb = NULL;
3034 buffer_info->time_stamp = 0;
3035 buffer_info->length = 0;
3036 buffer_info->next_to_watch = 0;
3037 buffer_info->mapped_as_page = false;
3041 * igb_clean_tx_ring - Free Tx Buffers
3042 * @tx_ring: ring to be cleaned
3044 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3046 struct igb_buffer *buffer_info;
3050 if (!tx_ring->buffer_info)
3052 /* Free all the Tx ring sk_buffs */
3054 for (i = 0; i < tx_ring->count; i++) {
3055 buffer_info = &tx_ring->buffer_info[i];
3056 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3059 size = sizeof(struct igb_buffer) * tx_ring->count;
3060 memset(tx_ring->buffer_info, 0, size);
3062 /* Zero out the descriptor ring */
3063 memset(tx_ring->desc, 0, tx_ring->size);
3065 tx_ring->next_to_use = 0;
3066 tx_ring->next_to_clean = 0;
3070 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3071 * @adapter: board private structure
3073 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3077 for (i = 0; i < adapter->num_tx_queues; i++)
3078 igb_clean_tx_ring(adapter->tx_ring[i]);
3082 * igb_free_rx_resources - Free Rx Resources
3083 * @rx_ring: ring to clean the resources from
3085 * Free all receive software resources
3087 void igb_free_rx_resources(struct igb_ring *rx_ring)
3089 igb_clean_rx_ring(rx_ring);
3091 vfree(rx_ring->buffer_info);
3092 rx_ring->buffer_info = NULL;
3094 /* if not set, then don't free */
3098 dma_free_coherent(rx_ring->dev, rx_ring->size,
3099 rx_ring->desc, rx_ring->dma);
3101 rx_ring->desc = NULL;
3105 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3106 * @adapter: board private structure
3108 * Free all receive software resources
3110 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3114 for (i = 0; i < adapter->num_rx_queues; i++)
3115 igb_free_rx_resources(adapter->rx_ring[i]);
3119 * igb_clean_rx_ring - Free Rx Buffers per Queue
3120 * @rx_ring: ring to free buffers from
3122 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3124 struct igb_buffer *buffer_info;
3128 if (!rx_ring->buffer_info)
3131 /* Free all the Rx ring sk_buffs */
3132 for (i = 0; i < rx_ring->count; i++) {
3133 buffer_info = &rx_ring->buffer_info[i];
3134 if (buffer_info->dma) {
3135 dma_unmap_single(rx_ring->dev,
3137 rx_ring->rx_buffer_len,
3139 buffer_info->dma = 0;
3142 if (buffer_info->skb) {
3143 dev_kfree_skb(buffer_info->skb);
3144 buffer_info->skb = NULL;
3146 if (buffer_info->page_dma) {
3147 dma_unmap_page(rx_ring->dev,
3148 buffer_info->page_dma,
3151 buffer_info->page_dma = 0;
3153 if (buffer_info->page) {
3154 put_page(buffer_info->page);
3155 buffer_info->page = NULL;
3156 buffer_info->page_offset = 0;
3160 size = sizeof(struct igb_buffer) * rx_ring->count;
3161 memset(rx_ring->buffer_info, 0, size);
3163 /* Zero out the descriptor ring */
3164 memset(rx_ring->desc, 0, rx_ring->size);
3166 rx_ring->next_to_clean = 0;
3167 rx_ring->next_to_use = 0;
3171 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3172 * @adapter: board private structure
3174 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3178 for (i = 0; i < adapter->num_rx_queues; i++)
3179 igb_clean_rx_ring(adapter->rx_ring[i]);
3183 * igb_set_mac - Change the Ethernet Address of the NIC
3184 * @netdev: network interface device structure
3185 * @p: pointer to an address structure
3187 * Returns 0 on success, negative on failure
3189 static int igb_set_mac(struct net_device *netdev, void *p)
3191 struct igb_adapter *adapter = netdev_priv(netdev);
3192 struct e1000_hw *hw = &adapter->hw;
3193 struct sockaddr *addr = p;
3195 if (!is_valid_ether_addr(addr->sa_data))
3196 return -EADDRNOTAVAIL;
3198 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3199 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3201 /* set the correct pool for the new PF MAC address in entry 0 */
3202 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3203 adapter->vfs_allocated_count);
3209 * igb_write_mc_addr_list - write multicast addresses to MTA
3210 * @netdev: network interface device structure
3212 * Writes multicast address list to the MTA hash table.
3213 * Returns: -ENOMEM on failure
3214 * 0 on no addresses written
3215 * X on writing X addresses to MTA
3217 static int igb_write_mc_addr_list(struct net_device *netdev)
3219 struct igb_adapter *adapter = netdev_priv(netdev);
3220 struct e1000_hw *hw = &adapter->hw;
3221 struct netdev_hw_addr *ha;
3225 if (netdev_mc_empty(netdev)) {
3226 /* nothing to program, so clear mc list */
3227 igb_update_mc_addr_list(hw, NULL, 0);
3228 igb_restore_vf_multicasts(adapter);
3232 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3236 /* The shared function expects a packed array of only addresses. */
3238 netdev_for_each_mc_addr(ha, netdev)
3239 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3241 igb_update_mc_addr_list(hw, mta_list, i);
3244 return netdev_mc_count(netdev);
3248 * igb_write_uc_addr_list - write unicast addresses to RAR table
3249 * @netdev: network interface device structure
3251 * Writes unicast address list to the RAR table.
3252 * Returns: -ENOMEM on failure/insufficient address space
3253 * 0 on no addresses written
3254 * X on writing X addresses to the RAR table
3256 static int igb_write_uc_addr_list(struct net_device *netdev)
3258 struct igb_adapter *adapter = netdev_priv(netdev);
3259 struct e1000_hw *hw = &adapter->hw;
3260 unsigned int vfn = adapter->vfs_allocated_count;
3261 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3264 /* return ENOMEM indicating insufficient memory for addresses */
3265 if (netdev_uc_count(netdev) > rar_entries)
3268 if (!netdev_uc_empty(netdev) && rar_entries) {
3269 struct netdev_hw_addr *ha;
3271 netdev_for_each_uc_addr(ha, netdev) {
3274 igb_rar_set_qsel(adapter, ha->addr,
3280 /* write the addresses in reverse order to avoid write combining */
3281 for (; rar_entries > 0 ; rar_entries--) {
3282 wr32(E1000_RAH(rar_entries), 0);
3283 wr32(E1000_RAL(rar_entries), 0);
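/*
 * Worked example (the RAR total is an assumption for illustration): if
 * the MAC has 24 receive address registers and 7 VFs are enabled,
 * entry 0 carries the PF MAC, entries 1-7 are reserved for the VFs, and
 * rar_entries = 24 - (7 + 1) = 16 slots remain for secondary unicast
 * addresses before unicast promiscuous mode has to be enabled instead.
 */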
3291 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3292 * @netdev: network interface device structure
3294 * The set_rx_mode entry point is called whenever the unicast or multicast
3295 * address lists or the network interface flags are updated. This routine is
3296 * responsible for configuring the hardware for proper unicast, multicast,
3297 * promiscuous mode, and all-multi behavior.
3299 static void igb_set_rx_mode(struct net_device *netdev)
3301 struct igb_adapter *adapter = netdev_priv(netdev);
3302 struct e1000_hw *hw = &adapter->hw;
3303 unsigned int vfn = adapter->vfs_allocated_count;
3304 u32 rctl, vmolr = 0;
3307 /* Check for Promiscuous and All Multicast modes */
3308 rctl = rd32(E1000_RCTL);
3310 /* clear the affected bits */
3311 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3313 if (netdev->flags & IFF_PROMISC) {
3314 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3315 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3317 if (netdev->flags & IFF_ALLMULTI) {
3318 rctl |= E1000_RCTL_MPE;
3319 vmolr |= E1000_VMOLR_MPME;
3322 * Write addresses to the MTA; if the attempt fails
3323 * then we should just turn on promiscuous mode so
3324 * that we can at least receive multicast traffic
3326 count = igb_write_mc_addr_list(netdev);
3328 rctl |= E1000_RCTL_MPE;
3329 vmolr |= E1000_VMOLR_MPME;
3331 vmolr |= E1000_VMOLR_ROMPE;
3335 * Write addresses to available RAR registers; if there is not
3336 * sufficient space to store all the addresses then enable
3337 * unicast promiscuous mode
3339 count = igb_write_uc_addr_list(netdev);
3341 rctl |= E1000_RCTL_UPE;
3342 vmolr |= E1000_VMOLR_ROPE;
3344 rctl |= E1000_RCTL_VFE;
3346 wr32(E1000_RCTL, rctl);
3349 * In order to support SR-IOV and eventually VMDq it is necessary to set
3350 * the VMOLR to enable the appropriate modes. Without this workaround
3351 * we will have issues with VLAN tag stripping not being done for frames
3352 * that are only arriving because we are the default pool
3354 if (hw->mac.type < e1000_82576)
3357 vmolr |= rd32(E1000_VMOLR(vfn)) &
3358 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3359 wr32(E1000_VMOLR(vfn), vmolr);
3360 igb_restore_vf_multicasts(adapter);
3363 /* Need to wait a few seconds after link up to get diagnostic information from
3364 * the phy */
3365 static void igb_update_phy_info(unsigned long data)
3367 struct igb_adapter *adapter = (struct igb_adapter *) data;
3368 igb_get_phy_info(&adapter->hw);
3372 * igb_has_link - check shared code for link and determine up/down
3373 * @adapter: pointer to driver private info
3375 bool igb_has_link(struct igb_adapter *adapter)
3377 struct e1000_hw *hw = &adapter->hw;
3378 bool link_active = false;
3381 /* get_link_status is set on LSC (link status) interrupt or
3382 * rx sequence error interrupt. get_link_status will stay
3383 * set (link reported down) until e1000_check_for_link
3384 * establishes link, for copper adapters ONLY
3386 switch (hw->phy.media_type) {
3387 case e1000_media_type_copper:
3388 if (hw->mac.get_link_status) {
3389 ret_val = hw->mac.ops.check_for_link(hw);
3390 link_active = !hw->mac.get_link_status;
3395 case e1000_media_type_internal_serdes:
3396 ret_val = hw->mac.ops.check_for_link(hw);
3397 link_active = hw->mac.serdes_has_link;
3400 case e1000_media_type_unknown:
3408 * igb_watchdog - Timer Call-back
3409 * @data: pointer to adapter cast into an unsigned long
3411 static void igb_watchdog(unsigned long data)
3413 struct igb_adapter *adapter = (struct igb_adapter *)data;
3414 /* Do the rest outside of interrupt context */
3415 schedule_work(&adapter->watchdog_task);
3418 static void igb_watchdog_task(struct work_struct *work)
3420 struct igb_adapter *adapter = container_of(work,
3423 struct e1000_hw *hw = &adapter->hw;
3424 struct net_device *netdev = adapter->netdev;
3428 link = igb_has_link(adapter);
3430 if (!netif_carrier_ok(netdev)) {
3432 hw->mac.ops.get_speed_and_duplex(hw,
3433 &adapter->link_speed,
3434 &adapter->link_duplex);
3436 ctrl = rd32(E1000_CTRL);
3437 /* Link status message must follow this format */
3438 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3439 "Flow Control: %s\n",
3441 adapter->link_speed,
3442 adapter->link_duplex == FULL_DUPLEX ?
3443 "Full Duplex" : "Half Duplex",
3444 ((ctrl & E1000_CTRL_TFCE) &&
3445 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3446 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3447 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3449 /* adjust timeout factor according to speed/duplex */
3450 adapter->tx_timeout_factor = 1;
3451 switch (adapter->link_speed) {
3453 adapter->tx_timeout_factor = 14;
3456 /* maybe add some timeout factor ? */
3460 netif_carrier_on(netdev);
3462 igb_ping_all_vfs(adapter);
3464 /* link state has changed, schedule phy info update */
3465 if (!test_bit(__IGB_DOWN, &adapter->state))
3466 mod_timer(&adapter->phy_info_timer,
3467 round_jiffies(jiffies + 2 * HZ));
3470 if (netif_carrier_ok(netdev)) {
3471 adapter->link_speed = 0;
3472 adapter->link_duplex = 0;
3473 /* Link status message must follow this format */
3474 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3476 netif_carrier_off(netdev);
3478 igb_ping_all_vfs(adapter);
3480 /* link state has changed, schedule phy info update */
3481 if (!test_bit(__IGB_DOWN, &adapter->state))
3482 mod_timer(&adapter->phy_info_timer,
3483 round_jiffies(jiffies + 2 * HZ));
3487 spin_lock(&adapter->stats64_lock);
3488 igb_update_stats(adapter, &adapter->stats64);
3489 spin_unlock(&adapter->stats64_lock);
3491 for (i = 0; i < adapter->num_tx_queues; i++) {
3492 struct igb_ring *tx_ring = adapter->tx_ring[i];
3493 if (!netif_carrier_ok(netdev)) {
3494 /* We've lost link, so the controller stops DMA,
3495 * but we've got queued Tx work that's never going
3496 * to get done, so reset controller to flush Tx.
3497 * (Do the reset outside of interrupt context). */
3498 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3499 adapter->tx_timeout_count++;
3500 schedule_work(&adapter->reset_task);
3501 /* return immediately since reset is imminent */
3506 /* Force detection of hung controller every watchdog period */
3507 tx_ring->detect_tx_hung = true;
3510 /* Cause software interrupt to ensure rx ring is cleaned */
3511 if (adapter->msix_entries) {
3513 for (i = 0; i < adapter->num_q_vectors; i++) {
3514 struct igb_q_vector *q_vector = adapter->q_vector[i];
3515 eics |= q_vector->eims_value;
3517 wr32(E1000_EICS, eics);
3519 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3522 /* Reset the timer */
3523 if (!test_bit(__IGB_DOWN, &adapter->state))
3524 mod_timer(&adapter->watchdog_timer,
3525 round_jiffies(jiffies + 2 * HZ));
3528 enum latency_range {
3532 latency_invalid = 255
3536 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3538 * Stores a new ITR value based strictly on packet size. This
3539 * algorithm is less sophisticated than that used in igb_update_itr,
3540 * due to the difficulty of synchronizing statistics across multiple
3541 * receive rings. The divisors and thresholds used by this function
3542 * were determined based on theoretical maximum wire speed and testing
3543 * data, in order to minimize response time while increasing bulk
3544 * throughput.
3545 * This functionality is controlled by the InterruptThrottleRate module
3546 * parameter (see igb_param.c)
3547 * NOTE: This function is called only when operating in a multiqueue
3548 * receive environment.
3549 * @q_vector: pointer to q_vector
3551 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3553 int new_val = q_vector->itr_val;
3554 int avg_wire_size = 0;
3555 struct igb_adapter *adapter = q_vector->adapter;
3556 struct igb_ring *ring;
3557 unsigned int packets;
3559 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3560 * ints/sec - ITR timer value of 120 ticks.
3562 if (adapter->link_speed != SPEED_1000) {
3567 ring = q_vector->rx_ring;
3569 packets = ACCESS_ONCE(ring->total_packets);
3572 avg_wire_size = ring->total_bytes / packets;
3575 ring = q_vector->tx_ring;
3577 packets = ACCESS_ONCE(ring->total_packets);
3580 avg_wire_size = max_t(u32, avg_wire_size,
3581 ring->total_bytes / packets);
3584 /* if avg_wire_size isn't set no work was done */
3588 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3589 avg_wire_size += 24;
3591 /* Don't starve jumbo frames */
3592 avg_wire_size = min(avg_wire_size, 3000);
3594 /* Give a little boost to mid-size frames */
3595 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3596 new_val = avg_wire_size / 3;
3598 new_val = avg_wire_size / 2;
3600 /* when in itr mode 3 do not exceed 20K ints/sec */
3601 if (adapter->rx_itr_setting == 3 && new_val < 196)
3605 if (new_val != q_vector->itr_val) {
3606 q_vector->itr_val = new_val;
3607 q_vector->set_itr = 1;
3610 if (q_vector->rx_ring) {
3611 q_vector->rx_ring->total_bytes = 0;
3612 q_vector->rx_ring->total_packets = 0;
3614 if (q_vector->tx_ring) {
3615 q_vector->tx_ring->total_bytes = 0;
3616 q_vector->tx_ring->total_packets = 0;
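/*
 * Worked example (not part of the original source): say an interval saw
 * 100 rx packets totalling 100000 bytes. Then avg_wire_size = 1000, plus
 * 24 bytes of CRC/preamble/IPG gives 1024, which falls in the mid-size
 * band (300..1200):
 *
 *	new_val = 1024 / 3;	   i.e. 341
 *
 * a lower ITR value (hence a somewhat higher interrupt rate) than the
 * 1024 / 2 = 512 the generic branch would produce.
 */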
3621 * igb_update_itr - update the dynamic ITR value based on statistics
3622 * Stores a new ITR value based on packets and byte
3623 * counts during the last interrupt. The advantage of per interrupt
3624 * computation is faster updates and more accurate ITR for the current
3625 * traffic pattern. Constants in this function were computed
3626 * based on theoretical maximum wire speed and thresholds were set based
3627 * on testing data as well as attempting to minimize response time
3628 * while increasing bulk throughput.
3629 * this functionality is controlled by the InterruptThrottleRate module
3630 * parameter (see igb_param.c)
3631 * NOTE: These calculations are only valid when operating in a single-
3632 * queue environment.
3633 * @adapter: pointer to adapter
3634 * @itr_setting: current q_vector->itr_val
3635 * @packets: the number of packets during this measurement interval
3636 * @bytes: the number of bytes during this measurement interval
3638 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3639 int packets, int bytes)
3641 unsigned int retval = itr_setting;
3644 goto update_itr_done;
3646 switch (itr_setting) {
3647 case lowest_latency:
3648 /* handle TSO and jumbo frames */
3649 if (bytes/packets > 8000)
3650 retval = bulk_latency;
3651 else if ((packets < 5) && (bytes > 512))
3652 retval = low_latency;
3654 case low_latency: /* 50 usec aka 20000 ints/s */
3655 if (bytes > 10000) {
3656 /* this if handles the TSO accounting */
3657 if (bytes/packets > 8000) {
3658 retval = bulk_latency;
3659 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3660 retval = bulk_latency;
3661 } else if (packets > 35) {
3662 retval = lowest_latency;
3664 } else if (bytes/packets > 2000) {
3665 retval = bulk_latency;
3666 } else if (packets <= 2 && bytes < 512) {
3667 retval = lowest_latency;
3670 case bulk_latency: /* 250 usec aka 4000 ints/s */
3671 if (bytes > 25000) {
3673 retval = low_latency;
3674 } else if (bytes < 1500) {
3675 retval = low_latency;
3684 static void igb_set_itr(struct igb_adapter *adapter)
3686 struct igb_q_vector *q_vector = adapter->q_vector[0];
3688 u32 new_itr = q_vector->itr_val;
3690 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3691 if (adapter->link_speed != SPEED_1000) {
3697 adapter->rx_itr = igb_update_itr(adapter,
3699 q_vector->rx_ring->total_packets,
3700 q_vector->rx_ring->total_bytes);
3702 adapter->tx_itr = igb_update_itr(adapter,
3704 q_vector->tx_ring->total_packets,
3705 q_vector->tx_ring->total_bytes);
3706 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3708 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3709 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3710 current_itr = low_latency;
3712 switch (current_itr) {
3713 /* counts and packets in update_itr are dependent on these numbers */
3714 case lowest_latency:
3715 new_itr = 56; /* aka 70,000 ints/sec */
3718 new_itr = 196; /* aka 20,000 ints/sec */
3721 new_itr = 980; /* aka 4,000 ints/sec */
3728 q_vector->rx_ring->total_bytes = 0;
3729 q_vector->rx_ring->total_packets = 0;
3730 q_vector->tx_ring->total_bytes = 0;
3731 q_vector->tx_ring->total_packets = 0;
3733 if (new_itr != q_vector->itr_val) {
3734 /* this attempts to bias the interrupt rate towards Bulk
3735 * by adding intermediate steps when interrupt rate is
3736 * increasing */
3737 new_itr = new_itr > q_vector->itr_val ?
3738 max((new_itr * q_vector->itr_val) /
3739 (new_itr + (q_vector->itr_val >> 2)),
3742 /* Don't write the value here; it resets the adapter's
3743 * internal timer, and causes us to delay far longer than
3744 * we should between interrupts. Instead, we write the ITR
3745 * value at the beginning of the next interrupt so the timing
3746 * ends up being correct.
3748 q_vector->itr_val = new_itr;
3749 q_vector->set_itr = 1;
3753 #define IGB_TX_FLAGS_CSUM 0x00000001
3754 #define IGB_TX_FLAGS_VLAN 0x00000002
3755 #define IGB_TX_FLAGS_TSO 0x00000004
3756 #define IGB_TX_FLAGS_IPV4 0x00000008
3757 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3758 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3759 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3761 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3762 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3764 struct e1000_adv_tx_context_desc *context_desc;
3767 struct igb_buffer *buffer_info;
3768 u32 info = 0, tu_cmd = 0;
3772 if (skb_header_cloned(skb)) {
3773 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3778 l4len = tcp_hdrlen(skb);
3781 if (skb->protocol == htons(ETH_P_IP)) {
3782 struct iphdr *iph = ip_hdr(skb);
3785 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3789 } else if (skb_is_gso_v6(skb)) {
3790 ipv6_hdr(skb)->payload_len = 0;
3791 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3792 &ipv6_hdr(skb)->daddr,
3796 i = tx_ring->next_to_use;
3798 buffer_info = &tx_ring->buffer_info[i];
3799 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3800 /* VLAN MACLEN IPLEN */
3801 if (tx_flags & IGB_TX_FLAGS_VLAN)
3802 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3803 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3804 *hdr_len += skb_network_offset(skb);
3805 info |= skb_network_header_len(skb);
3806 *hdr_len += skb_network_header_len(skb);
3807 context_desc->vlan_macip_lens = cpu_to_le32(info);
3809 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3810 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3812 if (skb->protocol == htons(ETH_P_IP))
3813 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3814 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3816 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3819 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3820 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3822 /* For 82575, context index must be unique per ring. */
3823 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3824 mss_l4len_idx |= tx_ring->reg_idx << 4;
3826 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3827 context_desc->seqnum_seed = 0;
3829 buffer_info->time_stamp = jiffies;
3830 buffer_info->next_to_watch = i;
3831 buffer_info->dma = 0;
3833 if (i == tx_ring->count)
3836 tx_ring->next_to_use = i;
3841 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3842 struct sk_buff *skb, u32 tx_flags)
3844 struct e1000_adv_tx_context_desc *context_desc;
3845 struct device *dev = tx_ring->dev;
3846 struct igb_buffer *buffer_info;
3847 u32 info = 0, tu_cmd = 0;
3850 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3851 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3852 i = tx_ring->next_to_use;
3853 buffer_info = &tx_ring->buffer_info[i];
3854 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3856 if (tx_flags & IGB_TX_FLAGS_VLAN)
3857 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3859 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3860 if (skb->ip_summed == CHECKSUM_PARTIAL)
3861 info |= skb_network_header_len(skb);
3863 context_desc->vlan_macip_lens = cpu_to_le32(info);
3865 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3867 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3870 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3871 const struct vlan_ethhdr *vhdr =
3872 (const struct vlan_ethhdr*)skb->data;
3874 protocol = vhdr->h_vlan_encapsulated_proto;
3876 protocol = skb->protocol;
3880 case cpu_to_be16(ETH_P_IP):
3881 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3882 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3883 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3884 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3885 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3887 case cpu_to_be16(ETH_P_IPV6):
3888 /* XXX what about other V6 headers?? */
3889 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3890 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3891 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3892 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3895 if (unlikely(net_ratelimit()))
3897 "partial checksum but proto=%x!\n",
3903 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3904 context_desc->seqnum_seed = 0;
3905 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3906 context_desc->mss_l4len_idx =
3907 cpu_to_le32(tx_ring->reg_idx << 4);
3909 buffer_info->time_stamp = jiffies;
3910 buffer_info->next_to_watch = i;
3911 buffer_info->dma = 0;
3914 if (i == tx_ring->count)
3916 tx_ring->next_to_use = i;
3923 #define IGB_MAX_TXD_PWR 16
3924 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
3926 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3929 struct igb_buffer *buffer_info;
3930 struct device *dev = tx_ring->dev;
3931 unsigned int hlen = skb_headlen(skb);
3932 unsigned int count = 0, i;
3934 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3936 i = tx_ring->next_to_use;
3938 buffer_info = &tx_ring->buffer_info[i];
3939 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3940 buffer_info->length = hlen;
3941 /* set time_stamp *before* dma to help avoid a possible race */
3942 buffer_info->time_stamp = jiffies;
3943 buffer_info->next_to_watch = i;
3944 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3946 if (dma_mapping_error(dev, buffer_info->dma))
3949 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3950 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3951 unsigned int len = frag->size;
3955 if (i == tx_ring->count)
3958 buffer_info = &tx_ring->buffer_info[i];
3959 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3960 buffer_info->length = len;
3961 buffer_info->time_stamp = jiffies;
3962 buffer_info->next_to_watch = i;
3963 buffer_info->mapped_as_page = true;
3964 buffer_info->dma = dma_map_page(dev,
3969 if (dma_mapping_error(dev, buffer_info->dma))
3974 tx_ring->buffer_info[i].skb = skb;
3975 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
3976 /* multiply data chunks by size of headers */
3977 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3978 tx_ring->buffer_info[i].gso_segs = gso_segs;
3979 tx_ring->buffer_info[first].next_to_watch = i;
3984 dev_err(dev, "TX DMA map failed\n");
3986 /* clear timestamp and dma mappings for failed buffer_info mapping */
3987 buffer_info->dma = 0;
3988 buffer_info->time_stamp = 0;
3989 buffer_info->length = 0;
3990 buffer_info->next_to_watch = 0;
3991 buffer_info->mapped_as_page = false;
3993 /* clear timestamp and dma mappings for remaining portion of packet */
3998 buffer_info = &tx_ring->buffer_info[i];
3999 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
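/*
 * Worked example (not part of the original source) for the bytecount
 * accounting above: a TSO skb with skb->len = 6000 and hlen = 66 that
 * the hardware segments into gso_segs = 4 wire frames is charged
 *
 *	bytecount = (4 - 1) * 66 + 6000 = 6198
 *
 * bytes: the original data plus the three replicated headers.
 */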
4005 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4006 u32 tx_flags, int count, u32 paylen,
4009 union e1000_adv_tx_desc *tx_desc;
4010 struct igb_buffer *buffer_info;
4011 u32 olinfo_status = 0, cmd_type_len;
4012 unsigned int i = tx_ring->next_to_use;
4014 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4015 E1000_ADVTXD_DCMD_DEXT);
4017 if (tx_flags & IGB_TX_FLAGS_VLAN)
4018 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4020 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4021 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4023 if (tx_flags & IGB_TX_FLAGS_TSO) {
4024 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4026 /* insert tcp checksum */
4027 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4029 /* insert ip checksum */
4030 if (tx_flags & IGB_TX_FLAGS_IPV4)
4031 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4033 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4034 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4037 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4038 (tx_flags & (IGB_TX_FLAGS_CSUM |
4040 IGB_TX_FLAGS_VLAN)))
4041 olinfo_status |= tx_ring->reg_idx << 4;
4043 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4046 buffer_info = &tx_ring->buffer_info[i];
4047 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4048 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4049 tx_desc->read.cmd_type_len =
4050 cpu_to_le32(cmd_type_len | buffer_info->length);
4051 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4054 if (i == tx_ring->count)
4056 } while (count > 0);
4058 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4059 /* Force memory writes to complete before letting h/w
4060 * know there are new descriptors to fetch. (Only
4061 * applicable for weak-ordered memory model archs,
4062 * such as IA-64). */
4065 tx_ring->next_to_use = i;
4066 writel(i, tx_ring->tail);
4067 /* we need this if more than one processor can write to our tail
4068 * at a time; it synchronizes IO on IA64/Altix systems */
4072 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4074 struct net_device *netdev = tx_ring->netdev;
4076 netif_stop_subqueue(netdev, tx_ring->queue_index);
4078 /* Herbert's original patch had:
4079 * smp_mb__after_netif_stop_queue();
4080 * but since that doesn't exist yet, just open code it. */
4083 /* We need to check again in case another CPU has just
4084 * made room available. */
4085 if (igb_desc_unused(tx_ring) < size)
4089 netif_wake_subqueue(netdev, tx_ring->queue_index);
4091 u64_stats_update_begin(&tx_ring->tx_syncp2);
4092 tx_ring->tx_stats.restart_queue2++;
4093 u64_stats_update_end(&tx_ring->tx_syncp2);
4098 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4100 if (igb_desc_unused(tx_ring) >= size)
4102 return __igb_maybe_stop_tx(tx_ring, size);
4105 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4106 struct igb_ring *tx_ring)
4113 /* need: 1 descriptor per page,
4114 * + 2 desc gap to keep tail from touching head,
4115 * + 1 desc for skb->data,
4116 * + 1 desc for context descriptor,
4117 * otherwise try next time */
4118 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4119 /* this is a hard error */
4120 return NETDEV_TX_BUSY;
4123 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4124 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4125 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4128 if (vlan_tx_tag_present(skb)) {
4129 tx_flags |= IGB_TX_FLAGS_VLAN;
4130 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4133 if (skb->protocol == htons(ETH_P_IP))
4134 tx_flags |= IGB_TX_FLAGS_IPV4;
4136 first = tx_ring->next_to_use;
4137 if (skb_is_gso(skb)) {
4138 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4141 dev_kfree_skb_any(skb);
4142 return NETDEV_TX_OK;
4147 tx_flags |= IGB_TX_FLAGS_TSO;
4148 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4149 (skb->ip_summed == CHECKSUM_PARTIAL))
4150 tx_flags |= IGB_TX_FLAGS_CSUM;
4153 * count reflects descriptors mapped; if 0 or less then a mapping error
4154 * has occurred and we need to rewind the descriptor queue
4156 count = igb_tx_map_adv(tx_ring, skb, first);
4158 dev_kfree_skb_any(skb);
4159 tx_ring->buffer_info[first].time_stamp = 0;
4160 tx_ring->next_to_use = first;
4161 return NETDEV_TX_OK;
4164 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4166 /* Make sure there is space in the ring for the next send. */
4167 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4169 return NETDEV_TX_OK;
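/*
 * Worked example (not part of the original source) for the descriptor
 * budget above: an skb carrying 3 page fragments must find
 *
 *	3 (frags) + 1 (skb->data) + 1 (context) + 2 (head/tail gap) = 7
 *
 * free descriptors, i.e. nr_frags + 4, or the queue is stopped and
 * NETDEV_TX_BUSY returned.
 */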
4172 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4173 struct net_device *netdev)
4175 struct igb_adapter *adapter = netdev_priv(netdev);
4176 struct igb_ring *tx_ring;
4179 if (test_bit(__IGB_DOWN, &adapter->state)) {
4180 dev_kfree_skb_any(skb);
4181 return NETDEV_TX_OK;
4184 if (skb->len <= 0) {
4185 dev_kfree_skb_any(skb);
4186 return NETDEV_TX_OK;
4189 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4190 tx_ring = adapter->multi_tx_table[r_idx];
4192 /* This goes back to the question of how to logically map a tx queue
4193 * to a flow. Right now, performance is impacted slightly negatively
4194 * if using multiple tx queues. If the stack breaks away from a
4195 * single qdisc implementation, we can look at this again. */
4196 return igb_xmit_frame_ring_adv(skb, tx_ring);
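/* Queue selection sketch (values illustrative): with
 * IGB_ABS_MAX_TX_QUEUES a power of two, the AND above is a cheap
 * modulo, e.g. queue_mapping 10 & (8 - 1) selects tx ring 2. */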
4200 * igb_tx_timeout - Respond to a Tx Hang
4201 * @netdev: network interface device structure
4203 static void igb_tx_timeout(struct net_device *netdev)
4205 struct igb_adapter *adapter = netdev_priv(netdev);
4206 struct e1000_hw *hw = &adapter->hw;
4208 /* Do the reset outside of interrupt context */
4209 adapter->tx_timeout_count++;
4211 if (hw->mac.type == e1000_82580)
4212 hw->dev_spec._82575.global_device_reset = true;
4214 schedule_work(&adapter->reset_task);
4216 (adapter->eims_enable_mask & ~adapter->eims_other));
4219 static void igb_reset_task(struct work_struct *work)
4221 struct igb_adapter *adapter;
4222 adapter = container_of(work, struct igb_adapter, reset_task);
4225 netdev_err(adapter->netdev, "Reset adapter\n");
4226 igb_reinit_locked(adapter);
4230 * igb_get_stats64 - Get System Network Statistics
4231 * @netdev: network interface device structure
4232 * @stats: rtnl_link_stats64 pointer
4235 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4236 struct rtnl_link_stats64 *stats)
4238 struct igb_adapter *adapter = netdev_priv(netdev);
4240 spin_lock(&adapter->stats64_lock);
4241 igb_update_stats(adapter, &adapter->stats64);
4242 memcpy(stats, &adapter->stats64, sizeof(*stats));
4243 spin_unlock(&adapter->stats64_lock);
4249 * igb_change_mtu - Change the Maximum Transfer Unit
4250 * @netdev: network interface device structure
4251 * @new_mtu: new value for maximum frame size
4253 * Returns 0 on success, negative on failure
4255 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4257 struct igb_adapter *adapter = netdev_priv(netdev);
4258 struct pci_dev *pdev = adapter->pdev;
4259 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4260 u32 rx_buffer_len, i;
4262 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4263 dev_err(&pdev->dev, "Invalid MTU setting\n");
4267 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4268 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4272 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4275 /* igb_down has a dependency on max_frame_size */
4276 adapter->max_frame_size = max_frame;
4278 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4279 * means we reserve 2 more, this pushes us to allocate from the next
4280 * larger slab size.
4281 * i.e. RXBUFFER_2048 --> size-4096 slab
4284 if (adapter->hw.mac.type == e1000_82580)
4285 max_frame += IGB_TS_HDR_LEN;
4287 if (max_frame <= IGB_RXBUFFER_1024)
4288 rx_buffer_len = IGB_RXBUFFER_1024;
4289 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4290 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4292 rx_buffer_len = IGB_RXBUFFER_128;
4294 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4295 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4296 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4298 if ((adapter->hw.mac.type == e1000_82580) &&
4299 (rx_buffer_len == IGB_RXBUFFER_128))
4300 rx_buffer_len += IGB_RXBUFFER_64;
4302 if (netif_running(netdev))
4305 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4306 netdev->mtu, new_mtu);
4307 netdev->mtu = new_mtu;
4309 for (i = 0; i < adapter->num_rx_queues; i++)
4310 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4312 if (netif_running(netdev))
4317 clear_bit(__IGB_RESETTING, &adapter->state);
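/* Worked example (illustrative): new_mtu = 1500 gives max_frame =
 * 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) = 1518, which fits within
 * MAXIMUM_ETHERNET_VLAN_SIZE (1522), so full frames land in the
 * 1522 byte buffer; jumbo frames instead use the small 128 byte
 * header buffer plus half-page fragments. */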
4323 * igb_update_stats - Update the board statistics counters
4324 * @adapter: board private structure
4327 void igb_update_stats(struct igb_adapter *adapter,
4328 struct rtnl_link_stats64 *net_stats)
4330 struct e1000_hw *hw = &adapter->hw;
4331 struct pci_dev *pdev = adapter->pdev;
4337 u64 _bytes, _packets;
4339 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4342 * Prevent stats update while adapter is being reset, or if the pci
4343 * connection is down.
4345 if (adapter->link_speed == 0)
4347 if (pci_channel_offline(pdev))
4352 for (i = 0; i < adapter->num_rx_queues; i++) {
4353 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4354 struct igb_ring *ring = adapter->rx_ring[i];
4356 ring->rx_stats.drops += rqdpc_tmp;
4357 net_stats->rx_fifo_errors += rqdpc_tmp;
4360 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4361 _bytes = ring->rx_stats.bytes;
4362 _packets = ring->rx_stats.packets;
4363 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4365 packets += _packets;
4368 net_stats->rx_bytes = bytes;
4369 net_stats->rx_packets = packets;
4373 for (i = 0; i < adapter->num_tx_queues; i++) {
4374 struct igb_ring *ring = adapter->tx_ring[i];
4376 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4377 _bytes = ring->tx_stats.bytes;
4378 _packets = ring->tx_stats.packets;
4379 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4381 packets += _packets;
4383 net_stats->tx_bytes = bytes;
4384 net_stats->tx_packets = packets;
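/* The fetch_begin/retry pairs above are the usual u64_stats seqcount
 * pattern: re-read the 64-bit byte/packet counters if a writer raced
 * with us, giving a tear-free snapshot without locking the hot Tx/Rx
 * paths on 32-bit machines. */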
4386 /* read stats registers */
4387 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4388 adapter->stats.gprc += rd32(E1000_GPRC);
4389 adapter->stats.gorc += rd32(E1000_GORCL);
4390 rd32(E1000_GORCH); /* clear GORCL */
4391 adapter->stats.bprc += rd32(E1000_BPRC);
4392 adapter->stats.mprc += rd32(E1000_MPRC);
4393 adapter->stats.roc += rd32(E1000_ROC);
4395 adapter->stats.prc64 += rd32(E1000_PRC64);
4396 adapter->stats.prc127 += rd32(E1000_PRC127);
4397 adapter->stats.prc255 += rd32(E1000_PRC255);
4398 adapter->stats.prc511 += rd32(E1000_PRC511);
4399 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4400 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4401 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4402 adapter->stats.sec += rd32(E1000_SEC);
4404 mpc = rd32(E1000_MPC);
4405 adapter->stats.mpc += mpc;
4406 net_stats->rx_fifo_errors += mpc;
4407 adapter->stats.scc += rd32(E1000_SCC);
4408 adapter->stats.ecol += rd32(E1000_ECOL);
4409 adapter->stats.mcc += rd32(E1000_MCC);
4410 adapter->stats.latecol += rd32(E1000_LATECOL);
4411 adapter->stats.dc += rd32(E1000_DC);
4412 adapter->stats.rlec += rd32(E1000_RLEC);
4413 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4414 adapter->stats.xontxc += rd32(E1000_XONTXC);
4415 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4416 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4417 adapter->stats.fcruc += rd32(E1000_FCRUC);
4418 adapter->stats.gptc += rd32(E1000_GPTC);
4419 adapter->stats.gotc += rd32(E1000_GOTCL);
4420 rd32(E1000_GOTCH); /* clear GOTCL */
4421 adapter->stats.rnbc += rd32(E1000_RNBC);
4422 adapter->stats.ruc += rd32(E1000_RUC);
4423 adapter->stats.rfc += rd32(E1000_RFC);
4424 adapter->stats.rjc += rd32(E1000_RJC);
4425 adapter->stats.tor += rd32(E1000_TORH);
4426 adapter->stats.tot += rd32(E1000_TOTH);
4427 adapter->stats.tpr += rd32(E1000_TPR);
4429 adapter->stats.ptc64 += rd32(E1000_PTC64);
4430 adapter->stats.ptc127 += rd32(E1000_PTC127);
4431 adapter->stats.ptc255 += rd32(E1000_PTC255);
4432 adapter->stats.ptc511 += rd32(E1000_PTC511);
4433 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4434 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4436 adapter->stats.mptc += rd32(E1000_MPTC);
4437 adapter->stats.bptc += rd32(E1000_BPTC);
4439 adapter->stats.tpt += rd32(E1000_TPT);
4440 adapter->stats.colc += rd32(E1000_COLC);
4442 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4443 /* read internal phy specific stats */
4444 reg = rd32(E1000_CTRL_EXT);
4445 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4446 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4447 adapter->stats.tncrs += rd32(E1000_TNCRS);
4450 adapter->stats.tsctc += rd32(E1000_TSCTC);
4451 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4453 adapter->stats.iac += rd32(E1000_IAC);
4454 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4455 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4456 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4457 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4458 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4459 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4460 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4461 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4463 /* Fill out the OS statistics structure */
4464 net_stats->multicast = adapter->stats.mprc;
4465 net_stats->collisions = adapter->stats.colc;
4469 /* RLEC on some newer hardware can be incorrect so build
4470 * our own version based on RUC and ROC */
4471 net_stats->rx_errors = adapter->stats.rxerrc +
4472 adapter->stats.crcerrs + adapter->stats.algnerrc +
4473 adapter->stats.ruc + adapter->stats.roc +
4474 adapter->stats.cexterr;
4475 net_stats->rx_length_errors = adapter->stats.ruc +
4477 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4478 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4479 net_stats->rx_missed_errors = adapter->stats.mpc;
4482 net_stats->tx_errors = adapter->stats.ecol +
4483 adapter->stats.latecol;
4484 net_stats->tx_aborted_errors = adapter->stats.ecol;
4485 net_stats->tx_window_errors = adapter->stats.latecol;
4486 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4488 /* Tx Dropped needs to be maintained elsewhere */
4491 if (hw->phy.media_type == e1000_media_type_copper) {
4492 if ((adapter->link_speed == SPEED_1000) &&
4493 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4494 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4495 adapter->phy_stats.idle_errors += phy_tmp;
4499 /* Management Stats */
4500 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4501 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4502 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4505 static irqreturn_t igb_msix_other(int irq, void *data)
4507 struct igb_adapter *adapter = data;
4508 struct e1000_hw *hw = &adapter->hw;
4509 u32 icr = rd32(E1000_ICR);
4510 /* reading ICR causes bit 31 of EICR to be cleared */
4512 if (icr & E1000_ICR_DRSTA)
4513 schedule_work(&adapter->reset_task);
4515 if (icr & E1000_ICR_DOUTSYNC) {
4516 /* HW is reporting DMA is out of sync */
4517 adapter->stats.doosync++;
4520 /* Check for a mailbox event */
4521 if (icr & E1000_ICR_VMMB)
4522 igb_msg_task(adapter);
4524 if (icr & E1000_ICR_LSC) {
4525 hw->mac.get_link_status = 1;
4526 /* guard against interrupt when we're going down */
4527 if (!test_bit(__IGB_DOWN, &adapter->state))
4528 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4531 if (adapter->vfs_allocated_count)
4532 wr32(E1000_IMS, E1000_IMS_LSC |
4534 E1000_IMS_DOUTSYNC);
4536 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4537 wr32(E1000_EIMS, adapter->eims_other);
4542 static void igb_write_itr(struct igb_q_vector *q_vector)
4544 struct igb_adapter *adapter = q_vector->adapter;
4545 u32 itr_val = q_vector->itr_val & 0x7FFC;
4547 if (!q_vector->set_itr)
4553 if (adapter->hw.mac.type == e1000_82575)
4554 itr_val |= itr_val << 16;
4556 itr_val |= 0x8000000;
4558 writel(itr_val, q_vector->itr_register);
4559 q_vector->set_itr = 0;
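/* EITR encoding sketch (bit meanings assumed from the masks used
 * here): the & 0x7FFC keeps the interval field in bits 2..14; 82575
 * wants the value replicated in both 16-bit halves, while later MACs
 * take one interval plus the 0x8000000 flag so the new setting takes
 * effect without waiting for the current countdown. */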
4562 static irqreturn_t igb_msix_ring(int irq, void *data)
4564 struct igb_q_vector *q_vector = data;
4566 /* Write the ITR value calculated from the previous interrupt. */
4567 igb_write_itr(q_vector);
4569 napi_schedule(&q_vector->napi);
4574 #ifdef CONFIG_IGB_DCA
4575 static void igb_update_dca(struct igb_q_vector *q_vector)
4577 struct igb_adapter *adapter = q_vector->adapter;
4578 struct e1000_hw *hw = &adapter->hw;
4579 int cpu = get_cpu();
4581 if (q_vector->cpu == cpu)
4584 if (q_vector->tx_ring) {
4585 int q = q_vector->tx_ring->reg_idx;
4586 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4587 if (hw->mac.type == e1000_82575) {
4588 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4589 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4591 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4592 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4593 E1000_DCA_TXCTRL_CPUID_SHIFT;
4595 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4596 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4598 if (q_vector->rx_ring) {
4599 int q = q_vector->rx_ring->reg_idx;
4600 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4601 if (hw->mac.type == e1000_82575) {
4602 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4603 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4605 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4606 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4607 E1000_DCA_RXCTRL_CPUID_SHIFT;
4609 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4610 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4611 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4612 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4614 q_vector->cpu = cpu;
4619 static void igb_setup_dca(struct igb_adapter *adapter)
4621 struct e1000_hw *hw = &adapter->hw;
4624 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4627 /* Always use CB2 mode, difference is masked in the CB driver. */
4628 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4630 for (i = 0; i < adapter->num_q_vectors; i++) {
4631 adapter->q_vector[i]->cpu = -1;
4632 igb_update_dca(adapter->q_vector[i]);
4636 static int __igb_notify_dca(struct device *dev, void *data)
4638 struct net_device *netdev = dev_get_drvdata(dev);
4639 struct igb_adapter *adapter = netdev_priv(netdev);
4640 struct pci_dev *pdev = adapter->pdev;
4641 struct e1000_hw *hw = &adapter->hw;
4642 unsigned long event = *(unsigned long *)data;
4645 case DCA_PROVIDER_ADD:
4646 /* if already enabled, don't do it again */
4647 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4649 if (dca_add_requester(dev) == 0) {
4650 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4651 dev_info(&pdev->dev, "DCA enabled\n");
4652 igb_setup_dca(adapter);
4655 /* Fall Through since DCA is disabled. */
4656 case DCA_PROVIDER_REMOVE:
4657 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4658 /* without this a class_device is left
4659 * hanging around in the sysfs model */
4660 dca_remove_requester(dev);
4661 dev_info(&pdev->dev, "DCA disabled\n");
4662 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4663 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4671 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4676 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4679 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4681 #endif /* CONFIG_IGB_DCA */
4683 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4685 struct e1000_hw *hw = &adapter->hw;
4689 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4690 ping = E1000_PF_CONTROL_MSG;
4691 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4692 ping |= E1000_VT_MSGTYPE_CTS;
4693 igb_write_mbx(hw, &ping, 1, i);
4697 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4699 struct e1000_hw *hw = &adapter->hw;
4700 u32 vmolr = rd32(E1000_VMOLR(vf));
4701 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4703 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4704 IGB_VF_FLAG_MULTI_PROMISC);
4705 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4707 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4708 vmolr |= E1000_VMOLR_MPME;
4709 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4710 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4713 * if we have hashes and we are clearing a multicast promisc
4714 * flag we need to write the hashes to the MTA as this step
4715 * was previously skipped
4717 if (vf_data->num_vf_mc_hashes > 30) {
4718 vmolr |= E1000_VMOLR_MPME;
4719 } else if (vf_data->num_vf_mc_hashes) {
4721 vmolr |= E1000_VMOLR_ROMPE;
4722 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4723 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4727 wr32(E1000_VMOLR(vf), vmolr);
4729 /* there are flags left unprocessed, likely not supported */
4730 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4737 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4738 u32 *msgbuf, u32 vf)
4740 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4741 u16 *hash_list = (u16 *)&msgbuf[1];
4742 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4745 /* salt away the number of multicast addresses assigned
4746  * to this VF for later use to restore when the PF multicast
4747  * list changes */
4749 vf_data->num_vf_mc_hashes = n;
4751 /* only up to 30 hash values supported */
4755 /* store the hashes for later use */
4756 for (i = 0; i < n; i++)
4757 vf_data->vf_mc_hashes[i] = hash_list[i];
4759 /* Flush and reset the mta with the new values */
4760 igb_set_rx_mode(adapter->netdev);
4765 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4767 struct e1000_hw *hw = &adapter->hw;
4768 struct vf_data_storage *vf_data;
4771 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4772 u32 vmolr = rd32(E1000_VMOLR(i));
4773 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4775 vf_data = &adapter->vf_data[i];
4777 if ((vf_data->num_vf_mc_hashes > 30) ||
4778 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4779 vmolr |= E1000_VMOLR_MPME;
4780 } else if (vf_data->num_vf_mc_hashes) {
4781 vmolr |= E1000_VMOLR_ROMPE;
4782 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4783 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4785 wr32(E1000_VMOLR(i), vmolr);
4789 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4791 struct e1000_hw *hw = &adapter->hw;
4792 u32 pool_mask, reg, vid;
4795 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4797 /* Find the vlan filter for this id */
4798 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4799 reg = rd32(E1000_VLVF(i));
4801 /* remove the vf from the pool */
4804 /* if pool is empty then remove entry from vfta */
4805 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4806 (reg & E1000_VLVF_VLANID_ENABLE)) {
4808 vid = reg & E1000_VLVF_VLANID_MASK;
4809 igb_vfta_set(hw, vid, false);
4812 wr32(E1000_VLVF(i), reg);
4815 adapter->vf_data[vf].vlans_enabled = 0;
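/* VLVF layout sketch: each entry pairs a VLAN ID with a pool-select
 * bitmap, one bit per pool/VF above E1000_VLVF_POOLSEL_SHIFT; the loop
 * above clears this VF's bit (e.g. vf 2 uses pool_mask
 * 1 << (E1000_VLVF_POOLSEL_SHIFT + 2)) and drops the VLAN from the
 * VFTA once no pool references the entry. */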
4818 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4820 struct e1000_hw *hw = &adapter->hw;
4823 /* The vlvf table only exists on 82576 hardware and newer */
4824 if (hw->mac.type < e1000_82576)
4827 /* we only need to do this if VMDq is enabled */
4828 if (!adapter->vfs_allocated_count)
4831 /* Find the vlan filter for this id */
4832 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4833 reg = rd32(E1000_VLVF(i));
4834 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4835 vid == (reg & E1000_VLVF_VLANID_MASK))
4840 if (i == E1000_VLVF_ARRAY_SIZE) {
4841 /* Did not find a matching VLAN ID entry that was
4842 * enabled. Search for a free filter entry, i.e.
4843 * one without the enable bit set
4845 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4846 reg = rd32(E1000_VLVF(i));
4847 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4851 if (i < E1000_VLVF_ARRAY_SIZE) {
4852 /* Found an enabled/available entry */
4853 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4855 /* if !enabled we need to set this up in vfta */
4856 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4857 /* add VID to filter table */
4858 igb_vfta_set(hw, vid, true);
4859 reg |= E1000_VLVF_VLANID_ENABLE;
4861 reg &= ~E1000_VLVF_VLANID_MASK;
4863 wr32(E1000_VLVF(i), reg);
4865 /* do not modify RLPML for PF devices */
4866 if (vf >= adapter->vfs_allocated_count)
4869 if (!adapter->vf_data[vf].vlans_enabled) {
4871 reg = rd32(E1000_VMOLR(vf));
4872 size = reg & E1000_VMOLR_RLPML_MASK;
4874 reg &= ~E1000_VMOLR_RLPML_MASK;
4876 wr32(E1000_VMOLR(vf), reg);
4879 adapter->vf_data[vf].vlans_enabled++;
4883 if (i < E1000_VLVF_ARRAY_SIZE) {
4884 /* remove vf from the pool */
4885 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4886 /* if pool is empty then remove entry from vfta */
4887 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4889 igb_vfta_set(hw, vid, false);
4891 wr32(E1000_VLVF(i), reg);
4893 /* do not modify RLPML for PF devices */
4894 if (vf >= adapter->vfs_allocated_count)
4897 adapter->vf_data[vf].vlans_enabled--;
4898 if (!adapter->vf_data[vf].vlans_enabled) {
4900 reg = rd32(E1000_VMOLR(vf));
4901 size = reg & E1000_VMOLR_RLPML_MASK;
4903 reg &= ~E1000_VMOLR_RLPML_MASK;
4905 wr32(E1000_VMOLR(vf), reg);
4912 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4914 struct e1000_hw *hw = &adapter->hw;
4917 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4919 wr32(E1000_VMVIR(vf), 0);
4922 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4923 int vf, u16 vlan, u8 qos)
4926 struct igb_adapter *adapter = netdev_priv(netdev);
4928 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4931 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4934 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4935 igb_set_vmolr(adapter, vf, !vlan);
4936 adapter->vf_data[vf].pf_vlan = vlan;
4937 adapter->vf_data[vf].pf_qos = qos;
4938 dev_info(&adapter->pdev->dev,
4939 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4940 if (test_bit(__IGB_DOWN, &adapter->state)) {
4941 dev_warn(&adapter->pdev->dev,
4942 "The VF VLAN has been set,"
4943 " but the PF device is not up.\n");
4944 dev_warn(&adapter->pdev->dev,
4945 "Bring the PF device up before"
4946 " attempting to use the VF device.\n");
4949 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4951 igb_set_vmvir(adapter, vlan, vf);
4952 igb_set_vmolr(adapter, vf, true);
4953 adapter->vf_data[vf].pf_vlan = 0;
4954 adapter->vf_data[vf].pf_qos = 0;
4960 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4962 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4963 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4965 return igb_vlvf_set(adapter, vid, add, vf);
4968 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4971 adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4972 adapter->vf_data[vf].last_nack = jiffies;
4974 /* reset offloads to defaults */
4975 igb_set_vmolr(adapter, vf, true);
4977 /* reset vlans for device */
4978 igb_clear_vf_vfta(adapter, vf);
4979 if (adapter->vf_data[vf].pf_vlan)
4980 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4981 adapter->vf_data[vf].pf_vlan,
4982 adapter->vf_data[vf].pf_qos);
4984 igb_clear_vf_vfta(adapter, vf);
4986 /* reset multicast table array for vf */
4987 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4989 /* Flush and reset the mta with the new values */
4990 igb_set_rx_mode(adapter->netdev);
4993 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4995 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4997 /* generate a new mac address as we were hotplug removed/added */
4998 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4999 random_ether_addr(vf_mac);
5001 /* process remaining reset events */
5002 igb_vf_reset(adapter, vf);
5005 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5007 struct e1000_hw *hw = &adapter->hw;
5008 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5009 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5011 u8 *addr = (u8 *)(&msgbuf[1]);
5013 /* process all the same items cleared in a function level reset */
5014 igb_vf_reset(adapter, vf);
5016 /* set vf mac address */
5017 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5019 /* enable transmit and receive for vf */
5020 reg = rd32(E1000_VFTE);
5021 wr32(E1000_VFTE, reg | (1 << vf));
5022 reg = rd32(E1000_VFRE);
5023 wr32(E1000_VFRE, reg | (1 << vf));
5025 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
5027 /* reply to reset with ack and vf mac address */
5028 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5029 memcpy(addr, vf_mac, 6);
5030 igb_write_mbx(hw, msgbuf, 3, vf);
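/* Reply layout as built above (sketch): word 0 carries E1000_VF_RESET
 * ORed with the ACK type, and the six MAC bytes are packed starting at
 * msgbuf[1], the same second-word convention igb_set_vf_mac_addr()
 * relies on when parsing requests from the VF. */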
5033 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5036 * The VF MAC Address is stored in a packed array of bytes
5037 * starting at the second 32 bit word of the msg array
5039 unsigned char *addr = (unsigned char *)&msg[1];
5042 if (is_valid_ether_addr(addr))
5043 err = igb_set_vf_mac(adapter, vf, addr);
5048 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5050 struct e1000_hw *hw = &adapter->hw;
5051 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5052 u32 msg = E1000_VT_MSGTYPE_NACK;
5054 /* if device isn't clear to send it shouldn't be reading either */
5055 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5056 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5057 igb_write_mbx(hw, &msg, 1, vf);
5058 vf_data->last_nack = jiffies;
5062 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5064 struct pci_dev *pdev = adapter->pdev;
5065 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5066 struct e1000_hw *hw = &adapter->hw;
5067 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5070 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5073 /* if receive failed revoke VF CTS status and restart init */
5074 dev_err(&pdev->dev, "Error receiving message from VF\n");
5075 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5076 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5081 /* this is a message we already processed, do nothing */
5082 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5086 * until the vf completes a reset it should not be
5087 * allowed to start any configuration.
5090 if (msgbuf[0] == E1000_VF_RESET) {
5091 igb_vf_reset_msg(adapter, vf);
5095 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5096 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5102 switch ((msgbuf[0] & 0xFFFF)) {
5103 case E1000_VF_SET_MAC_ADDR:
5104 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5106 case E1000_VF_SET_PROMISC:
5107 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5109 case E1000_VF_SET_MULTICAST:
5110 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5112 case E1000_VF_SET_LPE:
5113 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5115 case E1000_VF_SET_VLAN:
5116 if (adapter->vf_data[vf].pf_vlan)
5119 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5122 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5127 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5129 /* notify the VF of the results of what it sent us */
5131 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5133 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5135 igb_write_mbx(hw, msgbuf, 1, vf);
5138 static void igb_msg_task(struct igb_adapter *adapter)
5140 struct e1000_hw *hw = &adapter->hw;
5143 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5144 /* process any reset requests */
5145 if (!igb_check_for_rst(hw, vf))
5146 igb_vf_reset_event(adapter, vf);
5148 /* process any messages pending */
5149 if (!igb_check_for_msg(hw, vf))
5150 igb_rcv_msg_from_vf(adapter, vf);
5152 /* process any acks */
5153 if (!igb_check_for_ack(hw, vf))
5154 igb_rcv_ack_from_vf(adapter, vf);
5159 * igb_set_uta - Set unicast filter table address
5160 * @adapter: board private structure
5162 * The unicast table address is a register array of 32-bit registers.
5163 * The table is meant to be used in a way similar to how the MTA is used
5164 * however due to certain limitations in the hardware it is necessary to
5165 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5166 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5168 static void igb_set_uta(struct igb_adapter *adapter)
5170 struct e1000_hw *hw = &adapter->hw;
5173 /* The UTA table only exists on 82576 hardware and newer */
5174 if (hw->mac.type < e1000_82576)
5177 /* we only need to do this if VMDq is enabled */
5178 if (!adapter->vfs_allocated_count)
5181 for (i = 0; i < hw->mac.uta_reg_count; i++)
5182 array_wr32(E1000_UTA, i, ~0);
5186 * igb_intr_msi - Interrupt Handler
5187 * @irq: interrupt number
5188 * @data: pointer to a network interface device structure
5190 static irqreturn_t igb_intr_msi(int irq, void *data)
5192 struct igb_adapter *adapter = data;
5193 struct igb_q_vector *q_vector = adapter->q_vector[0];
5194 struct e1000_hw *hw = &adapter->hw;
5195 /* read ICR disables interrupts using IAM */
5196 u32 icr = rd32(E1000_ICR);
5198 igb_write_itr(q_vector);
5200 if (icr & E1000_ICR_DRSTA)
5201 schedule_work(&adapter->reset_task);
5203 if (icr & E1000_ICR_DOUTSYNC) {
5204 /* HW is reporting DMA is out of sync */
5205 adapter->stats.doosync++;
5208 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5209 hw->mac.get_link_status = 1;
5210 if (!test_bit(__IGB_DOWN, &adapter->state))
5211 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5214 napi_schedule(&q_vector->napi);
5220 * igb_intr - Legacy Interrupt Handler
5221 * @irq: interrupt number
5222 * @data: pointer to a network interface device structure
5224 static irqreturn_t igb_intr(int irq, void *data)
5226 struct igb_adapter *adapter = data;
5227 struct igb_q_vector *q_vector = adapter->q_vector[0];
5228 struct e1000_hw *hw = &adapter->hw;
5229 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5230 * need for the IMC write */
5231 u32 icr = rd32(E1000_ICR);
5233 return IRQ_NONE; /* Not our interrupt */
5235 igb_write_itr(q_vector);
5237 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5238 * not set, then the adapter didn't send an interrupt */
5239 if (!(icr & E1000_ICR_INT_ASSERTED))
5242 if (icr & E1000_ICR_DRSTA)
5243 schedule_work(&adapter->reset_task);
5245 if (icr & E1000_ICR_DOUTSYNC) {
5246 /* HW is reporting DMA is out of sync */
5247 adapter->stats.doosync++;
5250 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5251 hw->mac.get_link_status = 1;
5252 /* guard against interrupt when we're going down */
5253 if (!test_bit(__IGB_DOWN, &adapter->state))
5254 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5257 napi_schedule(&q_vector->napi);
5262 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5264 struct igb_adapter *adapter = q_vector->adapter;
5265 struct e1000_hw *hw = &adapter->hw;
5267 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5268 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5269 if (!adapter->msix_entries)
5270 igb_set_itr(adapter);
5272 igb_update_ring_itr(q_vector);
5275 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5276 if (adapter->msix_entries)
5277 wr32(E1000_EIMS, q_vector->eims_value);
5279 igb_irq_enable(adapter);
5284 * igb_poll - NAPI Rx polling callback
5285 * @napi: napi polling structure
5286 * @budget: count of how many packets we should handle
5288 static int igb_poll(struct napi_struct *napi, int budget)
5290 struct igb_q_vector *q_vector = container_of(napi,
5291 struct igb_q_vector,
5293 int tx_clean_complete = 1, work_done = 0;
5295 #ifdef CONFIG_IGB_DCA
5296 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5297 igb_update_dca(q_vector);
5299 if (q_vector->tx_ring)
5300 tx_clean_complete = igb_clean_tx_irq(q_vector);
5302 if (q_vector->rx_ring)
5303 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5305 if (!tx_clean_complete)
5308 /* If not enough Rx work done, exit the polling mode */
5309 if (work_done < budget) {
5310 napi_complete(napi);
5311 igb_ring_irq_enable(q_vector);
5318 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5319 * @adapter: board private structure
5320 * @shhwtstamps: timestamp structure to update
5321 * @regval: unsigned 64bit system time value.
5323 * We need to convert the system time value stored in the RX/TXSTMP registers
5324 * into a hwtstamp which can be used by the upper level timestamping functions
5326 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5327 struct skb_shared_hwtstamps *shhwtstamps,
5333 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5334 * bit 24 to match the clock shift we set up earlier.
5336 if (adapter->hw.mac.type == e1000_82580)
5337 regval <<= IGB_82580_TSYNC_SHIFT;
5339 ns = timecounter_cyc2time(&adapter->clock, regval);
5340 timecompare_update(&adapter->compare, ns);
5341 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5342 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5343 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
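/* Example (82580): the raw stamp registers carry nanoseconds at bit 0,
 * so shifting left by IGB_82580_TSYNC_SHIFT aligns them with the
 * cyclecounter shift chosen at init, after which
 * timecounter_cyc2time() yields nanoseconds for the hwtstamp. */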
5347 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5348 * @q_vector: pointer to q_vector containing needed info
5349 * @buffer_info: pointer to igb_buffer structure
5351 * If we were asked to do hardware stamping and such a time stamp is
5352 * available, then it must have been for this skb here because we
5353 * allow only one such packet into the queue.
5355 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5357 struct igb_adapter *adapter = q_vector->adapter;
5358 struct e1000_hw *hw = &adapter->hw;
5359 struct skb_shared_hwtstamps shhwtstamps;
5362 /* if skb does not support hw timestamp or TX stamp not valid exit */
5363 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5364 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5367 regval = rd32(E1000_TXSTMPL);
5368 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5370 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5371 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5375 * igb_clean_tx_irq - Reclaim resources after transmit completes
5376 * @q_vector: pointer to q_vector containing needed info
5377 * returns true if ring is completely cleaned
5379 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5381 struct igb_adapter *adapter = q_vector->adapter;
5382 struct igb_ring *tx_ring = q_vector->tx_ring;
5383 struct net_device *netdev = tx_ring->netdev;
5384 struct e1000_hw *hw = &adapter->hw;
5385 struct igb_buffer *buffer_info;
5386 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5387 unsigned int total_bytes = 0, total_packets = 0;
5388 unsigned int i, eop, count = 0;
5389 bool cleaned = false;
5391 i = tx_ring->next_to_clean;
5392 eop = tx_ring->buffer_info[i].next_to_watch;
5393 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5395 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5396 (count < tx_ring->count)) {
5397 rmb(); /* read buffer_info after eop_desc status */
5398 for (cleaned = false; !cleaned; count++) {
5399 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5400 buffer_info = &tx_ring->buffer_info[i];
5401 cleaned = (i == eop);
5403 if (buffer_info->skb) {
5404 total_bytes += buffer_info->bytecount;
5405 /* gso_segs is currently only valid for tcp */
5406 total_packets += buffer_info->gso_segs;
5407 igb_tx_hwtstamp(q_vector, buffer_info);
5410 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5411 tx_desc->wb.status = 0;
5414 if (i == tx_ring->count)
5417 eop = tx_ring->buffer_info[i].next_to_watch;
5418 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5421 tx_ring->next_to_clean = i;
5423 if (unlikely(count &&
5424 netif_carrier_ok(netdev) &&
5425 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5426 /* Make sure that anybody stopping the queue after this
5427 * sees the new next_to_clean.
5430 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5431 !(test_bit(__IGB_DOWN, &adapter->state))) {
5432 netif_wake_subqueue(netdev, tx_ring->queue_index);
5434 u64_stats_update_begin(&tx_ring->tx_syncp);
5435 tx_ring->tx_stats.restart_queue++;
5436 u64_stats_update_end(&tx_ring->tx_syncp);
5440 if (tx_ring->detect_tx_hung) {
5441 /* Detect a transmit hang in hardware, this serializes the
5442 * check with the clearing of time_stamp and movement of i */
5443 tx_ring->detect_tx_hung = false;
5444 if (tx_ring->buffer_info[i].time_stamp &&
5445 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5446 (adapter->tx_timeout_factor * HZ)) &&
5447 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5449 /* detected Tx unit hang */
5450 dev_err(tx_ring->dev,
5451 "Detected Tx Unit Hang\n"
5455 " next_to_use <%x>\n"
5456 " next_to_clean <%x>\n"
5457 "buffer_info[next_to_clean]\n"
5458 " time_stamp <%lx>\n"
5459 " next_to_watch <%x>\n"
5461 " desc.status <%x>\n",
5462 tx_ring->queue_index,
5463 readl(tx_ring->head),
5464 readl(tx_ring->tail),
5465 tx_ring->next_to_use,
5466 tx_ring->next_to_clean,
5467 tx_ring->buffer_info[eop].time_stamp,
5470 eop_desc->wb.status);
5471 netif_stop_subqueue(netdev, tx_ring->queue_index);
5474 tx_ring->total_bytes += total_bytes;
5475 tx_ring->total_packets += total_packets;
5476 u64_stats_update_begin(&tx_ring->tx_syncp);
5477 tx_ring->tx_stats.bytes += total_bytes;
5478 tx_ring->tx_stats.packets += total_packets;
5479 u64_stats_update_end(&tx_ring->tx_syncp);
5480 return count < tx_ring->count;
5484 * igb_receive_skb - helper function to handle rx indications
5485 * @q_vector: structure containing interrupt and ring information
5486 * @skb: packet to send up
5487 * @vlan_tag: vlan tag for packet
5489 static void igb_receive_skb(struct igb_q_vector *q_vector,
5490 struct sk_buff *skb,
5493 struct igb_adapter *adapter = q_vector->adapter;
5495 if (vlan_tag && adapter->vlgrp)
5496 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5499 napi_gro_receive(&q_vector->napi, skb);
5502 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5503 u32 status_err, struct sk_buff *skb)
5505 skb_checksum_none_assert(skb);
5507 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5508 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5509 (status_err & E1000_RXD_STAT_IXSM))
5512 /* TCP/UDP checksum error bit is set */
5514 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5516 * work around errata with SCTP packets where the TCPE (aka
5517 * L4E) bit is set incorrectly on 64 byte (60 byte w/o crc)
5518 * packets; let the stack check the crc32c instead
5520 if ((skb->len == 60) &&
5521 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5522 u64_stats_update_begin(&ring->rx_syncp);
5523 ring->rx_stats.csum_err++;
5524 u64_stats_update_end(&ring->rx_syncp);
5526 /* let the stack verify checksum errors */
5529 /* It must be a TCP or UDP packet with a valid checksum */
5530 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5531 skb->ip_summed = CHECKSUM_UNNECESSARY;
5533 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5536 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5537 struct sk_buff *skb)
5539 struct igb_adapter *adapter = q_vector->adapter;
5540 struct e1000_hw *hw = &adapter->hw;
5544 * If this bit is set, then the RX registers contain the time stamp. No
5545 * other packet will be time stamped until we read these registers, so
5546 * read the registers to make them available again. Because only one
5547 * packet can be time stamped at a time, we know that the register
5548 * values must belong to this one here and therefore we don't need to
5549 * compare any of the additional attributes stored for it.
5551 * If nothing went wrong, then it should have a shared tx_flags that we
5552 * can turn into a skb_shared_hwtstamps.
5554 if (staterr & E1000_RXDADV_STAT_TSIP) {
5555 u32 *stamp = (u32 *)skb->data;
5556 regval = le32_to_cpu(*(stamp + 2));
5557 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5558 skb_pull(skb, IGB_TS_HDR_LEN);
5560 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5563 regval = rd32(E1000_RXSTMPL);
5564 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5567 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5569 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5570 union e1000_adv_rx_desc *rx_desc)
5572 /* HW will not DMA in data larger than the given buffer, even if it
5573 * parses the (NFS, of course) header to be larger. In that case, it
5574 * fills the header buffer and spills the rest into the page.
5576 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5577 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5578 if (hlen > rx_ring->rx_buffer_len)
5579 hlen = rx_ring->rx_buffer_len;
5583 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5584 int *work_done, int budget)
5586 struct igb_ring *rx_ring = q_vector->rx_ring;
5587 struct net_device *netdev = rx_ring->netdev;
5588 struct device *dev = rx_ring->dev;
5589 union e1000_adv_rx_desc *rx_desc, *next_rxd;
5590 struct igb_buffer *buffer_info, *next_buffer;
5591 struct sk_buff *skb;
5592 bool cleaned = false;
5593 int cleaned_count = 0;
5594 int current_node = numa_node_id();
5595 unsigned int total_bytes = 0, total_packets = 0;
5601 i = rx_ring->next_to_clean;
5602 buffer_info = &rx_ring->buffer_info[i];
5603 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5604 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5606 while (staterr & E1000_RXD_STAT_DD) {
5607 if (*work_done >= budget)
5610 rmb(); /* read descriptor and rx_buffer_info after status DD */
5612 skb = buffer_info->skb;
5613 prefetch(skb->data - NET_IP_ALIGN);
5614 buffer_info->skb = NULL;
5617 if (i == rx_ring->count)
5620 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5622 next_buffer = &rx_ring->buffer_info[i];
5624 length = le16_to_cpu(rx_desc->wb.upper.length);
5628 if (buffer_info->dma) {
5629 dma_unmap_single(dev, buffer_info->dma,
5630 rx_ring->rx_buffer_len,
5632 buffer_info->dma = 0;
5633 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5634 skb_put(skb, length);
5637 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5641 dma_unmap_page(dev, buffer_info->page_dma,
5642 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5643 buffer_info->page_dma = 0;
5645 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5647 buffer_info->page_offset,
5650 if ((page_count(buffer_info->page) != 1) ||
5651 (page_to_nid(buffer_info->page) != current_node))
5652 buffer_info->page = NULL;
5654 get_page(buffer_info->page);
5657 skb->data_len += length;
5658 skb->truesize += length;
5661 if (!(staterr & E1000_RXD_STAT_EOP)) {
5662 buffer_info->skb = next_buffer->skb;
5663 buffer_info->dma = next_buffer->dma;
5664 next_buffer->skb = skb;
5665 next_buffer->dma = 0;
5669 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5670 dev_kfree_skb_irq(skb);
5674 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5675 igb_rx_hwtstamp(q_vector, staterr, skb);
5676 total_bytes += skb->len;
5679 igb_rx_checksum_adv(rx_ring, staterr, skb);
5681 skb->protocol = eth_type_trans(skb, netdev);
5682 skb_record_rx_queue(skb, rx_ring->queue_index);
5684 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5685 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5687 igb_receive_skb(q_vector, skb, vlan_tag);
5690 rx_desc->wb.upper.status_error = 0;
5692 /* return some buffers to hardware, one at a time is too slow */
5693 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5694 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5698 /* use prefetched values */
5700 buffer_info = next_buffer;
5701 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5704 rx_ring->next_to_clean = i;
5705 cleaned_count = igb_desc_unused(rx_ring);
5708 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5710 rx_ring->total_packets += total_packets;
5711 rx_ring->total_bytes += total_bytes;
5712 u64_stats_update_begin(&rx_ring->rx_syncp);
5713 rx_ring->rx_stats.packets += total_packets;
5714 rx_ring->rx_stats.bytes += total_bytes;
5715 u64_stats_update_end(&rx_ring->rx_syncp);
5720 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5721 * @rx_ring: rx descriptor ring to repopulate with receive buffers
5723 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5725 struct net_device *netdev = rx_ring->netdev;
5726 union e1000_adv_rx_desc *rx_desc;
5727 struct igb_buffer *buffer_info;
5728 struct sk_buff *skb;
5732 i = rx_ring->next_to_use;
5733 buffer_info = &rx_ring->buffer_info[i];
5735 bufsz = rx_ring->rx_buffer_len;
5737 while (cleaned_count--) {
5738 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5740 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5741 if (!buffer_info->page) {
5742 buffer_info->page = netdev_alloc_page(netdev);
5743 if (unlikely(!buffer_info->page)) {
5744 u64_stats_update_begin(&rx_ring->rx_syncp);
5745 rx_ring->rx_stats.alloc_failed++;
5746 u64_stats_update_end(&rx_ring->rx_syncp);
5749 buffer_info->page_offset = 0;
5751 buffer_info->page_offset ^= PAGE_SIZE / 2;
5753 buffer_info->page_dma =
5754 dma_map_page(rx_ring->dev, buffer_info->page,
5755 buffer_info->page_offset,
5758 if (dma_mapping_error(rx_ring->dev,
5759 buffer_info->page_dma)) {
5760 buffer_info->page_dma = 0;
5761 u64_stats_update_begin(&rx_ring->rx_syncp);
5762 rx_ring->rx_stats.alloc_failed++;
5763 u64_stats_update_end(&rx_ring->rx_syncp);
5768 skb = buffer_info->skb;
5770 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5771 if (unlikely(!skb)) {
5772 u64_stats_update_begin(&rx_ring->rx_syncp);
5773 rx_ring->rx_stats.alloc_failed++;
5774 u64_stats_update_end(&rx_ring->rx_syncp);
5778 buffer_info->skb = skb;
5780 if (!buffer_info->dma) {
5781 buffer_info->dma = dma_map_single(rx_ring->dev,
5785 if (dma_mapping_error(rx_ring->dev,
5786 buffer_info->dma)) {
5787 buffer_info->dma = 0;
5788 u64_stats_update_begin(&rx_ring->rx_syncp);
5789 rx_ring->rx_stats.alloc_failed++;
5790 u64_stats_update_end(&rx_ring->rx_syncp);
5794 /* Refresh the desc even if buffer_addrs didn't change because
5795 * each write-back erases this info. */
5796 if (bufsz < IGB_RXBUFFER_1024) {
5797 rx_desc->read.pkt_addr =
5798 cpu_to_le64(buffer_info->page_dma);
5799 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5801 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5802 rx_desc->read.hdr_addr = 0;
5806 if (i == rx_ring->count)
5808 buffer_info = &rx_ring->buffer_info[i];
5812 if (rx_ring->next_to_use != i) {
5813 rx_ring->next_to_use = i;
5815 i = (rx_ring->count - 1);
5819 /* Force memory writes to complete before letting h/w
5820 * know there are new descriptors to fetch. (Only
5821 * applicable for weak-ordered memory model archs,
5822 * such as IA-64). */
5824 writel(i, rx_ring->tail);
5834 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5836 struct igb_adapter *adapter = netdev_priv(netdev);
5837 struct mii_ioctl_data *data = if_mii(ifr);
5839 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5844 data->phy_id = adapter->hw.phy.addr;
5847 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5859 * igb_hwtstamp_ioctl - control hardware time stamping
5864 * Outgoing time stamping can be enabled and disabled. Play nice and
5865 * disable it when requested, although it shouldn't cause any overhead
5866 * when no packet needs it. At most one packet in the queue may be
5867 * marked for time stamping, otherwise it would be impossible to tell
5868 * for sure to which packet the hardware time stamp belongs.
5870 * Incoming time stamping has to be configured via the hardware
5871 * filters. Not all combinations are supported, in particular event
5872 * type has to be specified. Matching the kind of event packet is
5873 * not supported, with the exception of "all V2 events regardless of
5874 * level 2 or 4".
5877 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5878 struct ifreq *ifr, int cmd)
5880 struct igb_adapter *adapter = netdev_priv(netdev);
5881 struct e1000_hw *hw = &adapter->hw;
5882 struct hwtstamp_config config;
5883 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5884 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5885 u32 tsync_rx_cfg = 0;
5890 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5893 /* reserved for future extensions */
5897 switch (config.tx_type) {
5898 case HWTSTAMP_TX_OFF:
5900 case HWTSTAMP_TX_ON:
5906 switch (config.rx_filter) {
5907 case HWTSTAMP_FILTER_NONE:
5910 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5911 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5912 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5913 case HWTSTAMP_FILTER_ALL:
5915 * register TSYNCRXCFG must be set, therefore it is not
5916 * possible to time stamp both Sync and Delay_Req messages
5917 * => fall back to time stamping all packets
5919 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5920 config.rx_filter = HWTSTAMP_FILTER_ALL;
5922 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5923 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5924 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5927 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5928 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5929 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5932 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5933 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5934 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5935 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5938 config.rx_filter = HWTSTAMP_FILTER_SOME;
5940 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5941 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5942 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5943 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5946 config.rx_filter = HWTSTAMP_FILTER_SOME;
5948 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5949 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5950 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5951 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5952 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5959 if (hw->mac.type == e1000_82575) {
5960 if (tsync_rx_ctl | tsync_tx_ctl)
5966 * Per-packet timestamping only works if all packets are
5967 * timestamped, so enable timestamping in all packets as
5968 * long as one rx filter was configured.
5970 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5971 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5972 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5975 /* enable/disable TX */
5976 regval = rd32(E1000_TSYNCTXCTL);
5977 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5978 regval |= tsync_tx_ctl;
5979 wr32(E1000_TSYNCTXCTL, regval);
5981 /* enable/disable RX */
5982 regval = rd32(E1000_TSYNCRXCTL);
5983 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5984 regval |= tsync_rx_ctl;
5985 wr32(E1000_TSYNCRXCTL, regval);
5987 /* define which PTP packets are time stamped */
5988 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5990 /* define ethertype filter for timestamped packets */
5993 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5994 E1000_ETQF_1588 | /* enable timestamping */
5995 ETH_P_1588)); /* 1588 eth protocol type */
5997 wr32(E1000_ETQF(3), 0);
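/* ETQF sketch: ETH_P_1588 (0x88F7) is the PTP-over-L2 ethertype, so
 * filter 3 is either armed to match it with the 1588 timestamp action
 * enabled, or cleared when only UDP-based PTP should be stamped. */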
5999 #define PTP_PORT 319
6000 /* L4 Queue Filter[3]: filter by destination port and protocol */
6002 u32 ftqf = (IPPROTO_UDP /* UDP */
6003 | E1000_FTQF_VF_BP /* VF not compared */
6004 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6005 | E1000_FTQF_MASK); /* mask all inputs */
6006 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6008 wr32(E1000_IMIR(3), htons(PTP_PORT));
6009 wr32(E1000_IMIREXT(3),
6010 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6011 if (hw->mac.type == e1000_82576) {
6012 /* enable source port check */
6013 wr32(E1000_SPQF(3), htons(PTP_PORT));
6014 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6016 wr32(E1000_FTQF(3), ftqf);
6018 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
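/* L4 filter sketch: IMIR(3) matches UDP destination port 319 (the PTP
 * event port), IMIREXT bypasses the size/control checks, 82576 adds a
 * source-port compare via SPQF(3), and writing FTQF(3) fully masked
 * disables the filter when rx timestamping is off. */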
6022 adapter->hwtstamp_config = config;
6024 /* clear TX/RX time stamp registers, just to be sure */
6025 regval = rd32(E1000_TXSTMPH);
6026 regval = rd32(E1000_RXSTMPH);
6028 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6038 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6044 return igb_mii_ioctl(netdev, ifr, cmd);
6046 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6052 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6054 struct igb_adapter *adapter = hw->back;
6057 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6059 return -E1000_ERR_CONFIG;
6061 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6066 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6068 struct igb_adapter *adapter = hw->back;
6071 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6073 return -E1000_ERR_CONFIG;
6075 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6080 static void igb_vlan_rx_register(struct net_device *netdev,
6081 struct vlan_group *grp)
6083 struct igb_adapter *adapter = netdev_priv(netdev);
6084 struct e1000_hw *hw = &adapter->hw;
6087 igb_irq_disable(adapter);
6088 adapter->vlgrp = grp;
6091 /* enable VLAN tag insert/strip */
6092 ctrl = rd32(E1000_CTRL);
6093 ctrl |= E1000_CTRL_VME;
6094 wr32(E1000_CTRL, ctrl);
6096 /* Disable CFI check */
6097 rctl = rd32(E1000_RCTL);
6098 rctl &= ~E1000_RCTL_CFIEN;
6099 wr32(E1000_RCTL, rctl);
6101 /* disable VLAN tag insert/strip */
6102 ctrl = rd32(E1000_CTRL);
6103 ctrl &= ~E1000_CTRL_VME;
6104 wr32(E1000_CTRL, ctrl);
6107 igb_rlpml_set(adapter);
6109 if (!test_bit(__IGB_DOWN, &adapter->state))
6110 igb_irq_enable(adapter);
6113 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6115 struct igb_adapter *adapter = netdev_priv(netdev);
6116 struct e1000_hw *hw = &adapter->hw;
6117 int pf_id = adapter->vfs_allocated_count;
6119 /* attempt to add filter to vlvf array */
6120 igb_vlvf_set(adapter, vid, true, pf_id);
6122 /* add the filter since PF can receive vlans w/o entry in vlvf */
6123 igb_vfta_set(hw, vid, true);
6126 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6128 struct igb_adapter *adapter = netdev_priv(netdev);
6129 struct e1000_hw *hw = &adapter->hw;
6130 int pf_id = adapter->vfs_allocated_count;
6133 igb_irq_disable(adapter);
6134 vlan_group_set_device(adapter->vlgrp, vid, NULL);
6136 if (!test_bit(__IGB_DOWN, &adapter->state))
6137 igb_irq_enable(adapter);
6139 /* remove vlan from VLVF table array */
6140 err = igb_vlvf_set(adapter, vid, false, pf_id);
6142 /* if vid was not present in VLVF just remove it from table */
6144 igb_vfta_set(hw, vid, false);
6147 static void igb_restore_vlan(struct igb_adapter *adapter)
6149 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6151 if (adapter->vlgrp) {
6153 for (vid = 0; vid < VLAN_N_VID; vid++) {
6154 if (!vlan_group_get_device(adapter->vlgrp, vid))
6156 igb_vlan_rx_add_vid(adapter->netdev, vid);
6161 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6163 struct pci_dev *pdev = adapter->pdev;
6164 struct e1000_mac_info *mac = &adapter->hw.mac;
6168 /* Fiber NICs only allow 1000 Mbps full duplex */
6169 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6170 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6171 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6176 case SPEED_10 + DUPLEX_HALF:
6177 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6179 case SPEED_10 + DUPLEX_FULL:
6180 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6182 case SPEED_100 + DUPLEX_HALF:
6183 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6185 case SPEED_100 + DUPLEX_FULL:
6186 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6188 case SPEED_1000 + DUPLEX_FULL:
6190 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6192 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6194 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6200 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6202 struct net_device *netdev = pci_get_drvdata(pdev);
6203 struct igb_adapter *adapter = netdev_priv(netdev);
6204 struct e1000_hw *hw = &adapter->hw;
6205 u32 ctrl, rctl, status;
6206 u32 wufc = adapter->wol;
6211 netif_device_detach(netdev);
6213 if (netif_running(netdev))
6216 igb_clear_interrupt_scheme(adapter);
6219 retval = pci_save_state(pdev);
6224 status = rd32(E1000_STATUS);
6225 if (status & E1000_STATUS_LU)
6226 wufc &= ~E1000_WUFC_LNKC;
6229 igb_setup_rctl(adapter);
6230 igb_set_rx_mode(netdev);
6232 /* turn on all-multi mode if wake on multicast is enabled */
6233 if (wufc & E1000_WUFC_MC) {
6234 rctl = rd32(E1000_RCTL);
6235 rctl |= E1000_RCTL_MPE;
6236 wr32(E1000_RCTL, rctl);
6239 ctrl = rd32(E1000_CTRL);
6240 /* advertise wake from D3Cold */
6241 #define E1000_CTRL_ADVD3WUC 0x00100000
6242 /* phy power management enable */
6243 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6244 ctrl |= E1000_CTRL_ADVD3WUC;
6245 wr32(E1000_CTRL, ctrl);
6247 /* Allow time for pending master requests to run */
6248 igb_disable_pcie_master(hw);
6250 wr32(E1000_WUC, E1000_WUC_PME_EN);
6251 wr32(E1000_WUFC, wufc);
6254 wr32(E1000_WUFC, 0);
6257 *enable_wake = wufc || adapter->en_mng_pt;
6259 igb_power_down_link(adapter);
6261 igb_power_up_link(adapter);
6263 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6264 * would have already happened in close and is redundant. */
6265 igb_release_hw_control(adapter);
6267 pci_disable_device(pdev);
6273 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6278 retval = __igb_shutdown(pdev, &wake);
6283 pci_prepare_to_sleep(pdev);
6285 pci_wake_from_d3(pdev, false);
6286 pci_set_power_state(pdev, PCI_D3hot);
6292 static int igb_resume(struct pci_dev *pdev)
6294 struct net_device *netdev = pci_get_drvdata(pdev);
6295 struct igb_adapter *adapter = netdev_priv(netdev);
6296 struct e1000_hw *hw = &adapter->hw;
6299 pci_set_power_state(pdev, PCI_D0);
6300 pci_restore_state(pdev);
6301 pci_save_state(pdev);
6303 err = pci_enable_device_mem(pdev);
6306 "igb: Cannot enable PCI device from suspend\n");
6309 pci_set_master(pdev);
6311 pci_enable_wake(pdev, PCI_D3hot, 0);
6312 pci_enable_wake(pdev, PCI_D3cold, 0);
6314 if (igb_init_interrupt_scheme(adapter)) {
6315 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6321 /* let the f/w know that the h/w is now under the control of the
6323 igb_get_hw_control(adapter);
6325 wr32(E1000_WUS, ~0);
6327 if (netif_running(netdev)) {
6328 err = igb_open(netdev);
6333 netif_device_attach(netdev);
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* legacy/MSI mode: one shared vector, so mask all and poll it */
	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	/* MSI-X mode: mask and poll each queue vector individually */
	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
			err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
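/**
 * igb_rar_set_qsel - write a MAC address into a receive address register
 * @adapter: board private structure
 * @addr: MAC address, in network (big endian) byte order
 * @index: receive address register to program
 * @qsel: pool/queue to associate the address with
 *
 * Programs RAL/RAH for @index, marks the entry valid and encodes the
 * pool selection bits (the 82575 uses a multiplied encoding, later
 * parts a shifted one).
 */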
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
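/**
 * igb_set_vf_mac - program a MAC address for a virtual function
 * @adapter: board private structure
 * @vf: VF index
 * @mac_addr: address to assign
 *
 * Stores the address in the VF's private data and writes it into the
 * RAR entry reserved for that VF.
 */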
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at the end of the receive address table
	 * and move towards the first entry, so a collision should not be
	 * possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
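/**
 * igb_ndo_set_vf_mac - ndo callback to set a VF MAC address from the PF
 * @netdev: network interface device structure
 * @vf: VF index
 * @mac: address to assign
 *
 * Validates the address and VF index, flags the address as
 * administratively set, and programs it into the hardware.
 */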
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev,
		 "Reload the VF driver to make this change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF MAC address has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}
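/**
 * igb_ndo_set_vf_bw - ndo callback to set a VF transmit rate
 * @netdev: network interface device structure
 * @vf: VF index
 * @tx_rate: requested transmit rate
 *
 * Per-VF rate limiting is not implemented in this version of the
 * driver, so the request is rejected.
 */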
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	return -EOPNOTSUPP;
}
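/**
 * igb_ndo_get_vf_config - ndo callback to report a VF's configuration
 * @netdev: network interface device structure
 * @vf: VF index
 * @ivi: structure filled in with the VF's MAC, VLAN and QoS settings
 */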
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = 0;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
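/**
 * igb_vmm_control - configure VM-to-VM traffic handling
 * @adapter: board private structure
 *
 * Enables VLAN tag handling for the parts that need it, then turns
 * VMDq loopback and replication on when VFs are allocated and off
 * otherwise. The switch cases deliberately fall through: each newer
 * part needs a subset of the older parts' register writes.
 */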
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}