1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
48 #include <linux/dca.h>
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59 static const struct e1000_info *igb_info_tbl[] = {
60 [board_82575] = &e1000_82575_info,
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
79 /* required last entry */
83 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
85 void igb_reset(struct igb_adapter *);
86 static int igb_setup_all_tx_resources(struct igb_adapter *);
87 static int igb_setup_all_rx_resources(struct igb_adapter *);
88 static void igb_free_all_tx_resources(struct igb_adapter *);
89 static void igb_free_all_rx_resources(struct igb_adapter *);
90 static void igb_setup_mrqc(struct igb_adapter *);
91 void igb_update_stats(struct igb_adapter *);
92 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
93 static void __devexit igb_remove(struct pci_dev *pdev);
94 static int igb_sw_init(struct igb_adapter *);
95 static int igb_open(struct net_device *);
96 static int igb_close(struct net_device *);
97 static void igb_configure_tx(struct igb_adapter *);
98 static void igb_configure_rx(struct igb_adapter *);
99 static void igb_clean_all_tx_rings(struct igb_adapter *);
100 static void igb_clean_all_rx_rings(struct igb_adapter *);
101 static void igb_clean_tx_ring(struct igb_ring *);
102 static void igb_clean_rx_ring(struct igb_ring *);
103 static void igb_set_rx_mode(struct net_device *);
104 static void igb_update_phy_info(unsigned long);
105 static void igb_watchdog(unsigned long);
106 static void igb_watchdog_task(struct work_struct *);
107 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
108 static struct net_device_stats *igb_get_stats(struct net_device *);
109 static int igb_change_mtu(struct net_device *, int);
110 static int igb_set_mac(struct net_device *, void *);
111 static void igb_set_uta(struct igb_adapter *adapter);
112 static irqreturn_t igb_intr(int irq, void *);
113 static irqreturn_t igb_intr_msi(int irq, void *);
114 static irqreturn_t igb_msix_other(int irq, void *);
115 static irqreturn_t igb_msix_ring(int irq, void *);
116 #ifdef CONFIG_IGB_DCA
117 static void igb_update_dca(struct igb_q_vector *);
118 static void igb_setup_dca(struct igb_adapter *);
119 #endif /* CONFIG_IGB_DCA */
120 static bool igb_clean_tx_irq(struct igb_q_vector *);
121 static int igb_poll(struct napi_struct *, int);
122 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
123 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
124 static void igb_tx_timeout(struct net_device *);
125 static void igb_reset_task(struct work_struct *);
126 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
127 static void igb_vlan_rx_add_vid(struct net_device *, u16);
128 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
129 static void igb_restore_vlan(struct igb_adapter *);
130 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
131 static void igb_ping_all_vfs(struct igb_adapter *);
132 static void igb_msg_task(struct igb_adapter *);
133 static void igb_vmm_control(struct igb_adapter *);
134 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
135 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
136 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
137 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
138 int vf, u16 vlan, u8 qos);
139 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
140 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
141 struct ifla_vf_info *ivi);
144 static int igb_suspend(struct pci_dev *, pm_message_t);
145 static int igb_resume(struct pci_dev *);
147 static void igb_shutdown(struct pci_dev *);
148 #ifdef CONFIG_IGB_DCA
149 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
150 static struct notifier_block dca_notifier = {
151 .notifier_call = igb_notify_dca,
156 #ifdef CONFIG_NET_POLL_CONTROLLER
157 /* for netdump / net console */
158 static void igb_netpoll(struct net_device *);
160 #ifdef CONFIG_PCI_IOV
161 static unsigned int max_vfs = 0;
162 module_param(max_vfs, uint, 0);
163 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
164 "per physical function");
165 #endif /* CONFIG_PCI_IOV */
167 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
168 pci_channel_state_t);
169 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
170 static void igb_io_resume(struct pci_dev *);
172 static struct pci_error_handlers igb_err_handler = {
173 .error_detected = igb_io_error_detected,
174 .slot_reset = igb_io_slot_reset,
175 .resume = igb_io_resume,
179 static struct pci_driver igb_driver = {
180 .name = igb_driver_name,
181 .id_table = igb_pci_tbl,
183 .remove = __devexit_p(igb_remove),
185 /* Power Management Hooks */
186 .suspend = igb_suspend,
187 .resume = igb_resume,
189 .shutdown = igb_shutdown,
190 .err_handler = &igb_err_handler
193 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
194 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
195 MODULE_LICENSE("GPL");
196 MODULE_VERSION(DRV_VERSION);
199 * igb_read_clock - read raw cycle counter (to be used by time counter)
201 static cycle_t igb_read_clock(const struct cyclecounter *tc)
203 struct igb_adapter *adapter =
204 container_of(tc, struct igb_adapter, cycles);
205 struct e1000_hw *hw = &adapter->hw;
210 * The timestamp latches on lowest register read. For the 82580
211 * the lowest register is SYSTIMR instead of SYSTIML. However we never
212 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
214 if (hw->mac.type == e1000_82580) {
215 stamp = rd32(E1000_SYSTIMR) >> 8;
216 shift = IGB_82580_TSYNC_SHIFT;
219 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
220 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
226 * igb_get_hw_dev_name - return device name string
227 * used by hardware layer to print debugging information
229 char *igb_get_hw_dev_name(struct e1000_hw *hw)
231 struct igb_adapter *adapter = hw->back;
232 return adapter->netdev->name;
/*
 * Builds a human-readable line containing the raw cycle counter, the NIC
 * time, the system time and the difference NIC - system.
 *
 * NOTE(review): this listing is missing lines of the function (the rest of
 * the parameter list, the declaration of "sys", the head of the formatting
 * call and the return statement) -- confirm against the pristine file.
 */
236 * igb_get_time_str - format current NIC and system time as string
238 static char *igb_get_time_str(struct igb_adapter *adapter,
/* raw counter value, read through the cyclecounter's own read hook */
241 cycle_t hw = adapter->cycles.read(&adapter->cycles);
/* NIC time: the adapter's timecounter value converted to a timespec */
242 struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
244 struct timespec delta;
245 getnstimeofday(&sys);
/* delta = NIC time minus system time */
247 delta = timespec_sub(nic, sys);
250 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
252 (long)nic.tv_sec, nic.tv_nsec,
253 (long)sys.tv_sec, sys.tv_nsec,
254 (long)delta.tv_sec, delta.tv_nsec);
261 * igb_init_module - Driver Registration Routine
263 * igb_init_module is the first routine called when the driver is
264 * loaded. All it does is register with the PCI subsystem.
266 static int __init igb_init_module(void)
269 printk(KERN_INFO "%s - version %s\n",
270 igb_driver_string, igb_driver_version);
272 printk(KERN_INFO "%s\n", igb_copyright);
274 #ifdef CONFIG_IGB_DCA
275 dca_register_notify(&dca_notifier);
277 ret = pci_register_driver(&igb_driver);
281 module_init(igb_init_module);
284 * igb_exit_module - Driver Exit Cleanup Routine
286 * igb_exit_module is called just before the driver is removed
289 static void __exit igb_exit_module(void)
291 #ifdef CONFIG_IGB_DCA
292 dca_unregister_notify(&dca_notifier);
294 pci_unregister_driver(&igb_driver);
297 module_exit(igb_exit_module);
/*
 * Q_IDX_82576 - translate a sequential queue number into an 82576 queue index
 *
 * Bit 0 of the queue number selects an offset of 8, the remaining bits give
 * the position within that group: 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, ...
 * The argument is parenthesized so that expression arguments (e.g. "a | b")
 * expand correctly; it is still evaluated twice, so avoid side effects.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
/*
 * Stores a hardware register index in reg_idx for every rx and tx ring.
 * All indices are offset by rbase_offset, which is the number of allocated
 * VFs. When VFs are allocated, the first rss_queues rings of each direction
 * are handled by the first pair of loops; the second pair of loops continues
 * from wherever i and j were left and assigns rbase_offset + ring number.
 *
 * NOTE(review): this listing is missing lines of the function (the
 * declarations of i and j, the case labels of the switch, the end of the
 * block comment below, and the continuation of both "rbase_offset +"
 * expressions -- presumably Q_IDX_82576()). Confirm against the pristine
 * file before relying on this description.
 */
301 * igb_cache_ring_register - Descriptor ring to register mapping
302 * @adapter: board private structure to initialize
304 * Once we know the feature-set enabled for the device, we'll cache
305 * the register offset the descriptor ring is assigned to.
307 static void igb_cache_ring_register(struct igb_adapter *adapter)
310 u32 rbase_offset = adapter->vfs_allocated_count;
312 switch (adapter->hw.mac.type) {
314 /* The queues are allocated for virtualization such that VF 0
315 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
316 * In order to avoid collision we start at the first free queue
317 * and continue consuming queues in the same sequence
319 if (adapter->vfs_allocated_count) {
320 for (; i < adapter->rss_queues; i++)
321 adapter->rx_ring[i]->reg_idx = rbase_offset +
323 for (; j < adapter->rss_queues; j++)
324 adapter->tx_ring[j]->reg_idx = rbase_offset +
330 for (; i < adapter->num_rx_queues; i++)
331 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
332 for (; j < adapter->num_tx_queues; j++)
333 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
338 static void igb_free_queues(struct igb_adapter *adapter)
342 for (i = 0; i < adapter->num_tx_queues; i++) {
343 kfree(adapter->tx_ring[i]);
344 adapter->tx_ring[i] = NULL;
346 for (i = 0; i < adapter->num_rx_queues; i++) {
347 kfree(adapter->rx_ring[i]);
348 adapter->rx_ring[i] = NULL;
350 adapter->num_rx_queues = 0;
351 adapter->num_tx_queues = 0;
355 * igb_alloc_queues - Allocate memory for all rings
356 * @adapter: board private structure to initialize
358 * We allocate one ring per queue at run-time since we don't know the
359 * number of queues at compile-time.
361 static int igb_alloc_queues(struct igb_adapter *adapter)
363 struct igb_ring *ring;
366 for (i = 0; i < adapter->num_tx_queues; i++) {
367 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
370 ring->count = adapter->tx_ring_count;
371 ring->queue_index = i;
372 ring->pdev = adapter->pdev;
373 ring->netdev = adapter->netdev;
374 /* For 82575, context index must be unique per ring. */
375 if (adapter->hw.mac.type == e1000_82575)
376 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
377 adapter->tx_ring[i] = ring;
380 for (i = 0; i < adapter->num_rx_queues; i++) {
381 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
384 ring->count = adapter->rx_ring_count;
385 ring->queue_index = i;
386 ring->pdev = adapter->pdev;
387 ring->netdev = adapter->netdev;
388 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
389 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
390 /* set flag indicating ring supports SCTP checksum offload */
391 if (adapter->hw.mac.type >= e1000_82576)
392 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
393 adapter->rx_ring[i] = ring;
396 igb_cache_ring_register(adapter);
401 igb_free_queues(adapter);
/*
 * igb_assign_vector - bind the rings of one q_vector to an interrupt vector.
 *
 * rx_queue/tx_queue hold the register index of the q_vector's rx/tx ring, or
 * IGB_N0_QUEUE when the q_vector has no ring for that direction. Depending
 * on hw->mac.type the binding is programmed either as a bitmask written to
 * MSIXBM[msix_vector], or as one byte (vector number | E1000_IVAR_VALID) in
 * an IVAR0 table entry that is read, patched and written back. In every
 * visible variant q_vector->eims_value is set, then OR-ed into
 * adapter->eims_enable_mask, and set_itr is raised.
 *
 * NOTE(review): this listing is missing lines of the function (declarations
 * of msixbm, ivar and index, the case labels, the if/else heads that select
 * which IVAR byte is patched in the first table-based variant, and the
 * break statements) -- confirm against the pristine file.
 */
406 #define IGB_N0_QUEUE -1
407 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
410 struct igb_adapter *adapter = q_vector->adapter;
411 struct e1000_hw *hw = &adapter->hw;
413 int rx_queue = IGB_N0_QUEUE;
414 int tx_queue = IGB_N0_QUEUE;
416 if (q_vector->rx_ring)
417 rx_queue = q_vector->rx_ring->reg_idx;
418 if (q_vector->tx_ring)
419 tx_queue = q_vector->tx_ring->reg_idx;
421 switch (hw->mac.type) {
423 /* The 82575 assigns vectors using a bitmask, which matches the
424 bitmask for the EICR/EIMS/EIMC registers. To assign one
425 or more queues to a vector, we write the appropriate bits
426 into the MSIXBM register for that vector. */
427 if (rx_queue > IGB_N0_QUEUE)
428 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
429 if (tx_queue > IGB_N0_QUEUE)
430 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
/* without MSI-X, vector 0 also carries the "other" cause bit */
431 if (!adapter->msix_entries && msix_vector == 0)
432 msixbm |= E1000_EIMS_OTHER;
433 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
434 q_vector->eims_value = msixbm;
437 /* 82576 uses a table-based method for assigning vectors.
438 Each queue has a single entry in the table to which we write
439 a vector number along with a "valid" bit. Sadly, the layout
440 of the table is somewhat counterintuitive. */
441 if (rx_queue > IGB_N0_QUEUE) {
/* table entry is selected by the low three bits of the queue index */
442 index = (rx_queue & 0x7);
443 ivar = array_rd32(E1000_IVAR0, index);
445 /* vector goes into low byte of register */
446 ivar = ivar & 0xFFFFFF00;
447 ivar |= msix_vector | E1000_IVAR_VALID;
449 /* vector goes into third byte of register */
450 ivar = ivar & 0xFF00FFFF;
451 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
453 array_wr32(E1000_IVAR0, index, ivar);
455 if (tx_queue > IGB_N0_QUEUE) {
456 index = (tx_queue & 0x7);
457 ivar = array_rd32(E1000_IVAR0, index);
459 /* vector goes into second byte of register */
460 ivar = ivar & 0xFFFF00FF;
461 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
463 /* vector goes into high byte of register */
464 ivar = ivar & 0x00FFFFFF;
465 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
467 array_wr32(E1000_IVAR0, index, ivar);
469 q_vector->eims_value = 1 << msix_vector;
472 /* 82580 uses the same table-based approach as 82576 but has fewer
473 entries as a result we carry over for queues greater than 4. */
474 if (rx_queue > IGB_N0_QUEUE) {
/* two queues share one table entry; bit 0 of the queue picks the byte */
475 index = (rx_queue >> 1);
476 ivar = array_rd32(E1000_IVAR0, index);
477 if (rx_queue & 0x1) {
478 /* vector goes into third byte of register */
479 ivar = ivar & 0xFF00FFFF;
480 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
482 /* vector goes into low byte of register */
483 ivar = ivar & 0xFFFFFF00;
484 ivar |= msix_vector | E1000_IVAR_VALID;
486 array_wr32(E1000_IVAR0, index, ivar);
488 if (tx_queue > IGB_N0_QUEUE) {
489 index = (tx_queue >> 1);
490 ivar = array_rd32(E1000_IVAR0, index);
491 if (tx_queue & 0x1) {
492 /* vector goes into high byte of register */
493 ivar = ivar & 0x00FFFFFF;
494 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
496 /* vector goes into second byte of register */
497 ivar = ivar & 0xFFFF00FF;
498 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
500 array_wr32(E1000_IVAR0, index, ivar);
502 q_vector->eims_value = 1 << msix_vector;
509 /* add q_vector eims value to global eims_enable_mask */
510 adapter->eims_enable_mask |= q_vector->eims_value;
512 /* configure q_vector to set itr on first interrupt */
513 q_vector->set_itr = 1;
/*
 * Programs the hardware for MSI-X operation. The enable mask is rebuilt
 * from scratch: the first vector is reserved for "other" causes (e.g. link
 * changes) and recorded in adapter->eims_other, then each q_vector is bound
 * to the next vector number through igb_assign_vector(), which adds its own
 * bits to adapter->eims_enable_mask.
 *
 * NOTE(review): this listing is missing lines of the function (declarations
 * of tmp, i and vector, the case labels, the final arguments of the
 * array_wr32() and GPIE writes, and the break statements) -- confirm against
 * the pristine file.
 */
517 * igb_configure_msix - Configure MSI-X hardware
519 * igb_configure_msix sets up the hardware to properly
520 * generate MSI-X interrupts.
522 static void igb_configure_msix(struct igb_adapter *adapter)
526 struct e1000_hw *hw = &adapter->hw;
528 adapter->eims_enable_mask = 0;
530 /* set vector for other causes, i.e. link changes */
531 switch (hw->mac.type) {
/* bitmask-based variant: read-modify-write of CTRL_EXT */
533 tmp = rd32(E1000_CTRL_EXT);
534 /* enable MSI-X PBA support*/
535 tmp |= E1000_CTRL_EXT_PBA_CLR;
537 /* Auto-Mask interrupts upon ICR read. */
538 tmp |= E1000_CTRL_EXT_EIAME;
539 tmp |= E1000_CTRL_EXT_IRCA;
541 wr32(E1000_CTRL_EXT, tmp);
543 /* enable msix_other interrupt */
544 array_wr32(E1000_MSIXBM(0), vector++,
546 adapter->eims_other = E1000_EIMS_OTHER;
552 /* Turn on MSI-X capability first, or our settings
553 * won't stick. And it will take days to debug. */
554 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
555 E1000_GPIE_PBA | E1000_GPIE_EIAME |
558 /* enable msix_other interrupt */
559 adapter->eims_other = 1 << vector;
/* the "other" vector number plus valid bit goes into byte 1 of IVAR_MISC */
560 tmp = (vector++ | E1000_IVAR_VALID) << 8;
562 wr32(E1000_IVAR_MISC, tmp);
565 /* do nothing, since nothing else supports MSI-X */
567 } /* switch (hw->mac.type) */
569 adapter->eims_enable_mask |= adapter->eims_other;
/* queue vectors follow the "other" vector in ascending order */
571 for (i = 0; i < adapter->num_q_vectors; i++)
572 igb_assign_vector(adapter->q_vector[i], vector++);
578 * igb_request_msix - Initialize MSI-X interrupts
580 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
583 static int igb_request_msix(struct igb_adapter *adapter)
585 struct net_device *netdev = adapter->netdev;
586 struct e1000_hw *hw = &adapter->hw;
587 int i, err = 0, vector = 0;
589 err = request_irq(adapter->msix_entries[vector].vector,
590 igb_msix_other, 0, netdev->name, adapter);
595 for (i = 0; i < adapter->num_q_vectors; i++) {
596 struct igb_q_vector *q_vector = adapter->q_vector[i];
598 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
600 if (q_vector->rx_ring && q_vector->tx_ring)
601 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
602 q_vector->rx_ring->queue_index);
603 else if (q_vector->tx_ring)
604 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
605 q_vector->tx_ring->queue_index);
606 else if (q_vector->rx_ring)
607 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
608 q_vector->rx_ring->queue_index);
610 sprintf(q_vector->name, "%s-unused", netdev->name);
612 err = request_irq(adapter->msix_entries[vector].vector,
613 igb_msix_ring, 0, q_vector->name,
620 igb_configure_msix(adapter);
626 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
628 if (adapter->msix_entries) {
629 pci_disable_msix(adapter->pdev);
630 kfree(adapter->msix_entries);
631 adapter->msix_entries = NULL;
632 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
633 pci_disable_msi(adapter->pdev);
638 * igb_free_q_vectors - Free memory allocated for interrupt vectors
639 * @adapter: board private structure to initialize
641 * This function frees the memory allocated to the q_vectors. In addition if
642 * NAPI is enabled it will delete any references to the NAPI struct prior
643 * to freeing the q_vector.
645 static void igb_free_q_vectors(struct igb_adapter *adapter)
649 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
650 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
651 adapter->q_vector[v_idx] = NULL;
652 netif_napi_del(&q_vector->napi);
655 adapter->num_q_vectors = 0;
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated: ring structures are freed first, then the
 * q_vectors, and finally MSI-X/MSI is switched off.
 **/
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings go first: afterwards both queue counts are zero */
	igb_free_queues(adapter);

	/* then the vectors that were serving them */
	igb_free_q_vectors(adapter);

	/* and finally the PCI interrupt mode itself */
	igb_reset_interrupt_capability(adapter);
}
/*
 * Chooses the interrupt mode and the matching queue/vector counts.
 * First the queue counts are derived from adapter->rss_queues and an MSI-X
 * entry table is allocated and handed to pci_enable_msix(). The lower part
 * of the function is the single-queue configuration: SR-IOV state is torn
 * down (CONFIG_PCI_IOV only), queue and vector counts are forced to 1 with
 * queue pairing, and MSI is tried. The last statement publishes the tx queue
 * count to the network stack.
 *
 * NOTE(review): this listing is missing lines of the function (local
 * declarations, the increment announced by the "add 1 vector" comment, the
 * final arguments of kcalloc() and pci_enable_msix(), the gotos/labels that
 * separate the MSI-X success path from the fallback, and statements between
 * the SR-IOV teardown steps) -- confirm against the pristine file.
 */
672 * igb_set_interrupt_capability - set MSI or MSI-X if supported
674 * Attempt to configure interrupts using the best available
675 * capabilities of the hardware and kernel.
677 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
682 /* Number of supported queues. */
683 adapter->num_rx_queues = adapter->rss_queues;
684 adapter->num_tx_queues = adapter->rss_queues;
686 /* start with one vector for every rx queue */
687 numvecs = adapter->num_rx_queues;
689 /* if tx handler is separate add 1 for every tx queue */
690 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
691 numvecs += adapter->num_tx_queues;
693 /* store the number of vectors reserved for queues */
694 adapter->num_q_vectors = numvecs;
696 /* add 1 vector for link status interrupts */
698 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
700 if (!adapter->msix_entries)
/* request table entries 0..numvecs-1 in order */
703 for (i = 0; i < numvecs; i++)
704 adapter->msix_entries[i].entry = i;
706 err = pci_enable_msix(adapter->pdev,
707 adapter->msix_entries,
712 igb_reset_interrupt_capability(adapter);
714 /* If we can't do MSI-X, try MSI */
716 #ifdef CONFIG_PCI_IOV
717 /* disable SR-IOV for non MSI-X configurations */
718 if (adapter->vf_data) {
719 struct e1000_hw *hw = &adapter->hw;
720 /* disable iov and allow time for transactions to clear */
721 pci_disable_sriov(adapter->pdev);
724 kfree(adapter->vf_data);
725 adapter->vf_data = NULL;
726 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
728 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
/* single queue pair served by a single vector */
731 adapter->vfs_allocated_count = 0;
732 adapter->rss_queues = 1;
733 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
734 adapter->num_rx_queues = 1;
735 adapter->num_tx_queues = 1;
736 adapter->num_q_vectors = 1;
/* pci_enable_msi() returns 0 on success; only then is the flag set */
737 if (!pci_enable_msi(adapter->pdev))
738 adapter->flags |= IGB_FLAG_HAS_MSI;
740 /* Notify the stack of the (possibly) reduced Tx Queue count. */
741 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
746 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
747 * @adapter: board private structure to initialize
749 * We allocate one q_vector per queue interrupt. If allocation fails we
752 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
754 struct igb_q_vector *q_vector;
755 struct e1000_hw *hw = &adapter->hw;
758 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
759 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
762 q_vector->adapter = adapter;
763 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
764 q_vector->itr_val = IGB_START_ITR;
765 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
766 adapter->q_vector[v_idx] = q_vector;
773 q_vector = adapter->q_vector[v_idx];
774 netif_napi_del(&q_vector->napi);
776 adapter->q_vector[v_idx] = NULL;
781 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
782 int ring_idx, int v_idx)
784 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
786 q_vector->rx_ring = adapter->rx_ring[ring_idx];
787 q_vector->rx_ring->q_vector = q_vector;
788 q_vector->itr_val = adapter->rx_itr_setting;
789 if (q_vector->itr_val && q_vector->itr_val <= 3)
790 q_vector->itr_val = IGB_START_ITR;
793 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
794 int ring_idx, int v_idx)
796 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
798 q_vector->tx_ring = adapter->tx_ring[ring_idx];
799 q_vector->tx_ring->q_vector = q_vector;
800 q_vector->itr_val = adapter->tx_itr_setting;
801 if (q_vector->itr_val && q_vector->itr_val <= 3)
802 q_vector->itr_val = IGB_START_ITR;
/*
 * Distributes the rx and tx rings over the q_vectors. The mapping is
 * rejected when there are fewer q_vectors than rx queues or than tx queues.
 * With at least one q_vector per ring, every ring gets its own vector (all
 * rx rings first, then all tx rings). Otherwise rx ring i and tx ring i
 * share vector i, and any tx rings beyond the rx count get their own
 * vectors afterwards.
 *
 * NOTE(review): this listing is missing lines of the function (declarations
 * of i and v_idx, the statement executed when the first check fails, the
 * "else" between the two mapping variants, and the final return) -- confirm
 * against the pristine file.
 */
806 * igb_map_ring_to_vector - maps allocated queues to vectors
808 * This function maps the recently allocated queues to vectors.
810 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
815 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
816 (adapter->num_q_vectors < adapter->num_tx_queues))
/* enough vectors for one ring each */
819 if (adapter->num_q_vectors >=
820 (adapter->num_rx_queues + adapter->num_tx_queues)) {
821 for (i = 0; i < adapter->num_rx_queues; i++)
822 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
823 for (i = 0; i < adapter->num_tx_queues; i++)
824 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
/* paired variant: tx ring i (if any) shares the vector of rx ring i */
826 for (i = 0; i < adapter->num_rx_queues; i++) {
827 if (i < adapter->num_tx_queues)
828 igb_map_tx_ring_to_vector(adapter, i, v_idx);
829 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
831 for (; i < adapter->num_tx_queues; i++)
832 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
838 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
840 * This function initializes the interrupts and allocates all of the queues.
842 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
844 struct pci_dev *pdev = adapter->pdev;
847 igb_set_interrupt_capability(adapter);
849 err = igb_alloc_q_vectors(adapter);
851 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
852 goto err_alloc_q_vectors;
855 err = igb_alloc_queues(adapter);
857 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
858 goto err_alloc_queues;
861 err = igb_map_ring_to_vector(adapter);
863 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
870 igb_free_queues(adapter);
872 igb_free_q_vectors(adapter);
874 igb_reset_interrupt_capability(adapter);
879 * igb_request_irq - initialize interrupts
881 * Attempts to configure interrupts using the best available
882 * capabilities of the hardware and kernel.
884 static int igb_request_irq(struct igb_adapter *adapter)
886 struct net_device *netdev = adapter->netdev;
887 struct pci_dev *pdev = adapter->pdev;
890 if (adapter->msix_entries) {
891 err = igb_request_msix(adapter);
894 /* fall back to MSI */
895 igb_clear_interrupt_scheme(adapter);
896 if (!pci_enable_msi(adapter->pdev))
897 adapter->flags |= IGB_FLAG_HAS_MSI;
898 igb_free_all_tx_resources(adapter);
899 igb_free_all_rx_resources(adapter);
900 adapter->num_tx_queues = 1;
901 adapter->num_rx_queues = 1;
902 adapter->num_q_vectors = 1;
903 err = igb_alloc_q_vectors(adapter);
906 "Unable to allocate memory for vectors\n");
909 err = igb_alloc_queues(adapter);
912 "Unable to allocate memory for queues\n");
913 igb_free_q_vectors(adapter);
916 igb_setup_all_tx_resources(adapter);
917 igb_setup_all_rx_resources(adapter);
919 igb_assign_vector(adapter->q_vector[0], 0);
922 if (adapter->flags & IGB_FLAG_HAS_MSI) {
923 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
924 netdev->name, adapter);
928 /* fall back to legacy interrupts */
929 igb_reset_interrupt_capability(adapter);
930 adapter->flags &= ~IGB_FLAG_HAS_MSI;
933 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
934 netdev->name, adapter);
937 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
944 static void igb_free_irq(struct igb_adapter *adapter)
946 if (adapter->msix_entries) {
949 free_irq(adapter->msix_entries[vector++].vector, adapter);
951 for (i = 0; i < adapter->num_q_vectors; i++) {
952 struct igb_q_vector *q_vector = adapter->q_vector[i];
953 free_irq(adapter->msix_entries[vector++].vector,
957 free_irq(adapter->pdev->irq, adapter);
/*
 * Masks the interrupts this driver enabled. In MSI-X mode only the bits
 * recorded in adapter->eims_enable_mask are removed from EIAM and EIAC
 * (read-modify-write) and written to EIMC; finally the function waits for
 * a running handler of the PCI interrupt line via synchronize_irq().
 *
 * NOTE(review): this listing is missing lines of the function between the
 * MSI-X branch and synchronize_irq() -- confirm against the pristine file.
 */
962 * igb_irq_disable - Mask off interrupt generation on the NIC
963 * @adapter: board private structure
965 static void igb_irq_disable(struct igb_adapter *adapter)
967 struct e1000_hw *hw = &adapter->hw;
970 * we need to be careful when disabling interrupts. The VFs are also
971 * mapped into these registers and so clearing the bits can cause
972 * issues on the VF drivers so we only need to clear what we set
974 if (adapter->msix_entries) {
975 u32 regval = rd32(E1000_EIAM);
976 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
977 wr32(E1000_EIMC, adapter->eims_enable_mask);
978 regval = rd32(E1000_EIAC);
979 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
985 synchronize_irq(adapter->pdev->irq);
/*
 * Unmasks interrupts. In MSI-X mode the bits in adapter->eims_enable_mask
 * are added to EIAC and EIAM (read-modify-write) and written to EIMS; the
 * IMS register then receives link-status-change and DOUTSYNC, plus the
 * mailbox cause when VFs are allocated and DRSTA on 82580. The two writes
 * at the end (IMS and IAM with IMS_ENABLE_MASK) belong to the non-MSI-X
 * variant.
 *
 * NOTE(review): this listing is missing lines of the function (the "else"
 * separating the two variants and the continuation of both IMS_ENABLE_MASK
 * expressions) -- confirm against the pristine file.
 */
989 * igb_irq_enable - Enable default interrupt generation settings
990 * @adapter: board private structure
992 static void igb_irq_enable(struct igb_adapter *adapter)
994 struct e1000_hw *hw = &adapter->hw;
996 if (adapter->msix_entries) {
997 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
998 u32 regval = rd32(E1000_EIAC);
999 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1000 regval = rd32(E1000_EIAM);
1001 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1002 wr32(E1000_EIMS, adapter->eims_enable_mask);
/* with VFs present, also take mailbox interrupts */
1003 if (adapter->vfs_allocated_count) {
1004 wr32(E1000_MBVFIMR, 0xFF);
1005 ims |= E1000_IMS_VMMB;
1007 if (adapter->hw.mac.type == e1000_82580)
1008 ims |= E1000_IMS_DRSTA;
1010 wr32(E1000_IMS, ims);
1012 wr32(E1000_IMS, IMS_ENABLE_MASK |
1014 wr32(E1000_IAM, IMS_ENABLE_MASK |
/*
 * Synchronizes the VLAN filter table with the management VLAN id found in
 * hw->mng_cookie. If the cookie status has the VLAN bit set, the id is added
 * to the filter table and remembered in adapter->mng_vlan_id; the other
 * visible assignment stores IGB_MNG_VLAN_NONE. A previously remembered id
 * is removed from the filter table when no VLAN device in adapter->vlgrp
 * uses it.
 *
 * NOTE(review): this listing is missing lines of the function (the "else"
 * before the IGB_MNG_VLAN_NONE assignment and one term of the removal
 * condition) -- confirm against the pristine file.
 */
1019 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1021 struct e1000_hw *hw = &adapter->hw;
1022 u16 vid = adapter->hw.mng_cookie.vlan_id;
1023 u16 old_vid = adapter->mng_vlan_id;
1025 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1026 /* add VID to filter table */
1027 igb_vfta_set(hw, vid, true);
1028 adapter->mng_vlan_id = vid;
1030 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1033 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1035 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1036 /* remove VID from filter table */
1037 igb_vfta_set(hw, old_vid, false);
1042 * igb_release_hw_control - release control of the h/w to f/w
1043 * @adapter: address of board private structure
1045 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1046 * For ASF and Pass Through versions of f/w this means that the
1047 * driver is no longer loaded.
1050 static void igb_release_hw_control(struct igb_adapter *adapter)
1052 struct e1000_hw *hw = &adapter->hw;
1055 /* Let firmware take over control of h/w */
1056 ctrl_ext = rd32(E1000_CTRL_EXT);
1057 wr32(E1000_CTRL_EXT,
1058 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1062 * igb_get_hw_control - get control of the h/w from f/w
1063 * @adapter: address of board private structure
1065 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1066 * For ASF and Pass Through versions of f/w this means that
1067 * the driver is loaded.
1070 static void igb_get_hw_control(struct igb_adapter *adapter)
1072 struct e1000_hw *hw = &adapter->hw;
1075 /* Let firmware know the driver has taken over */
1076 ctrl_ext = rd32(E1000_CTRL_EXT);
1077 wr32(E1000_CTRL_EXT,
1078 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1082 * igb_configure - configure the hardware for RX and TX
1083 * @adapter: private board structure
1085 static void igb_configure(struct igb_adapter *adapter)
1087 struct net_device *netdev = adapter->netdev;
1090 igb_get_hw_control(adapter);
1091 igb_set_rx_mode(netdev);
1093 igb_restore_vlan(adapter);
1095 igb_setup_tctl(adapter);
1096 igb_setup_mrqc(adapter);
1097 igb_setup_rctl(adapter);
1099 igb_configure_tx(adapter);
1100 igb_configure_rx(adapter);
1102 igb_rx_fifo_flush_82575(&adapter->hw);
1104 /* call igb_desc_unused which always leaves
1105 * at least 1 descriptor unused to make sure
1106 * next_to_use != next_to_clean */
1107 for (i = 0; i < adapter->num_rx_queues; i++) {
1108 struct igb_ring *ring = adapter->rx_ring[i];
1109 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1113 adapter->tx_queue_len = netdev->tx_queue_len;
1117 * igb_power_up_link - Power up the phy/serdes link
1118 * @adapter: address of board private structure
1120 void igb_power_up_link(struct igb_adapter *adapter)
1122 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1123 igb_power_up_phy_copper(&adapter->hw);
1125 igb_power_up_serdes_link_82575(&adapter->hw);
1129 * igb_power_down_link - Power down the phy/serdes link
1130 * @adapter: address of board private structure
1132 static void igb_power_down_link(struct igb_adapter *adapter)
1134 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1135 igb_power_down_phy_copper_82575(&adapter->hw);
1137 igb_shutdown_serdes_link_82575(&adapter->hw);
1141 * igb_up - Open the interface and prepare it to handle traffic
1142 * @adapter: board private structure
1144 int igb_up(struct igb_adapter *adapter)
1146 struct e1000_hw *hw = &adapter->hw;
1149 /* hardware has been reset, we need to reload some things */
1150 igb_configure(adapter);
1152 clear_bit(__IGB_DOWN, &adapter->state);
1154 for (i = 0; i < adapter->num_q_vectors; i++) {
1155 struct igb_q_vector *q_vector = adapter->q_vector[i];
1156 napi_enable(&q_vector->napi);
1158 if (adapter->msix_entries)
1159 igb_configure_msix(adapter);
1161 igb_assign_vector(adapter->q_vector[0], 0);
1163 /* Clear any pending interrupts. */
1165 igb_irq_enable(adapter);
1167 /* notify VFs that reset has been completed */
1168 if (adapter->vfs_allocated_count) {
1169 u32 reg_data = rd32(E1000_CTRL_EXT);
1170 reg_data |= E1000_CTRL_EXT_PFRSTD;
1171 wr32(E1000_CTRL_EXT, reg_data);
1174 netif_tx_start_all_queues(adapter->netdev);
1176 /* start the watchdog. */
1177 hw->mac.get_link_status = 1;
1178 schedule_work(&adapter->watchdog_task);
1183 void igb_down(struct igb_adapter *adapter)
1185 struct net_device *netdev = adapter->netdev;
1186 struct e1000_hw *hw = &adapter->hw;
1190 /* signal that we're down so the interrupt handler does not
1191 * reschedule our watchdog timer */
1192 set_bit(__IGB_DOWN, &adapter->state);
1194 /* disable receives in the hardware */
1195 rctl = rd32(E1000_RCTL);
1196 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1197 /* flush and sleep below */
1199 netif_tx_stop_all_queues(netdev);
1201 /* disable transmits in the hardware */
1202 tctl = rd32(E1000_TCTL);
1203 tctl &= ~E1000_TCTL_EN;
1204 wr32(E1000_TCTL, tctl);
1205 /* flush both disables and wait for them to finish */
1209 for (i = 0; i < adapter->num_q_vectors; i++) {
1210 struct igb_q_vector *q_vector = adapter->q_vector[i];
1211 napi_disable(&q_vector->napi);
1214 igb_irq_disable(adapter);
1216 del_timer_sync(&adapter->watchdog_timer);
1217 del_timer_sync(&adapter->phy_info_timer);
1219 netdev->tx_queue_len = adapter->tx_queue_len;
1220 netif_carrier_off(netdev);
1222 /* record the stats before reset*/
1223 igb_update_stats(adapter);
1225 adapter->link_speed = 0;
1226 adapter->link_duplex = 0;
1228 if (!pci_channel_offline(adapter->pdev))
1230 igb_clean_all_tx_rings(adapter);
1231 igb_clean_all_rx_rings(adapter);
1232 #ifdef CONFIG_IGB_DCA
1234 /* since we reset the hardware DCA settings were cleared */
1235 igb_setup_dca(adapter);
1239 void igb_reinit_locked(struct igb_adapter *adapter)
1241 WARN_ON(in_interrupt());
1242 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1246 clear_bit(__IGB_RESETTING, &adapter->state);
1249 void igb_reset(struct igb_adapter *adapter)
1251 struct pci_dev *pdev = adapter->pdev;
1252 struct e1000_hw *hw = &adapter->hw;
1253 struct e1000_mac_info *mac = &hw->mac;
1254 struct e1000_fc_info *fc = &hw->fc;
1255 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1258 /* Repartition Pba for greater than 9k mtu
1259 * To take effect CTRL.RST is required.
1261 switch (mac->type) {
1263 pba = rd32(E1000_RXPBS);
1264 pba = igb_rxpbs_adjust_82580(pba);
1267 pba = rd32(E1000_RXPBS);
1268 pba &= E1000_RXPBS_SIZE_MASK_82576;
1272 pba = E1000_PBA_34K;
1276 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1277 (mac->type < e1000_82576)) {
1278 /* adjust PBA for jumbo frames */
1279 wr32(E1000_PBA, pba);
1281 /* To maintain wire speed transmits, the Tx FIFO should be
1282 * large enough to accommodate two full transmit packets,
1283 * rounded up to the next 1KB and expressed in KB. Likewise,
1284 * the Rx FIFO should be large enough to accommodate at least
1285 * one full receive packet and is similarly rounded up and
1286 * expressed in KB. */
1287 pba = rd32(E1000_PBA);
1288 /* upper 16 bits has Tx packet buffer allocation size in KB */
1289 tx_space = pba >> 16;
1290 /* lower 16 bits has Rx packet buffer allocation size in KB */
1292 /* the tx fifo also stores 16 bytes of information about the tx
1293 * but don't include ethernet FCS because hardware appends it */
1294 min_tx_space = (adapter->max_frame_size +
1295 sizeof(union e1000_adv_tx_desc) -
1297 min_tx_space = ALIGN(min_tx_space, 1024);
1298 min_tx_space >>= 10;
1299 /* software strips receive CRC, so leave room for it */
1300 min_rx_space = adapter->max_frame_size;
1301 min_rx_space = ALIGN(min_rx_space, 1024);
1302 min_rx_space >>= 10;
1304 /* If current Tx allocation is less than the min Tx FIFO size,
1305 * and the min Tx FIFO size is less than the current Rx FIFO
1306 * allocation, take space away from current Rx allocation */
1307 if (tx_space < min_tx_space &&
1308 ((min_tx_space - tx_space) < pba)) {
1309 pba = pba - (min_tx_space - tx_space);
1311 /* if short on rx space, rx wins and must trump tx
1313 if (pba < min_rx_space)
1316 wr32(E1000_PBA, pba);
1319 /* flow control settings */
1320 /* The high water mark must be low enough to fit one full frame
1321 * (or the size used for early receive) above it in the Rx FIFO.
1322 * Set it to the lower of:
1323 * - 90% of the Rx FIFO size, or
1324 * - the full Rx FIFO size minus one full frame */
1325 hwm = min(((pba << 10) * 9 / 10),
1326 ((pba << 10) - 2 * adapter->max_frame_size));
1328 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1329 fc->low_water = fc->high_water - 16;
1330 fc->pause_time = 0xFFFF;
1332 fc->current_mode = fc->requested_mode;
1334 /* disable receive for all VFs and wait one second */
1335 if (adapter->vfs_allocated_count) {
1337 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1338 adapter->vf_data[i].flags = 0;
1340 /* ping all the active vfs to let them know we are going down */
1341 igb_ping_all_vfs(adapter);
1343 /* disable transmits and receives */
1344 wr32(E1000_VFRE, 0);
1345 wr32(E1000_VFTE, 0);
1348 /* Allow time for pending master requests to run */
1349 hw->mac.ops.reset_hw(hw);
1352 if (hw->mac.ops.init_hw(hw))
1353 dev_err(&pdev->dev, "Hardware Error\n");
1355 if (hw->mac.type == e1000_82580) {
1356 u32 reg = rd32(E1000_PCIEMISC);
1357 wr32(E1000_PCIEMISC,
1358 reg & ~E1000_PCIEMISC_LX_DECISION);
1360 if (!netif_running(adapter->netdev))
1361 igb_power_down_link(adapter);
1363 igb_update_mng_vlan(adapter);
1365 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1366 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1368 igb_get_phy_info(hw);
1371 static const struct net_device_ops igb_netdev_ops = {
1372 .ndo_open = igb_open,
1373 .ndo_stop = igb_close,
1374 .ndo_start_xmit = igb_xmit_frame_adv,
1375 .ndo_get_stats = igb_get_stats,
1376 .ndo_set_rx_mode = igb_set_rx_mode,
1377 .ndo_set_multicast_list = igb_set_rx_mode,
1378 .ndo_set_mac_address = igb_set_mac,
1379 .ndo_change_mtu = igb_change_mtu,
1380 .ndo_do_ioctl = igb_ioctl,
1381 .ndo_tx_timeout = igb_tx_timeout,
1382 .ndo_validate_addr = eth_validate_addr,
1383 .ndo_vlan_rx_register = igb_vlan_rx_register,
1384 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1385 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1386 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1387 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1388 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1389 .ndo_get_vf_config = igb_ndo_get_vf_config,
1390 #ifdef CONFIG_NET_POLL_CONTROLLER
1391 .ndo_poll_controller = igb_netpoll,
1396 * igb_probe - Device Initialization Routine
1397 * @pdev: PCI device information struct
1398 * @ent: entry in igb_pci_tbl
1400 * Returns 0 on success, negative on failure
1402 * igb_probe initializes an adapter identified by a pci_dev structure.
1403 * The OS initialization, configuring of the adapter private structure,
1404 * and a hardware reset occur.
1406 static int __devinit igb_probe(struct pci_dev *pdev,
1407 const struct pci_device_id *ent)
1409 struct net_device *netdev;
1410 struct igb_adapter *adapter;
1411 struct e1000_hw *hw;
1412 u16 eeprom_data = 0;
1413 static int global_quad_port_a; /* global quad port a indication */
1414 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1415 unsigned long mmio_start, mmio_len;
1416 int err, pci_using_dac;
1417 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1420 err = pci_enable_device_mem(pdev);
1425 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1427 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1431 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1433 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1435 dev_err(&pdev->dev, "No usable DMA "
1436 "configuration, aborting\n");
1442 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1448 pci_enable_pcie_error_reporting(pdev);
1450 pci_set_master(pdev);
1451 pci_save_state(pdev);
1454 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1455 IGB_ABS_MAX_TX_QUEUES);
1457 goto err_alloc_etherdev;
1459 SET_NETDEV_DEV(netdev, &pdev->dev);
1461 pci_set_drvdata(pdev, netdev);
1462 adapter = netdev_priv(netdev);
1463 adapter->netdev = netdev;
1464 adapter->pdev = pdev;
1467 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1469 mmio_start = pci_resource_start(pdev, 0);
1470 mmio_len = pci_resource_len(pdev, 0);
1473 hw->hw_addr = ioremap(mmio_start, mmio_len);
1477 netdev->netdev_ops = &igb_netdev_ops;
1478 igb_set_ethtool_ops(netdev);
1479 netdev->watchdog_timeo = 5 * HZ;
1481 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1483 netdev->mem_start = mmio_start;
1484 netdev->mem_end = mmio_start + mmio_len;
1486 /* PCI config space info */
1487 hw->vendor_id = pdev->vendor;
1488 hw->device_id = pdev->device;
1489 hw->revision_id = pdev->revision;
1490 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1491 hw->subsystem_device_id = pdev->subsystem_device;
1493 /* Copy the default MAC, PHY and NVM function pointers */
1494 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1495 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1496 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1497 /* Initialize skew-specific constants */
1498 err = ei->get_invariants(hw);
1502 /* setup the private structure */
1503 err = igb_sw_init(adapter);
1507 igb_get_bus_info_pcie(hw);
1509 hw->phy.autoneg_wait_to_complete = false;
1511 /* Copper options */
1512 if (hw->phy.media_type == e1000_media_type_copper) {
1513 hw->phy.mdix = AUTO_ALL_MODES;
1514 hw->phy.disable_polarity_correction = false;
1515 hw->phy.ms_type = e1000_ms_hw_default;
1518 if (igb_check_reset_block(hw))
1519 dev_info(&pdev->dev,
1520 "PHY reset is blocked due to SOL/IDER session.\n");
1522 netdev->features = NETIF_F_SG |
1524 NETIF_F_HW_VLAN_TX |
1525 NETIF_F_HW_VLAN_RX |
1526 NETIF_F_HW_VLAN_FILTER;
1528 netdev->features |= NETIF_F_IPV6_CSUM;
1529 netdev->features |= NETIF_F_TSO;
1530 netdev->features |= NETIF_F_TSO6;
1531 netdev->features |= NETIF_F_GRO;
1533 netdev->vlan_features |= NETIF_F_TSO;
1534 netdev->vlan_features |= NETIF_F_TSO6;
1535 netdev->vlan_features |= NETIF_F_IP_CSUM;
1536 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1537 netdev->vlan_features |= NETIF_F_SG;
1540 netdev->features |= NETIF_F_HIGHDMA;
1542 if (hw->mac.type >= e1000_82576)
1543 netdev->features |= NETIF_F_SCTP_CSUM;
1545 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1547 /* before reading the NVM, reset the controller to put the device in a
1548 * known good starting state */
1549 hw->mac.ops.reset_hw(hw);
1551 /* make sure the NVM is good */
1552 if (igb_validate_nvm_checksum(hw) < 0) {
1553 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1558 /* copy the MAC address out of the NVM */
1559 if (hw->mac.ops.read_mac_addr(hw))
1560 dev_err(&pdev->dev, "NVM Read Error\n");
1562 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1563 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1565 if (!is_valid_ether_addr(netdev->perm_addr)) {
1566 dev_err(&pdev->dev, "Invalid MAC Address\n");
1571 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1572 (unsigned long) adapter);
1573 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1574 (unsigned long) adapter);
1576 INIT_WORK(&adapter->reset_task, igb_reset_task);
1577 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1579 /* Initialize link properties that are user-changeable */
1580 adapter->fc_autoneg = true;
1581 hw->mac.autoneg = true;
1582 hw->phy.autoneg_advertised = 0x2f;
1584 hw->fc.requested_mode = e1000_fc_default;
1585 hw->fc.current_mode = e1000_fc_default;
1587 igb_validate_mdi_setting(hw);
1589 /* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
1590 * enable the ACPI Magic Packet filter
1593 if (hw->bus.func == 0)
1594 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1595 else if (hw->mac.type == e1000_82580)
1596 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1597 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1599 else if (hw->bus.func == 1)
1600 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1602 if (eeprom_data & eeprom_apme_mask)
1603 adapter->eeprom_wol |= E1000_WUFC_MAG;
1605 /* now that we have the eeprom settings, apply the special cases where
1606 * the eeprom may be wrong or the board simply won't support wake on
1607 * lan on a particular port */
1608 switch (pdev->device) {
1609 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1610 adapter->eeprom_wol = 0;
1612 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1613 case E1000_DEV_ID_82576_FIBER:
1614 case E1000_DEV_ID_82576_SERDES:
1615 /* Wake events only supported on port A for dual fiber
1616 * regardless of eeprom setting */
1617 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1618 adapter->eeprom_wol = 0;
1620 case E1000_DEV_ID_82576_QUAD_COPPER:
1621 /* if quad port adapter, disable WoL on all but port A */
1622 if (global_quad_port_a != 0)
1623 adapter->eeprom_wol = 0;
1625 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1626 /* Reset for multiple quad port adapters */
1627 if (++global_quad_port_a == 4)
1628 global_quad_port_a = 0;
1632 /* initialize the wol settings based on the eeprom settings */
1633 adapter->wol = adapter->eeprom_wol;
1634 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1636 /* reset the hardware with the new settings */
1639 /* let the f/w know that the h/w is now under the control of the
1641 igb_get_hw_control(adapter);
1643 strcpy(netdev->name, "eth%d");
1644 err = register_netdev(netdev);
1648 /* carrier off reporting is important to ethtool even BEFORE open */
1649 netif_carrier_off(netdev);
1651 #ifdef CONFIG_IGB_DCA
1652 if (dca_add_requester(&pdev->dev) == 0) {
1653 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1654 dev_info(&pdev->dev, "DCA enabled\n");
1655 igb_setup_dca(adapter);
1659 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1660 /* print bus type/speed/width info */
1661 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1663 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1665 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1666 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1667 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1671 igb_read_part_num(hw, &part_num);
1672 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1673 (part_num >> 8), (part_num & 0xff));
1675 dev_info(&pdev->dev,
1676 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1677 adapter->msix_entries ? "MSI-X" :
1678 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1679 adapter->num_rx_queues, adapter->num_tx_queues);
1684 igb_release_hw_control(adapter);
1686 if (!igb_check_reset_block(hw))
1689 if (hw->flash_address)
1690 iounmap(hw->flash_address);
1692 igb_clear_interrupt_scheme(adapter);
1693 iounmap(hw->hw_addr);
1695 free_netdev(netdev);
1697 pci_release_selected_regions(pdev,
1698 pci_select_bars(pdev, IORESOURCE_MEM));
1701 pci_disable_device(pdev);
1706 * igb_remove - Device Removal Routine
1707 * @pdev: PCI device information struct
1709 * igb_remove is called by the PCI subsystem to alert the driver
1710 * that it should release a PCI device. This could be caused by a
1711 * Hot-Plug event, or because the driver is going to be removed from
1714 static void __devexit igb_remove(struct pci_dev *pdev)
1716 struct net_device *netdev = pci_get_drvdata(pdev);
1717 struct igb_adapter *adapter = netdev_priv(netdev);
1718 struct e1000_hw *hw = &adapter->hw;
1720 /* flush_scheduled_work() may reschedule our watchdog task, so
1721 * explicitly disable watchdog tasks from being rescheduled */
1722 set_bit(__IGB_DOWN, &adapter->state);
1723 del_timer_sync(&adapter->watchdog_timer);
1724 del_timer_sync(&adapter->phy_info_timer);
1726 flush_scheduled_work();
1728 #ifdef CONFIG_IGB_DCA
1729 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1730 dev_info(&pdev->dev, "DCA disabled\n");
1731 dca_remove_requester(&pdev->dev);
1732 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1733 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1737 /* Release control of h/w to f/w. If f/w is AMT enabled, this
1738 * would have already happened in close and is redundant. */
1739 igb_release_hw_control(adapter);
1741 unregister_netdev(netdev);
1743 igb_clear_interrupt_scheme(adapter);
1745 #ifdef CONFIG_PCI_IOV
1746 /* reclaim resources allocated to VFs */
1747 if (adapter->vf_data) {
1748 /* disable iov and allow time for transactions to clear */
1749 pci_disable_sriov(pdev);
1752 kfree(adapter->vf_data);
1753 adapter->vf_data = NULL;
1754 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1756 dev_info(&pdev->dev, "IOV Disabled\n");
1760 iounmap(hw->hw_addr);
1761 if (hw->flash_address)
1762 iounmap(hw->flash_address);
1763 pci_release_selected_regions(pdev,
1764 pci_select_bars(pdev, IORESOURCE_MEM));
1766 free_netdev(netdev);
1768 pci_disable_pcie_error_reporting(pdev);
1770 pci_disable_device(pdev);
1774 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1775 * @adapter: board private structure to initialize
1777 * This function initializes the vf specific data storage and then attempts to
1778 * allocate the VFs. The reason for ordering it this way is because it is much
1779 * more expensive time-wise to disable SR-IOV than it is to allocate and free
1780 * the memory for the VFs.
1782 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1784 #ifdef CONFIG_PCI_IOV
1785 struct pci_dev *pdev = adapter->pdev;
1787 if (adapter->vfs_allocated_count > 7)
1788 adapter->vfs_allocated_count = 7;
1790 if (adapter->vfs_allocated_count) {
1791 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1792 sizeof(struct vf_data_storage),
1794 /* if allocation failed then we do not support SR-IOV */
1795 if (!adapter->vf_data) {
1796 adapter->vfs_allocated_count = 0;
1797 dev_err(&pdev->dev, "Unable to allocate memory for VF "
1802 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1803 kfree(adapter->vf_data);
1804 adapter->vf_data = NULL;
1805 #endif /* CONFIG_PCI_IOV */
1806 adapter->vfs_allocated_count = 0;
1807 #ifdef CONFIG_PCI_IOV
1809 unsigned char mac_addr[ETH_ALEN];
1811 dev_info(&pdev->dev, "%d vfs allocated\n",
1812 adapter->vfs_allocated_count);
1813 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1814 random_ether_addr(mac_addr);
1815 igb_set_vf_mac(adapter, i, mac_addr);
1818 #endif /* CONFIG_PCI_IOV */
1823 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1824 * @adapter: board private structure to initialize
1826 * igb_init_hw_timer initializes the function pointer and values for the hw
1827 * timer found in hardware.
1829 static void igb_init_hw_timer(struct igb_adapter *adapter)
1831 struct e1000_hw *hw = &adapter->hw;
1833 switch (hw->mac.type) {
1835 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1836 adapter->cycles.read = igb_read_clock;
1837 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1838 adapter->cycles.mult = 1;
1840 * The 82580 timesync updates the system timer every 8ns by 8ns
1841 * and the value cannot be shifted. Instead we need to shift
1842 * the registers to generate a 64bit timer value. As a result
1843 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1844 * 24 in order to generate a larger value for synchronization.
1846 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1847 /* disable system timer temporarily by setting bit 31 */
1848 wr32(E1000_TSAUXC, 0x80000000);
1851 /* Set registers so that rollover occurs soon to test this. */
1852 wr32(E1000_SYSTIMR, 0x00000000);
1853 wr32(E1000_SYSTIML, 0x80000000);
1854 wr32(E1000_SYSTIMH, 0x000000FF);
1857 /* enable system timer by clearing bit 31 */
1858 wr32(E1000_TSAUXC, 0x0);
1861 timecounter_init(&adapter->clock,
1863 ktime_to_ns(ktime_get_real()));
1865 * Synchronize our NIC clock against system wall clock. NIC
1866 * time stamp reading requires ~3us per sample, each sample
1867 * was pretty stable even under load => only require 10
1868 * samples for each offset comparison.
1870 memset(&adapter->compare, 0, sizeof(adapter->compare));
1871 adapter->compare.source = &adapter->clock;
1872 adapter->compare.target = ktime_get_real;
1873 adapter->compare.num_samples = 10;
1874 timecompare_update(&adapter->compare, 0);
1878 * Initialize hardware timer: we keep it running just in case
1879 * some program needs it later on.
1881 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1882 adapter->cycles.read = igb_read_clock;
1883 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1884 adapter->cycles.mult = 1;
1886 * Scale the NIC clock cycle by a large factor so that
1887 * relatively small clock corrections can be added or
1888 * subtracted at each clock tick. The drawbacks of a large
1889 * factor are a) that the clock register overflows more quickly
1890 * (not such a big deal) and b) that the increment per tick has
1891 * to fit into 24 bits. As a result we need to use a shift of
1892 * 19 so we can fit a value of 16 into the TIMINCA register.
1894 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1896 (1 << E1000_TIMINCA_16NS_SHIFT) |
1897 (16 << IGB_82576_TSYNC_SHIFT));
1899 /* Set registers so that rollover occurs soon to test this. */
1900 wr32(E1000_SYSTIML, 0x00000000);
1901 wr32(E1000_SYSTIMH, 0xFF800000);
1904 timecounter_init(&adapter->clock,
1906 ktime_to_ns(ktime_get_real()));
1908 * Synchronize our NIC clock against system wall clock. NIC
1909 * time stamp reading requires ~3us per sample, each sample
1910 * was pretty stable even under load => only require 10
1911 * samples for each offset comparison.
1913 memset(&adapter->compare, 0, sizeof(adapter->compare));
1914 adapter->compare.source = &adapter->clock;
1915 adapter->compare.target = ktime_get_real;
1916 adapter->compare.num_samples = 10;
1917 timecompare_update(&adapter->compare, 0);
1920 /* 82575 does not support timesync */
1928 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1929 * @adapter: board private structure to initialize
1931 * igb_sw_init initializes the Adapter private data structure.
1932 * Fields are initialized based on PCI device information and
1933 * OS network device settings (MTU size).
1935 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1937 struct e1000_hw *hw = &adapter->hw;
1938 struct net_device *netdev = adapter->netdev;
1939 struct pci_dev *pdev = adapter->pdev;
1941 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1943 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1944 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1945 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1946 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1948 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1949 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1951 #ifdef CONFIG_PCI_IOV
1952 if (hw->mac.type == e1000_82576)
1953 adapter->vfs_allocated_count = max_vfs;
1955 #endif /* CONFIG_PCI_IOV */
1956 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1959 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1960 * then we should combine the queues into a queue pair in order to
1961 * conserve interrupts due to limited supply
1963 if ((adapter->rss_queues > 4) ||
1964 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1965 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1967 /* This call may decrease the number of queues */
1968 if (igb_init_interrupt_scheme(adapter)) {
1969 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1973 igb_init_hw_timer(adapter);
1974 igb_probe_vfs(adapter);
1976 /* Explicitly disable IRQ since the NIC can be in any state. */
1977 igb_irq_disable(adapter);
1979 set_bit(__IGB_DOWN, &adapter->state);
1984 * igb_open - Called when a network interface is made active
1985 * @netdev: network interface device structure
1987 * Returns 0 on success, negative value on failure
1989 * The open entry point is called when a network interface is made
1990 * active by the system (IFF_UP). At this point all resources needed
1991 * for transmit and receive operations are allocated, the interrupt
1992 * handler is registered with the OS, the watchdog timer is started,
1993 * and the stack is notified that the interface is ready.
1995 static int igb_open(struct net_device *netdev)
1997 struct igb_adapter *adapter = netdev_priv(netdev);
1998 struct e1000_hw *hw = &adapter->hw;
2002 /* disallow open during test */
2003 if (test_bit(__IGB_TESTING, &adapter->state))
2006 netif_carrier_off(netdev);
2008 /* allocate transmit descriptors */
2009 err = igb_setup_all_tx_resources(adapter);
2013 /* allocate receive descriptors */
2014 err = igb_setup_all_rx_resources(adapter);
2018 igb_power_up_link(adapter);
2020 /* before we allocate an interrupt, we must be ready to handle it.
2021 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2022 * as soon as we call pci_request_irq, so we have to setup our
2023 * clean_rx handler before we do so. */
2024 igb_configure(adapter);
2026 err = igb_request_irq(adapter);
2030 /* From here on the code is the same as igb_up() */
2031 clear_bit(__IGB_DOWN, &adapter->state);
2033 for (i = 0; i < adapter->num_q_vectors; i++) {
2034 struct igb_q_vector *q_vector = adapter->q_vector[i];
2035 napi_enable(&q_vector->napi);
2038 /* Clear any pending interrupts. */
2041 igb_irq_enable(adapter);
2043 /* notify VFs that reset has been completed */
2044 if (adapter->vfs_allocated_count) {
2045 u32 reg_data = rd32(E1000_CTRL_EXT);
2046 reg_data |= E1000_CTRL_EXT_PFRSTD;
2047 wr32(E1000_CTRL_EXT, reg_data);
2050 netif_tx_start_all_queues(netdev);
2052 /* start the watchdog. */
2053 hw->mac.get_link_status = 1;
2054 schedule_work(&adapter->watchdog_task);
2059 igb_release_hw_control(adapter);
2060 igb_power_down_link(adapter);
2061 igb_free_all_rx_resources(adapter);
2063 igb_free_all_tx_resources(adapter);
2071 * igb_close - Disables a network interface
2072 * @netdev: network interface device structure
2074 * Returns 0, this is not allowed to fail
2076 * The close entry point is called when an interface is de-activated
2077 * by the OS. The hardware is still under the driver's control, but
2078 * needs to be disabled. A global MAC reset is issued to stop the
2079 * hardware, and all transmit and receive resources are freed.
2081 static int igb_close(struct net_device *netdev)
2083 struct igb_adapter *adapter = netdev_priv(netdev);
2085 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2088 igb_free_irq(adapter);
2090 igb_free_all_tx_resources(adapter);
2091 igb_free_all_rx_resources(adapter);
2097 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2098 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2100 * Return 0 on success, negative on failure
2102 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2104 struct pci_dev *pdev = tx_ring->pdev;
2107 size = sizeof(struct igb_buffer) * tx_ring->count;
2108 tx_ring->buffer_info = vmalloc(size);
2109 if (!tx_ring->buffer_info)
2111 memset(tx_ring->buffer_info, 0, size);
2113 /* round up to nearest 4K */
2114 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2115 tx_ring->size = ALIGN(tx_ring->size, 4096);
2117 tx_ring->desc = pci_alloc_consistent(pdev,
2124 tx_ring->next_to_use = 0;
2125 tx_ring->next_to_clean = 0;
2129 vfree(tx_ring->buffer_info);
2131 "Unable to allocate memory for the transmit descriptor ring\n");
2136 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2137 * (Descriptors) for all queues
2138 * @adapter: board private structure
2140 * Return 0 on success, negative on failure
2142 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2144 struct pci_dev *pdev = adapter->pdev;
2147 for (i = 0; i < adapter->num_tx_queues; i++) {
2148 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2151 "Allocation for Tx Queue %u failed\n", i);
2152 for (i--; i >= 0; i--)
2153 igb_free_tx_resources(adapter->tx_ring[i]);
2158 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2159 int r_idx = i % adapter->num_tx_queues;
2160 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2166 * igb_setup_tctl - configure the transmit control registers
2167 * @adapter: Board private structure
2169 void igb_setup_tctl(struct igb_adapter *adapter)
2171 struct e1000_hw *hw = &adapter->hw;
2174 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2175 wr32(E1000_TXDCTL(0), 0);
2177 /* Program the Transmit Control Register */
2178 tctl = rd32(E1000_TCTL);
2179 tctl &= ~E1000_TCTL_CT;
2180 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2181 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2183 igb_config_collision_dist(hw);
2185 /* Enable transmits */
2186 tctl |= E1000_TCTL_EN;
2188 wr32(E1000_TCTL, tctl);
2192 * igb_configure_tx_ring - Configure transmit ring after Reset
2193 * @adapter: board private structure
2194 * @ring: tx ring to configure
2196 * Configure a transmit ring after a reset.
2198 void igb_configure_tx_ring(struct igb_adapter *adapter,
2199 struct igb_ring *ring)
2201 struct e1000_hw *hw = &adapter->hw;
2203 u64 tdba = ring->dma;
2204 int reg_idx = ring->reg_idx;
2206 /* disable the queue */
2207 txdctl = rd32(E1000_TXDCTL(reg_idx));
2208 wr32(E1000_TXDCTL(reg_idx),
2209 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2213 wr32(E1000_TDLEN(reg_idx),
2214 ring->count * sizeof(union e1000_adv_tx_desc));
2215 wr32(E1000_TDBAL(reg_idx),
2216 tdba & 0x00000000ffffffffULL);
2217 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2219 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2220 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2221 writel(0, ring->head);
2222 writel(0, ring->tail);
2224 txdctl |= IGB_TX_PTHRESH;
2225 txdctl |= IGB_TX_HTHRESH << 8;
2226 txdctl |= IGB_TX_WTHRESH << 16;
2228 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2229 wr32(E1000_TXDCTL(reg_idx), txdctl);
2233 * igb_configure_tx - Configure transmit Unit after Reset
2234 * @adapter: board private structure
2236 * Configure the Tx unit of the MAC after a reset.
2238 static void igb_configure_tx(struct igb_adapter *adapter)
2242 for (i = 0; i < adapter->num_tx_queues; i++)
2243 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2247 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2248 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2250 * Returns 0 on success, negative on failure
2252 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2254 struct pci_dev *pdev = rx_ring->pdev;
2257 size = sizeof(struct igb_buffer) * rx_ring->count;
2258 rx_ring->buffer_info = vmalloc(size);
2259 if (!rx_ring->buffer_info)
2261 memset(rx_ring->buffer_info, 0, size);
2263 desc_len = sizeof(union e1000_adv_rx_desc);
2265 /* Round up to nearest 4K */
2266 rx_ring->size = rx_ring->count * desc_len;
2267 rx_ring->size = ALIGN(rx_ring->size, 4096);
2269 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2275 rx_ring->next_to_clean = 0;
2276 rx_ring->next_to_use = 0;
2281 vfree(rx_ring->buffer_info);
2282 rx_ring->buffer_info = NULL;
2283 dev_err(&pdev->dev, "Unable to allocate memory for "
2284 "the receive descriptor ring\n");
/* Calls igb_setup_rx_resources() for each Rx queue in ascending index
 * order. The visible failure handling logs the queue index and then walks
 * back down through the lower indices, releasing each one with
 * igb_free_rx_resources(). */
2289 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2290 * (Descriptors) for all queues
2291 * @adapter: board private structure
2293 * Return 0 on success, negative on failure
2295 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2297 struct pci_dev *pdev = adapter->pdev;
2300 for (i = 0; i < adapter->num_rx_queues; i++) {
2301 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2304 "Allocation for Rx Queue %u failed\n", i);
/* unwind: i-- skips the queue that just failed; the i >= 0 test only
 * terminates if i is a signed type (declaration not visible here) */
2305 for (i--; i >= 0; i--)
2306 igb_free_rx_resources(adapter->rx_ring[i]);
/* Programs the multiple-receive-queue setup: the RSS key words
 * (E1000_RSSRK), the redirection table (E1000_RETA), E1000_RXCSUM and
 * finally E1000_MRQC, calling igb_vmm_control() along the way.
 * NOTE(review): several short lines (braces, else arms, case labels) are
 * not visible in this extract; confirm the branch structure in the full
 * file before relying on the notes below. */
2315 * igb_setup_mrqc - configure the multiple receive queue control registers
2316 * @adapter: Board private structure
2318 static void igb_setup_mrqc(struct igb_adapter *adapter)
2320 struct e1000_hw *hw = &adapter->hw;
2322 u32 j, num_rx_queues, shift = 0, shift2 = 0;
/* fixed 40-byte hash key, consumed four bytes at a time below */
2327 static const u8 rsshash[40] = {
2328 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2329 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2330 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2331 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2333 /* Fill out hash function seeds */
/* ten 32-bit key words, each packed from four consecutive rsshash bytes
 * with the lowest-indexed byte in bits 7:0 */
2334 for (j = 0; j < 10; j++) {
2335 u32 rsskey = rsshash[(j * 4)];
2336 rsskey |= rsshash[(j * 4) + 1] << 8;
2337 rsskey |= rsshash[(j * 4) + 2] << 16;
2338 rsskey |= rsshash[(j * 4) + 3] << 24;
2339 array_wr32(E1000_RSSRK(0), j, rsskey);
/* default queue count for the table fill; the VF branch below may
 * override it (its case bodies are not visible here) */
2342 num_rx_queues = adapter->rss_queues;
2344 if (adapter->vfs_allocated_count) {
2345 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2346 switch (hw->mac.type) {
2362 if (hw->mac.type == e1000_82575)
/* 128 table entries: each is staged in reta.bytes[j & 3] as
 * (j % num_rx_queues) << shift and the group is written out as
 * reta.dword to register E1000_RETA(j >> 2) */
2366 for (j = 0; j < (32 * 4); j++) {
2367 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2369 reta.bytes[j & 3] |= num_rx_queues << shift2;
2371 wr32(E1000_RETA(j >> 2), reta.dword);
2375 * Disable raw packet checksumming so that RSS hash is placed in
2376 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2377 * offloads as they are enabled by default
2379 rxcsum = rd32(E1000_RXCSUM);
2380 rxcsum |= E1000_RXCSUM_PCSD;
2382 if (adapter->hw.mac.type >= e1000_82576)
2383 /* Enable Receive Checksum Offload for SCTP */
2384 rxcsum |= E1000_RXCSUM_CRCOFL;
2386 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2387 wr32(E1000_RXCSUM, rxcsum);
2389 /* If VMDq is enabled then we set the appropriate mode for that, else
2390 * we default to RSS so that an RSS hash is calculated per packet even
2391 * if we are only using one queue */
2392 if (adapter->vfs_allocated_count) {
2393 if (hw->mac.type > e1000_82575) {
2394 /* Set the default pool for the PF's first queue */
/* the default-pool field is cleared, then set to vfs_allocated_count,
 * the same pool index the rest of this file uses for the PF */
2395 u32 vtctl = rd32(E1000_VT_CTL);
2396 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2397 E1000_VT_CTL_DISABLE_DEF_POOL);
2398 vtctl |= adapter->vfs_allocated_count <<
2399 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2400 wr32(E1000_VT_CTL, vtctl);
/* mrqc is assigned (not OR-ed) exactly one of the three mode values */
2402 if (adapter->rss_queues > 1)
2403 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2405 mrqc = E1000_MRQC_ENABLE_VMDQ;
2407 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2409 igb_vmm_control(adapter);
/* the RSS field-select bits are OR-ed on top of the chosen mode */
2411 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2412 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2413 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2414 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2415 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2416 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2417 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2418 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2420 wr32(E1000_MRQC, mrqc);
/* Read-modify-write of E1000_RCTL: the current value is read, selected
 * fields are cleared or set, and the result is written back as the last
 * step. On the way it also zeroes E1000_RXDCTL(0) and, when VFs are
 * allocated, writes ALL_QUEUES to E1000_QDE. */
2424 * igb_setup_rctl - configure the receive control registers
2425 * @adapter: Board private structure
2427 void igb_setup_rctl(struct igb_adapter *adapter)
2429 struct e1000_hw *hw = &adapter->hw;
2432 rctl = rd32(E1000_RCTL);
/* clear the two-bit field at E1000_RCTL_MO_SHIFT (refilled from
 * mc_filter_type just below) and both loopback-mode bits */
2434 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2435 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2437 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2438 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2441 * enable stripping of CRC. It's unlikely this will break BMC
2442 * redirection as it did with e1000. Newer features require
2443 * that the HW strips the CRC.
2445 rctl |= E1000_RCTL_SECRC;
2447 /* disable store bad packets and clear size bits. */
2448 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2450 /* enable LPE to prevent packets larger than max_frame_size */
2451 rctl |= E1000_RCTL_LPE;
2453 /* disable queue 0 to prevent tail write w/o re-config */
2454 wr32(E1000_RXDCTL(0), 0);
2456 /* Attention!!! For SR-IOV PF driver operations you must enable
2457 * queue drop for all VF and PF queues to prevent head of line blocking
2458 * if an un-trusted VF does not provide descriptors to hardware.
2460 if (adapter->vfs_allocated_count) {
2461 /* set all queue drop enable bits */
2462 wr32(E1000_QDE, ALL_QUEUES);
/* single write-back of everything accumulated in rctl above */
2465 wr32(E1000_RCTL, rctl);
/* Rewrites the RLPML field of E1000_VMOLR(vfn) with 'size' and sets
 * E1000_VMOLR_LPE. 'size' first grows by VLAN_TAG_SIZE when vfn indexes an
 * allocated VF whose vf_data[vfn].vlans_enabled is non-zero.
 * NOTE(review): the rest of the parameter list and the return statement
 * are not visible in this extract. */
2468 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2471 struct e1000_hw *hw = &adapter->hw;
2474 /* if it isn't the PF check to see if VFs are enabled and
2475 * increase the size to support vlan tags */
2476 if (vfn < adapter->vfs_allocated_count &&
2477 adapter->vf_data[vfn].vlans_enabled)
2478 size += VLAN_TAG_SIZE;
/* size is OR-ed in unmasked after the old field is cleared, so it is
 * assumed to fit within E1000_VMOLR_RLPML_MASK - TODO confirm */
2480 vmolr = rd32(E1000_VMOLR(vfn));
2481 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2482 vmolr |= size | E1000_VMOLR_LPE;
2483 wr32(E1000_VMOLR(vfn), vmolr);
/* Writes the receive length limit to E1000_RLPML, starting from
 * adapter->max_frame_size. The visible steps can add VLAN_TAG_SIZE and, on
 * the VF path, hand the real limit to igb_set_vf_rlpml() for pool pf_id
 * while E1000_RLPML itself receives MAX_JUMBO_FRAME_SIZE.
 * NOTE(review): the conditions guarding those two steps are not visible
 * in this extract. */
2489 * igb_rlpml_set - set maximum receive packet size
2490 * @adapter: board private structure
2492 * Configure maximum receivable packet size.
2494 static void igb_rlpml_set(struct igb_adapter *adapter)
2496 u32 max_frame_size = adapter->max_frame_size;
2497 struct e1000_hw *hw = &adapter->hw;
/* the PF's pool index is taken to be the number of allocated VFs */
2498 u16 pf_id = adapter->vfs_allocated_count;
2501 max_frame_size += VLAN_TAG_SIZE;
2503 /* if vfs are enabled we set RLPML to the largest possible request
2504 * size and set the VMOLR RLPML to the size we need */
2506 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2507 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2510 wr32(E1000_RLPML, max_frame_size);
/* Read-modify-write of E1000_VMOLR(vfn): sets STRVLAN, adjusts AUPE,
 * recomputes BAM and RSSE from scratch, then writes the register back.
 * Does nothing on MAC types older than e1000_82576.
 * NOTE(review): the remaining parameters are not visible here; the call in
 * igb_configure_rx_ring() passes (adapter, pool index, true). */
2513 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2516 struct e1000_hw *hw = &adapter->hw;
2520 * This register exists only on 82576 and newer so if we are older then
2521 * we should exit and do nothing
2523 if (hw->mac.type < e1000_82576)
2526 vmolr = rd32(E1000_VMOLR(vfn));
2527 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
/* the next two statements are opposite; presumably they are the two arms
 * of a test whose if/else lines are not visible here - TODO confirm */
2529 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2531 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2533 /* clear all bits that might not be set */
2534 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
/* RSSE is restored only for the pool whose index equals
 * vfs_allocated_count, and only when more than one RSS queue is in use */
2536 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2537 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2539 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2542 if (vfn <= adapter->vfs_allocated_count)
2543 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2545 wr32(E1000_VMOLR(vfn), vmolr);
/* Programs one Rx queue, addressed by ring->reg_idx: clears QUEUE_ENABLE
 * in RXDCTL, loads the ring base address and length, zeroes head and
 * tail, writes SRRCTL (buffer sizes and descriptor type), applies
 * igb_set_vmolr(), and finally rewrites RXDCTL with the thresholds and
 * QUEUE_ENABLE set. */
2549 * igb_configure_rx_ring - Configure a receive ring after Reset
2550 * @adapter: board private structure
2551 * @ring: receive ring to be configured
2553 * Configure the Rx unit of the MAC after a reset.
2555 void igb_configure_rx_ring(struct igb_adapter *adapter,
2556 struct igb_ring *ring)
2558 struct e1000_hw *hw = &adapter->hw;
2559 u64 rdba = ring->dma;
2560 int reg_idx = ring->reg_idx;
2563 /* disable the queue */
2564 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2565 wr32(E1000_RXDCTL(reg_idx),
2566 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2568 /* Set DMA base address registers */
/* the 64-bit ring->dma value is split into low and high 32-bit halves */
2569 wr32(E1000_RDBAL(reg_idx),
2570 rdba & 0x00000000ffffffffULL);
2571 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2572 wr32(E1000_RDLEN(reg_idx),
2573 ring->count * sizeof(union e1000_adv_rx_desc));
2575 /* initialize head and tail */
/* the register addresses are cached in the ring and both are zeroed */
2576 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2577 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2578 writel(0, ring->head);
2579 writel(0, ring->tail);
2581 /* set descriptor configuration */
/* buffers below IGB_RXBUFFER_1024 take the header-split layout:
 * rx_buffer_len (rounded up to 64) goes in the header-size field and the
 * packet-size field gets half a page or IGB_RXBUFFER_16384.
 * NOTE(review): the #else/#endif and the else line that selects the
 * one-buffer layout are not visible in this extract. */
2582 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2583 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2584 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2585 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2586 srrctl |= IGB_RXBUFFER_16384 >>
2587 E1000_SRRCTL_BSIZEPKT_SHIFT;
2589 srrctl |= (PAGE_SIZE / 2) >>
2590 E1000_SRRCTL_BSIZEPKT_SHIFT;
2592 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2594 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2595 E1000_SRRCTL_BSIZEPKT_SHIFT;
2596 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2598 /* Only set Drop Enable if we are supporting multiple queues */
2599 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2600 srrctl |= E1000_SRRCTL_DROP_EN;
2602 wr32(E1000_SRRCTL(reg_idx), srrctl);
2604 /* set filtering for VMDQ pools */
/* the pool index handed over is the low three bits of reg_idx */
2605 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2607 /* enable receive descriptor fetching */
/* the low 20 bits are cleared before the three threshold fields are
 * OR-ed in at bit offsets 0, 8 and 16 */
2608 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2609 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2610 rxdctl &= 0xFFF00000;
2611 rxdctl |= IGB_RX_PTHRESH;
2612 rxdctl |= IGB_RX_HTHRESH << 8;
2613 rxdctl |= IGB_RX_WTHRESH << 16;
2614 wr32(E1000_RXDCTL(reg_idx), rxdctl);
/* Receive-side setup in three steps: igb_set_uta(), re-registering the
 * adapter MAC address in RAR entry 0 for pool vfs_allocated_count, and
 * igb_configure_rx_ring() for every Rx queue. */
2618 * igb_configure_rx - Configure receive Unit after Reset
2619 * @adapter: board private structure
2621 * Configure the Rx unit of the MAC after a reset.
2623 static void igb_configure_rx(struct igb_adapter *adapter)
2627 /* set UTA to appropriate mode */
2628 igb_set_uta(adapter);
2630 /* set the correct pool for the PF default MAC address in entry 0 */
2631 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2632 adapter->vfs_allocated_count);
2634 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2635 * the Base and Length of the Rx Descriptor Ring */
2636 for (i = 0; i < adapter->num_rx_queues; i++)
2637 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
/* Releases everything a Tx ring owns, in order: in-flight buffers via
 * igb_clean_tx_ring(), the buffer_info array (vfree), then the descriptor
 * memory (pci_free_consistent). Both pointers are set to NULL afterwards. */
2641 * igb_free_tx_resources - Free Tx Resources per Queue
2642 * @tx_ring: Tx descriptor ring for a specific queue
2644 * Free all transmit software resources
2646 void igb_free_tx_resources(struct igb_ring *tx_ring)
2648 igb_clean_tx_ring(tx_ring);
2650 vfree(tx_ring->buffer_info);
2651 tx_ring->buffer_info = NULL;
2653 /* if not set, then don't free */
/* NOTE(review): the guard this comment refers to is not visible in this
 * extract; presumably it tests tx_ring->desc - confirm in the full file */
2657 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2658 tx_ring->desc, tx_ring->dma);
2660 tx_ring->desc = NULL;
/* Calls igb_free_tx_resources() on each of the adapter's num_tx_queues
 * Tx rings, in ascending index order. */
2664 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2665 * @adapter: board private structure
2667 * Free all transmit software resources
2669 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2673 for (i = 0; i < adapter->num_tx_queues; i++)
2674 igb_free_tx_resources(adapter->tx_ring[i]);
/* Returns one Tx buffer_info slot to its idle state: undoes the DMA
 * mapping if one is recorded, frees the attached skb if any, and zeroes
 * the bookkeeping fields. Slots with dma == 0 and skb == NULL only get
 * their bookkeeping reset. */
2677 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2678 struct igb_buffer *buffer_info)
2680 if (buffer_info->dma) {
/* mapped_as_page selects which unmap call matches the mapping */
2681 if (buffer_info->mapped_as_page)
2682 pci_unmap_page(tx_ring->pdev,
2684 buffer_info->length,
2687 pci_unmap_single(tx_ring->pdev,
2689 buffer_info->length,
2691 buffer_info->dma = 0;
2693 if (buffer_info->skb) {
2694 dev_kfree_skb_any(buffer_info->skb);
2695 buffer_info->skb = NULL;
2697 buffer_info->time_stamp = 0;
2698 buffer_info->length = 0;
2699 buffer_info->next_to_watch = 0;
2700 buffer_info->mapped_as_page = false;
/* Empties a Tx ring without freeing the ring itself: every slot goes
 * through igb_unmap_and_free_tx_resource(), then buffer_info and the
 * descriptor memory are zero-filled and both ring indices reset to 0.
 * The buffer_info NULL test at the top guards a ring that has no
 * buffer_info array. */
2704 * igb_clean_tx_ring - Free Tx Buffers
2705 * @tx_ring: ring to be cleaned
2707 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2709 struct igb_buffer *buffer_info;
2713 if (!tx_ring->buffer_info)
2715 /* Free all the Tx ring sk_buffs */
2717 for (i = 0; i < tx_ring->count; i++) {
2718 buffer_info = &tx_ring->buffer_info[i];
2719 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2722 size = sizeof(struct igb_buffer) * tx_ring->count;
2723 memset(tx_ring->buffer_info, 0, size);
2725 /* Zero out the descriptor ring */
2726 memset(tx_ring->desc, 0, tx_ring->size);
2728 tx_ring->next_to_use = 0;
2729 tx_ring->next_to_clean = 0;
/* Calls igb_clean_tx_ring() on each of the adapter's num_tx_queues Tx
 * rings, in ascending index order. */
2733 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2734 * @adapter: board private structure
2736 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2740 for (i = 0; i < adapter->num_tx_queues; i++)
2741 igb_clean_tx_ring(adapter->tx_ring[i]);
/* Releases everything an Rx ring owns, in order: posted buffers via
 * igb_clean_rx_ring(), the buffer_info array (vfree), then the descriptor
 * memory (pci_free_consistent). Both pointers are set to NULL afterwards. */
2745 * igb_free_rx_resources - Free Rx Resources
2746 * @rx_ring: ring to clean the resources from
2748 * Free all receive software resources
2750 void igb_free_rx_resources(struct igb_ring *rx_ring)
2752 igb_clean_rx_ring(rx_ring);
2754 vfree(rx_ring->buffer_info);
2755 rx_ring->buffer_info = NULL;
2757 /* if not set, then don't free */
/* NOTE(review): the guard this comment refers to is not visible in this
 * extract; presumably it tests rx_ring->desc - confirm in the full file */
2761 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2762 rx_ring->desc, rx_ring->dma);
2764 rx_ring->desc = NULL;
/* Calls igb_free_rx_resources() on each of the adapter's num_rx_queues
 * Rx rings, in ascending index order. */
2768 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2769 * @adapter: board private structure
2771 * Free all receive software resources
2773 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2777 for (i = 0; i < adapter->num_rx_queues; i++)
2778 igb_free_rx_resources(adapter->rx_ring[i]);
/* Empties an Rx ring without freeing the ring itself. Each slot can hold
 * up to four things, released independently: a single-buffer DMA mapping,
 * an skb, a page DMA mapping and a page reference. Afterwards buffer_info
 * and the descriptor memory are zero-filled and both indices reset to 0. */
2782 * igb_clean_rx_ring - Free Rx Buffers per Queue
2783 * @rx_ring: ring to free buffers from
2785 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2787 struct igb_buffer *buffer_info;
2791 if (!rx_ring->buffer_info)
2794 /* Free all the Rx ring sk_buffs */
2795 for (i = 0; i < rx_ring->count; i++) {
2796 buffer_info = &rx_ring->buffer_info[i];
/* the unmap length is the ring-wide rx_buffer_len, not a per-slot value */
2797 if (buffer_info->dma) {
2798 pci_unmap_single(rx_ring->pdev,
2800 rx_ring->rx_buffer_len,
2801 PCI_DMA_FROMDEVICE);
2802 buffer_info->dma = 0;
2805 if (buffer_info->skb) {
2806 dev_kfree_skb(buffer_info->skb);
2807 buffer_info->skb = NULL;
2809 if (buffer_info->page_dma) {
2810 pci_unmap_page(rx_ring->pdev,
2811 buffer_info->page_dma,
2813 PCI_DMA_FROMDEVICE);
2814 buffer_info->page_dma = 0;
/* put_page() drops this slot's reference on the page */
2816 if (buffer_info->page) {
2817 put_page(buffer_info->page);
2818 buffer_info->page = NULL;
2819 buffer_info->page_offset = 0;
2823 size = sizeof(struct igb_buffer) * rx_ring->count;
2824 memset(rx_ring->buffer_info, 0, size);
2826 /* Zero out the descriptor ring */
2827 memset(rx_ring->desc, 0, rx_ring->size);
2829 rx_ring->next_to_clean = 0;
2830 rx_ring->next_to_use = 0;
/* Calls igb_clean_rx_ring() on each of the adapter's num_rx_queues Rx
 * rings, in ascending index order. */
2834 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2835 * @adapter: board private structure
2837 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2841 for (i = 0; i < adapter->num_rx_queues; i++)
2842 igb_clean_rx_ring(adapter->rx_ring[i]);
/* Validates the requested address, copies it into both netdev->dev_addr
 * and hw->mac.addr, and reprograms RAR entry 0 for pool
 * vfs_allocated_count. An address rejected by is_valid_ether_addr()
 * yields -EADDRNOTAVAIL with nothing modified. */
2846 * igb_set_mac - Change the Ethernet Address of the NIC
2847 * @netdev: network interface device structure
2848 * @p: pointer to an address structure
2850 * Returns 0 on success, negative on failure
2852 static int igb_set_mac(struct net_device *netdev, void *p)
2854 struct igb_adapter *adapter = netdev_priv(netdev);
2855 struct e1000_hw *hw = &adapter->hw;
/* p is treated as a struct sockaddr; only sa_data is read */
2856 struct sockaddr *addr = p;
2858 if (!is_valid_ether_addr(addr->sa_data))
2859 return -EADDRNOTAVAIL;
2861 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2862 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2864 /* set the correct pool for the new PF MAC address in entry 0 */
2865 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2866 adapter->vfs_allocated_count);
/* Flattens netdev's multicast list into one packed byte array and passes
 * it to igb_update_mc_addr_list(). With an empty list it passes NULL/0
 * instead and calls igb_restore_vf_multicasts(). */
2872 * igb_write_mc_addr_list - write multicast addresses to MTA
2873 * @netdev: network interface device structure
2875 * Writes multicast address list to the MTA hash table.
2876 * Returns: -ENOMEM on failure
2877 * 0 on no addresses written
2878 * X on writing X addresses to MTA
2880 static int igb_write_mc_addr_list(struct net_device *netdev)
2882 struct igb_adapter *adapter = netdev_priv(netdev);
2883 struct e1000_hw *hw = &adapter->hw;
2884 struct dev_mc_list *mc_ptr = netdev->mc_list;
2888 if (netdev_mc_empty(netdev)) {
2889 /* nothing to program, so clear mc list */
2890 igb_update_mc_addr_list(hw, NULL, 0);
2891 igb_restore_vf_multicasts(adapter);
/* GFP_ATOMIC allocation; the literal 6 is presumably ETH_ALEN, the
 * per-address stride the memcpy below uses - TODO confirm */
2895 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2899 /* The shared function expects a packed array of only addresses. */
2900 mc_ptr = netdev->mc_list;
/* the linked list is walked in step with the counter i */
2902 for (i = 0; i < netdev_mc_count(netdev); i++) {
2905 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2906 mc_ptr = mc_ptr->next;
2908 igb_update_mc_addr_list(hw, mta_list, i);
2911 return netdev_mc_count(netdev);
/* Loads netdev's secondary unicast addresses into RAR entries via
 * igb_rar_set_qsel(), then zeroes the RAH/RAL pair of every entry index
 * still counted in rar_entries. The usable budget is rar_entry_count
 * minus (vfs_allocated_count + 1); more addresses than that are refused
 * up front. */
2915 * igb_write_uc_addr_list - write unicast addresses to RAR table
2916 * @netdev: network interface device structure
2918 * Writes unicast address list to the RAR table.
2919 * Returns: -ENOMEM on failure/insufficient address space
2920 * 0 on no addresses written
2921 * X on writing X addresses to the RAR table
2923 static int igb_write_uc_addr_list(struct net_device *netdev)
2925 struct igb_adapter *adapter = netdev_priv(netdev);
2926 struct e1000_hw *hw = &adapter->hw;
2927 unsigned int vfn = adapter->vfs_allocated_count;
/* unsigned arithmetic: assumes rar_entry_count > vfn, otherwise this
 * wraps to a very large value - TODO confirm the invariant */
2928 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2931 /* return ENOMEM indicating insufficient memory for addresses */
2932 if (netdev_uc_count(netdev) > rar_entries)
2935 if (!netdev_uc_empty(netdev) && rar_entries) {
2936 struct netdev_hw_addr *ha;
2938 netdev_for_each_uc_addr(ha, netdev) {
2941 igb_rar_set_qsel(adapter, ha->addr,
2947 /* write the addresses in reverse order to avoid write combining */
/* counts down and stops before index 0, so entry 0 is left untouched */
2948 for (; rar_entries > 0 ; rar_entries--) {
2949 wr32(E1000_RAH(rar_entries), 0);
2950 wr32(E1000_RAL(rar_entries), 0);
/* Recomputes the receive filter bits from netdev->flags and the address
 * lists. Two values are built in parallel: rctl (written to E1000_RCTL)
 * and vmolr (merged into E1000_VMOLR(vfn), which the visible code skips
 * on MAC types older than e1000_82576).
 * NOTE(review): the else lines and the tests on 'count' are not visible in
 * this extract; confirm the branch nesting in the full file. */
2958 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2959 * @netdev: network interface device structure
2961 * The set_rx_mode entry point is called whenever the unicast or multicast
2962 * address lists or the network interface flags are updated. This routine is
2963 * responsible for configuring the hardware for proper unicast, multicast,
2964 * promiscuous mode, and all-multi behavior.
2966 static void igb_set_rx_mode(struct net_device *netdev)
2968 struct igb_adapter *adapter = netdev_priv(netdev);
2969 struct e1000_hw *hw = &adapter->hw;
2970 unsigned int vfn = adapter->vfs_allocated_count;
2971 u32 rctl, vmolr = 0;
2974 /* Check for Promiscuous and All Multicast modes */
2975 rctl = rd32(E1000_RCTL);
2977 /* clear the affected bits */
2978 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
/* IFF_PROMISC sets both the unicast and multicast promiscuous bits */
2980 if (netdev->flags & IFF_PROMISC) {
2981 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2982 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2984 if (netdev->flags & IFF_ALLMULTI) {
2985 rctl |= E1000_RCTL_MPE;
2986 vmolr |= E1000_VMOLR_MPME;
2989 * Write addresses to the MTA, if the attempt fails
2990 * then we should just turn on promiscous mode so
2991 * that we can at least receive multicast traffic
2993 count = igb_write_mc_addr_list(netdev);
2995 rctl |= E1000_RCTL_MPE;
2996 vmolr |= E1000_VMOLR_MPME;
2998 vmolr |= E1000_VMOLR_ROMPE;
3002 * Write addresses to available RAR registers, if there is not
3003 * sufficient space to store all the addresses then enable
3004 * unicast promiscous mode
3006 count = igb_write_uc_addr_list(netdev);
3008 rctl |= E1000_RCTL_UPE;
3009 vmolr |= E1000_VMOLR_ROPE;
3011 rctl |= E1000_RCTL_VFE;
3013 wr32(E1000_RCTL, rctl);
3016 * In order to support SR-IOV and eventually VMDq it is necessary to set
3017 * the VMOLR to enable the appropriate modes. Without this workaround
3018 * we will have issues with VLAN tag stripping not being done for frames
3019 * that are only arriving because we are the default pool
3021 if (hw->mac.type < e1000_82576)
/* all register bits other than ROPE/MPME/ROMPE are carried over; those
 * three come only from the vmolr value computed above */
3024 vmolr |= rd32(E1000_VMOLR(vfn)) &
3025 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3026 wr32(E1000_VMOLR(vfn), vmolr);
3027 igb_restore_vf_multicasts(adapter);
/* Callback taking an unsigned long: 'data' is cast back to the
 * struct igb_adapter pointer and igb_get_phy_info() is run on its hw.
 * igb_watchdog_task() arms adapter->phy_info_timer; presumably that timer
 * invokes this function - TODO confirm where the timer is set up. */
3030 /* Need to wait a few seconds after link up to get diagnostic information from
3032 static void igb_update_phy_info(unsigned long data)
3034 struct igb_adapter *adapter = (struct igb_adapter *) data;
3035 igb_get_phy_info(&adapter->hw);
/* Reports link state as a bool, starting from false. The source of truth
 * depends on hw->phy.media_type: for copper it is the inverse of
 * hw->mac.get_link_status after check_for_link() runs; for internal
 * serdes it is hw->mac.serdes_has_link after check_for_link().
 * NOTE(review): the break/default/return lines are not visible in this
 * extract, so what happens to ret_val cannot be confirmed from here. */
3039 * igb_has_link - check shared code for link and determine up/down
3040 * @adapter: pointer to driver private info
3042 bool igb_has_link(struct igb_adapter *adapter)
3044 struct e1000_hw *hw = &adapter->hw;
3045 bool link_active = false;
3048 /* get_link_status is set on LSC (link status) interrupt or
3049 * rx sequence error interrupt. get_link_status will stay
3050 * false until the e1000_check_for_link establishes link
3051 * for copper adapters ONLY
3053 switch (hw->phy.media_type) {
3054 case e1000_media_type_copper:
3055 if (hw->mac.get_link_status) {
3056 ret_val = hw->mac.ops.check_for_link(hw);
3057 link_active = !hw->mac.get_link_status;
3062 case e1000_media_type_internal_serdes:
3063 ret_val = hw->mac.ops.check_for_link(hw);
3064 link_active = hw->mac.serdes_has_link;
3067 case e1000_media_type_unknown:
/* Timer callback that does no work itself: it recovers the adapter from
 * 'data' and queues adapter->watchdog_task with schedule_work(). */
3075 * igb_watchdog - Timer Call-back
3076 * @data: pointer to adapter cast into an unsigned long
3078 static void igb_watchdog(unsigned long data)
3080 struct igb_adapter *adapter = (struct igb_adapter *)data;
3081 /* Do the rest outside of interrupt context */
3082 schedule_work(&adapter->watchdog_task);
/* Work item behind the watchdog timer. In order it: samples link state
 * with igb_has_link() and handles up/down transitions, refreshes
 * statistics, checks each Tx ring for work stranded by a lost link, fires
 * a software interrupt, and re-arms watchdog_timer about 2*HZ jiffies out
 * unless __IGB_DOWN is set.
 * NOTE(review): the if/else lines on 'link' and several braces are not
 * visible in this extract; the up and down halves are inferred from the
 * two log messages. */
3085 static void igb_watchdog_task(struct work_struct *work)
3087 struct igb_adapter *adapter = container_of(work,
3090 struct e1000_hw *hw = &adapter->hw;
3091 struct net_device *netdev = adapter->netdev;
3095 link = igb_has_link(adapter);
/* carrier currently off: this is the link-up transition */
3097 if (!netif_carrier_ok(netdev)) {
3099 hw->mac.ops.get_speed_and_duplex(hw,
3100 &adapter->link_speed,
3101 &adapter->link_duplex);
3103 ctrl = rd32(E1000_CTRL);
3104 /* Links status message must follow this format */
3105 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3106 "Flow Control: %s\n",
3108 adapter->link_speed,
3109 adapter->link_duplex == FULL_DUPLEX ?
3110 "Full Duplex" : "Half Duplex",
3111 ((ctrl & E1000_CTRL_TFCE) &&
3112 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3113 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3114 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3116 /* tweak tx_queue_len according to speed/duplex and
3117 * adjust the timeout factor */
/* defaults first; the switch then overrides them for specific speeds
 * (its case labels are not visible in this extract) */
3118 netdev->tx_queue_len = adapter->tx_queue_len;
3119 adapter->tx_timeout_factor = 1;
3120 switch (adapter->link_speed) {
3122 netdev->tx_queue_len = 10;
3123 adapter->tx_timeout_factor = 14;
3126 netdev->tx_queue_len = 100;
3127 /* maybe add some timeout factor ? */
3131 netif_carrier_on(netdev);
3133 igb_ping_all_vfs(adapter);
3135 /* link state has changed, schedule phy info update */
3136 if (!test_bit(__IGB_DOWN, &adapter->state))
3137 mod_timer(&adapter->phy_info_timer,
3138 round_jiffies(jiffies + 2 * HZ));
/* carrier currently on: this is the link-down transition */
3141 if (netif_carrier_ok(netdev)) {
3142 adapter->link_speed = 0;
3143 adapter->link_duplex = 0;
3144 /* Links status message must follow this format */
3145 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3147 netif_carrier_off(netdev);
3149 igb_ping_all_vfs(adapter);
3151 /* link state has changed, schedule phy info update */
3152 if (!test_bit(__IGB_DOWN, &adapter->state))
3153 mod_timer(&adapter->phy_info_timer,
3154 round_jiffies(jiffies + 2 * HZ));
3158 igb_update_stats(adapter);
3160 for (i = 0; i < adapter->num_tx_queues; i++) {
3161 struct igb_ring *tx_ring = adapter->tx_ring[i];
3162 if (!netif_carrier_ok(netdev)) {
3163 /* We've lost link, so the controller stops DMA,
3164 * but we've got queued Tx work that's never going
3165 * to get done, so reset controller to flush Tx.
3166 * (Do the reset outside of interrupt context). */
/* true when the unused-descriptor count plus one is below the ring
 * size, i.e. some descriptors are still outstanding */
3167 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3168 adapter->tx_timeout_count++;
3169 schedule_work(&adapter->reset_task);
3170 /* return immediately since reset is imminent */
3175 /* Force detection of hung controller every watchdog period */
3176 tx_ring->detect_tx_hung = true;
3179 /* Cause software interrupt to ensure rx ring is cleaned */
/* with MSI-X entries the eims_value of every q_vector is OR-ed together
 * and written to EICS; the other visible write targets ICS instead */
3180 if (adapter->msix_entries) {
3182 for (i = 0; i < adapter->num_q_vectors; i++) {
3183 struct igb_q_vector *q_vector = adapter->q_vector[i];
3184 eics |= q_vector->eims_value;
3186 wr32(E1000_EICS, eics);
3188 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3191 /* Reset the timer */
3192 if (!test_bit(__IGB_DOWN, &adapter->state))
3193 mod_timer(&adapter->watchdog_timer,
3194 round_jiffies(jiffies + 2 * HZ));
/* Interrupt-throttle latency classes. igb_update_itr() and igb_set_itr()
 * in this file also use lowest_latency, low_latency and bulk_latency;
 * those enumerators are not visible in this extract - confirm their
 * values in the full file. */
3197 enum latency_range {
3201 latency_invalid = 255
3205 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3207 * Stores a new ITR value based strictly on packet size. This
3208 * algorithm is less sophisticated than that used in igb_update_itr,
3209 * due to the difficulty of synchronizing statistics across multiple
3210 * receive rings. The divisors and thresholds used by this function
3211 * were determined based on theoretical maximum wire speed and testing
3212 * data, in order to minimize response time while increasing bulk
3214 * This functionality is controlled by the InterruptThrottleRate module
3215 * parameter (see igb_param.c)
3216 * NOTE: This function is called only when operating in a multiqueue
3217 * receive environment.
3218 * @q_vector: pointer to q_vector
/* Derives a new q_vector->itr_val from the average bytes-per-packet seen
 * on the vector's rx and tx rings (the larger of the two averages), flags
 * set_itr when the value changed, and zeroes the rings' byte and packet
 * counters for the next interval.
 * NOTE(review): several short lines (assignments in the non-gigabit
 * branch, gotos/labels, else) are not visible in this extract. */
3220 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3222 int new_val = q_vector->itr_val;
3223 int avg_wire_size = 0;
3224 struct igb_adapter *adapter = q_vector->adapter;
3226 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3227 * ints/sec - ITR timer value of 120 ticks.
3229 if (adapter->link_speed != SPEED_1000) {
3234 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3235 struct igb_ring *ring = q_vector->rx_ring;
3236 avg_wire_size = ring->total_bytes / ring->total_packets;
3239 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3240 struct igb_ring *ring = q_vector->tx_ring;
3241 avg_wire_size = max_t(u32, avg_wire_size,
3242 (ring->total_bytes /
3243 ring->total_packets));
3246 /* if avg_wire_size isn't set no work was done */
3250 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3251 avg_wire_size += 24;
3253 /* Don't starve jumbo frames */
3254 avg_wire_size = min(avg_wire_size, 3000);
3256 /* Give a little boost to mid-size frames */
/* sizes strictly between 300 and 1200 divide by 3; the other visible
 * assignment divides by 2 */
3257 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3258 new_val = avg_wire_size / 3;
3260 new_val = avg_wire_size / 2;
3262 /* when in itr mode 3 do not exceed 20K ints/sec */
3263 if (adapter->rx_itr_setting == 3 && new_val < 196)
/* only a changed value is stored and flagged for the interrupt path */
3267 if (new_val != q_vector->itr_val) {
3268 q_vector->itr_val = new_val;
3269 q_vector->set_itr = 1;
3272 if (q_vector->rx_ring) {
3273 q_vector->rx_ring->total_bytes = 0;
3274 q_vector->rx_ring->total_packets = 0;
3276 if (q_vector->tx_ring) {
3277 q_vector->tx_ring->total_bytes = 0;
3278 q_vector->tx_ring->total_packets = 0;
/* Small state machine over the latency classes: given the current class
 * (itr_setting) and this interval's packet and byte counts, it picks the
 * next class. retval starts as itr_setting, so a case in which no
 * threshold matches leaves the class unchanged.
 * NOTE(review): the guard before the goto, the break lines and the final
 * label/return are not visible in this extract. The divisions by
 * 'packets' rely on that guard excluding packets == 0 - confirm. */
3283 * igb_update_itr - update the dynamic ITR value based on statistics
3284 * Stores a new ITR value based on packets and byte
3285 * counts during the last interrupt. The advantage of per interrupt
3286 * computation is faster updates and more accurate ITR for the current
3287 * traffic pattern. Constants in this function were computed
3288 * based on theoretical maximum wire speed and thresholds were set based
3289 * on testing data as well as attempting to minimize response time
3290 * while increasing bulk throughput.
3291 * this functionality is controlled by the InterruptThrottleRate module
3292 * parameter (see igb_param.c)
3293 * NOTE: These calculations are only valid when operating in a single-
3294 * queue environment.
3295 * @adapter: pointer to adapter
3296 * @itr_setting: current q_vector->itr_val
3297 * @packets: the number of packets during this measurement interval
3298 * @bytes: the number of bytes during this measurement interval
3300 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3301 int packets, int bytes)
3303 unsigned int retval = itr_setting;
3306 goto update_itr_done;
3308 switch (itr_setting) {
3309 case lowest_latency:
3310 /* handle TSO and jumbo frames */
3311 if (bytes/packets > 8000)
3312 retval = bulk_latency;
3313 else if ((packets < 5) && (bytes > 512))
3314 retval = low_latency;
3316 case low_latency: /* 50 usec aka 20000 ints/s */
3317 if (bytes > 10000) {
3318 /* this if handles the TSO accounting */
3319 if (bytes/packets > 8000) {
3320 retval = bulk_latency;
3321 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3322 retval = bulk_latency;
3323 } else if ((packets > 35)) {
3324 retval = lowest_latency;
3326 } else if (bytes/packets > 2000) {
3327 retval = bulk_latency;
3328 } else if (packets <= 2 && bytes < 512) {
3329 retval = lowest_latency;
3332 case bulk_latency: /* 250 usec aka 4000 ints/s */
3333 if (bytes > 25000) {
3335 retval = low_latency;
3336 } else if (bytes < 1500) {
3337 retval = low_latency;
/* ITR update for the first vector only (adapter->q_vector[0]). It runs
 * igb_update_itr() separately on that vector's rx and tx ring counters,
 * takes the larger of the two classes, maps the class to an ITR value
 * (56, 196 or 980 in the visible cases), zeroes the ring counters, and if
 * the value changed stores it in itr_val and sets set_itr.
 * NOTE(review): the body of the non-gigabit branch, the case labels for
 * 196/980 and the second argument of each igb_update_itr() call are not
 * visible in this extract. Unlike igb_update_ring_itr(), the visible code
 * dereferences rx_ring and tx_ring without a NULL test - confirm both are
 * always present on vector 0. */
3346 static void igb_set_itr(struct igb_adapter *adapter)
3348 struct igb_q_vector *q_vector = adapter->q_vector[0];
3350 u32 new_itr = q_vector->itr_val;
3352 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3353 if (adapter->link_speed != SPEED_1000) {
3359 adapter->rx_itr = igb_update_itr(adapter,
3361 q_vector->rx_ring->total_packets,
3362 q_vector->rx_ring->total_bytes);
3364 adapter->tx_itr = igb_update_itr(adapter,
3366 q_vector->tx_ring->total_packets,
3367 q_vector->tx_ring->total_bytes);
3368 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3370 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3371 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3372 current_itr = low_latency;
3374 switch (current_itr) {
3375 /* counts and packets in update_itr are dependent on these numbers */
3376 case lowest_latency:
3377 new_itr = 56; /* aka 70,000 ints/sec */
3380 new_itr = 196; /* aka 20,000 ints/sec */
3383 new_itr = 980; /* aka 4,000 ints/sec */
/* counters restart from zero for the next measurement interval */
3390 q_vector->rx_ring->total_bytes = 0;
3391 q_vector->rx_ring->total_packets = 0;
3392 q_vector->tx_ring->total_bytes = 0;
3393 q_vector->tx_ring->total_packets = 0;
3395 if (new_itr != q_vector->itr_val) {
3396 /* this attempts to bias the interrupt rate towards Bulk
3397 * by adding intermediate steps when interrupt rate is
3399 new_itr = new_itr > q_vector->itr_val ?
3400 max((new_itr * q_vector->itr_val) /
3401 (new_itr + (q_vector->itr_val >> 2)),
3404 /* Don't write the value here; it resets the adapter's
3405 * internal timer, and causes us to delay far longer than
3406 * we should between interrupts. Instead, we write the ITR
3407 * value at the beginning of the next interrupt so the timing
3408 * ends up being correct.
3410 q_vector->itr_val = new_itr;
3411 q_vector->set_itr = 1;
/* Bit layout of the u32 tx_flags word passed to igb_tso_adv() and
 * igb_tx_csum_adv(). The low bits are independent feature flags; the
 * upper 16 bits (IGB_TX_FLAGS_VLAN_MASK, at IGB_TX_FLAGS_VLAN_SHIFT) are
 * a field that those functions copy into the context descriptor when
 * IGB_TX_FLAGS_VLAN is set. */
3417 #define IGB_TX_FLAGS_CSUM 0x00000001
3418 #define IGB_TX_FLAGS_VLAN 0x00000002
3419 #define IGB_TX_FLAGS_TSO 0x00000004
3420 #define IGB_TX_FLAGS_IPV4 0x00000008
3421 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3422 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3423 #define IGB_TX_FLAGS_VLAN_SHIFT 16
/* Prepares a TSO send: patches the skb's TCP checksum field with a
 * pseudo-header value, fills the context descriptor at
 * tx_ring->next_to_use, and writes the advanced index back to
 * tx_ring->next_to_use. *hdr_len is increased by the network offset and
 * the network header length in the visible code.
 * NOTE(review): the return statements, the l4len contribution to
 * *hdr_len, the index increment/wrap and several call arguments are not
 * visible in this extract. */
3425 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3426 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3428 struct e1000_adv_tx_context_desc *context_desc;
3431 struct igb_buffer *buffer_info;
3432 u32 info = 0, tu_cmd = 0;
/* pskb_expand_head() is called only when skb_header_cloned() reports a
 * cloned header, ahead of the in-place header edits below */
3436 if (skb_header_cloned(skb)) {
3437 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3442 l4len = tcp_hdrlen(skb);
3445 if (skb->protocol == htons(ETH_P_IP)) {
3446 struct iphdr *iph = ip_hdr(skb);
3449 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3453 } else if (skb_is_gso_v6(skb)) {
3454 ipv6_hdr(skb)->payload_len = 0;
3455 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3456 &ipv6_hdr(skb)->daddr,
3460 i = tx_ring->next_to_use;
3462 buffer_info = &tx_ring->buffer_info[i];
3463 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3464 /* VLAN MACLEN IPLEN */
/* info packs three things: the VLAN bits of tx_flags (if flagged), the
 * network offset shifted by E1000_ADVTXD_MACLEN_SHIFT, and the network
 * header length in the low bits */
3465 if (tx_flags & IGB_TX_FLAGS_VLAN)
3466 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3467 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3468 *hdr_len += skb_network_offset(skb);
3469 info |= skb_network_header_len(skb);
3470 *hdr_len += skb_network_header_len(skb);
3471 context_desc->vlan_macip_lens = cpu_to_le32(info);
3473 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3474 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3476 if (skb->protocol == htons(ETH_P_IP))
3477 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3478 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3480 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
/* the segment size comes from skb_shinfo(skb)->gso_size */
3483 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3484 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3486 /* For 82575, context index must be unique per ring. */
3487 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3488 mss_l4len_idx |= tx_ring->reg_idx << 4;
3490 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3491 context_desc->seqnum_seed = 0;
/* the context slot gets bookkeeping but no DMA mapping (dma = 0) */
3493 buffer_info->time_stamp = jiffies;
3494 buffer_info->next_to_watch = i;
3495 buffer_info->dma = 0;
3497 if (i == tx_ring->count)
3500 tx_ring->next_to_use = i;
/*
 * igb_tx_csum_adv - write a context descriptor for checksum offload / VLAN.
 *
 * A context descriptor is only built when the skb requests CHECKSUM_PARTIAL
 * or tx_flags carries IGB_TX_FLAGS_VLAN.  For CHECKSUM_PARTIAL the L4 type
 * (TCP or SCTP) is chosen from the IPv4 protocol field or the IPv6 next-header
 * field; the ethertype is read through the VLAN header when the skb protocol
 * is 802.1Q.  Other protocols produce a rate-limited warning.
 */
3505 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3506 struct sk_buff *skb, u32 tx_flags)
3508 struct e1000_adv_tx_context_desc *context_desc;
3509 struct pci_dev *pdev = tx_ring->pdev;
3510 struct igb_buffer *buffer_info;
3511 u32 info = 0, tu_cmd = 0;
3514 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3515 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3516 i = tx_ring->next_to_use;
3517 buffer_info = &tx_ring->buffer_info[i];
3518 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3520 if (tx_flags & IGB_TX_FLAGS_VLAN)
3521 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3523 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
/* the IP header length is only recorded when a checksum is requested */
3524 if (skb->ip_summed == CHECKSUM_PARTIAL)
3525 info |= skb_network_header_len(skb);
3527 context_desc->vlan_macip_lens = cpu_to_le32(info);
3529 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3531 if (skb->ip_summed == CHECKSUM_PARTIAL) {
/* for 802.1Q frames take the encapsulated ethertype from the VLAN header */
3534 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3535 const struct vlan_ethhdr *vhdr =
3536 (const struct vlan_ethhdr*)skb->data;
3538 protocol = vhdr->h_vlan_encapsulated_proto;
3540 protocol = skb->protocol;
3544 case cpu_to_be16(ETH_P_IP):
3545 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3546 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3547 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3548 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3549 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3551 case cpu_to_be16(ETH_P_IPV6):
3552 /* XXX what about other V6 headers?? */
3553 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3554 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3555 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3556 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3559 if (unlikely(net_ratelimit()))
3560 dev_warn(&pdev->dev,
3561 "partial checksum but proto=%x!\n",
3567 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3568 context_desc->seqnum_seed = 0;
/* rings flagged TX_CTX_IDX tag the context with their register index */
3569 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3570 context_desc->mss_l4len_idx =
3571 cpu_to_le32(tx_ring->reg_idx << 4);
3573 buffer_info->time_stamp = jiffies;
3574 buffer_info->next_to_watch = i;
3575 buffer_info->dma = 0;
3578 if (i == tx_ring->count)
3580 tx_ring->next_to_use = i;
3587 #define IGB_MAX_TXD_PWR 16
3588 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
3590 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3593 struct igb_buffer *buffer_info;
3594 struct pci_dev *pdev = tx_ring->pdev;
3595 unsigned int len = skb_headlen(skb);
3596 unsigned int count = 0, i;
3599 i = tx_ring->next_to_use;
3601 buffer_info = &tx_ring->buffer_info[i];
3602 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3603 buffer_info->length = len;
3604 /* set time_stamp *before* dma to help avoid a possible race */
3605 buffer_info->time_stamp = jiffies;
3606 buffer_info->next_to_watch = i;
3607 buffer_info->dma = pci_map_single(pdev, skb->data, len,
3609 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3612 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3613 struct skb_frag_struct *frag;
3617 if (i == tx_ring->count)
3620 frag = &skb_shinfo(skb)->frags[f];
3623 buffer_info = &tx_ring->buffer_info[i];
3624 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3625 buffer_info->length = len;
3626 buffer_info->time_stamp = jiffies;
3627 buffer_info->next_to_watch = i;
3628 buffer_info->mapped_as_page = true;
3629 buffer_info->dma = pci_map_page(pdev,
3634 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3639 tx_ring->buffer_info[i].skb = skb;
3640 tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3641 tx_ring->buffer_info[first].next_to_watch = i;
3646 dev_err(&pdev->dev, "TX DMA map failed\n");
3648 /* clear timestamp and dma mappings for failed buffer_info mapping */
3649 buffer_info->dma = 0;
3650 buffer_info->time_stamp = 0;
3651 buffer_info->length = 0;
3652 buffer_info->next_to_watch = 0;
3653 buffer_info->mapped_as_page = false;
3656 /* clear timestamp and dma mappings for remaining portion of packet */
3657 while (count >= 0) {
3661 i += tx_ring->count;
3662 buffer_info = &tx_ring->buffer_info[i];
3663 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
/*
 * igb_tx_queue_adv - fill the data descriptors for a mapped packet and
 * notify hardware.
 *
 * Builds the command word from tx_flags (VLAN insertion, timestamping, TSO)
 * and the offload/status word (checksum insertion, context index, payload
 * length = paylen - hdr_len), writes one descriptor per buffer_info entry
 * starting at next_to_use, ORs IGB_ADVTXD_DCMD into the last descriptor,
 * then stores the new next_to_use and writes it to the ring tail register.
 */
3669 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3670 u32 tx_flags, int count, u32 paylen,
3673 union e1000_adv_tx_desc *tx_desc;
3674 struct igb_buffer *buffer_info;
3675 u32 olinfo_status = 0, cmd_type_len;
3676 unsigned int i = tx_ring->next_to_use;
3678 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3679 E1000_ADVTXD_DCMD_DEXT);
3681 if (tx_flags & IGB_TX_FLAGS_VLAN)
3682 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3684 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3685 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3687 if (tx_flags & IGB_TX_FLAGS_TSO) {
3688 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3690 /* insert tcp checksum */
3691 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3693 /* insert ip checksum */
3694 if (tx_flags & IGB_TX_FLAGS_IPV4)
3695 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3697 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3698 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
/* same context index encoding the context-descriptor writers use */
3701 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3702 (tx_flags & (IGB_TX_FLAGS_CSUM |
3704 IGB_TX_FLAGS_VLAN)))
3705 olinfo_status |= tx_ring->reg_idx << 4;
3707 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
/* one data descriptor per mapped buffer */
3710 buffer_info = &tx_ring->buffer_info[i];
3711 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3712 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3713 tx_desc->read.cmd_type_len =
3714 cpu_to_le32(cmd_type_len | buffer_info->length);
3715 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3718 if (i == tx_ring->count)
3720 } while (count > 0);
/* tx_desc still points at the last descriptor written by the loop */
3722 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3723 /* Force memory writes to complete before letting h/w
3724 * know there are new descriptors to fetch. (Only
3725 * applicable for weak-ordered memory model archs,
3726 * such as IA-64). */
3729 tx_ring->next_to_use = i;
3730 writel(i, tx_ring->tail);
3731 /* we need this if more than one processor can write to our tail
3732 * at a time, it syncronizes IO on IA64/Altix systems */
/*
 * __igb_maybe_stop_tx - slow path of igb_maybe_stop_tx().
 *
 * Stops the ring's subqueue, then re-checks the number of unused descriptors
 * against @size; the queue is woken again (and restart_queue counted) when
 * the re-check finds room.
 */
3736 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3738 struct net_device *netdev = tx_ring->netdev;
3740 netif_stop_subqueue(netdev, tx_ring->queue_index);
3742 /* Herbert's original patch had:
3743 * smp_mb__after_netif_stop_queue();
3744 * but since that doesn't exist yet, just open code it. */
3747 /* We need to check again in a case another CPU has just
3748 * made room available. */
3749 if (igb_desc_unused(tx_ring) < size)
3753 netif_wake_subqueue(netdev, tx_ring->queue_index);
3754 tx_ring->tx_stats.restart_queue++;
/*
 * igb_maybe_stop_tx - check that @size descriptors are free on @tx_ring.
 *
 * Fast path: tests igb_desc_unused() against @size and only falls through to
 * __igb_maybe_stop_tx() when the ring is too full.
 */
3758 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3760 if (igb_desc_unused(tx_ring) >= size)
3762 return __igb_maybe_stop_tx(tx_ring, size);
/*
 * igb_xmit_frame_ring_adv - transmit one skb on a specific tx ring.
 *
 * Returns NETDEV_TX_BUSY when the ring lacks nr_frags + 4 free descriptors.
 * Otherwise collects tx_flags (hardware timestamp, VLAN tag, IPv4), writes a
 * TSO or checksum context descriptor, maps the skb with igb_tx_map_adv() and
 * queues the data descriptors.  On a TSO or mapping failure the skb is freed
 * and NETDEV_TX_OK is returned; a mapping failure also rewinds next_to_use
 * to the slot the packet started at.
 */
3765 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3766 struct igb_ring *tx_ring)
3768 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3773 union skb_shared_tx *shtx = skb_tx(skb);
3775 /* need: 1 descriptor per page,
3776 * + 2 desc gap to keep tail from touching head,
3777 * + 1 desc for skb->data,
3778 * + 1 desc for context descriptor,
3779 * otherwise try next time */
3780 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3781 /* this is a hard error */
3782 return NETDEV_TX_BUSY;
/* the stack asked for a hardware transmit timestamp on this skb */
3785 if (unlikely(shtx->hardware)) {
3786 shtx->in_progress = 1;
3787 tx_flags |= IGB_TX_FLAGS_TSTAMP;
/* the VLAN tag travels in the upper bits of tx_flags */
3790 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3791 tx_flags |= IGB_TX_FLAGS_VLAN;
3792 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3795 if (skb->protocol == htons(ETH_P_IP))
3796 tx_flags |= IGB_TX_FLAGS_IPV4;
/* remember where this packet starts so the ring can be rewound on error */
3798 first = tx_ring->next_to_use;
3799 if (skb_is_gso(skb)) {
3800 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3803 dev_kfree_skb_any(skb);
3804 return NETDEV_TX_OK;
3809 tx_flags |= IGB_TX_FLAGS_TSO;
3810 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3811 (skb->ip_summed == CHECKSUM_PARTIAL))
3812 tx_flags |= IGB_TX_FLAGS_CSUM;
3815 * count reflects descriptors mapped, if 0 or less then mapping error
3816 * has occured and we need to rewind the descriptor queue
3818 count = igb_tx_map_adv(tx_ring, skb, first);
3820 dev_kfree_skb_any(skb);
3821 tx_ring->buffer_info[first].time_stamp = 0;
3822 tx_ring->next_to_use = first;
3823 return NETDEV_TX_OK;
3826 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3828 /* Make sure there is space in the ring for the next send. */
3829 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3831 return NETDEV_TX_OK;
/*
 * igb_xmit_frame_adv - net_device transmit entry point.
 *
 * Frees the skb and reports NETDEV_TX_OK when the adapter is marked
 * __IGB_DOWN or the skb has no data.  Otherwise selects the tx ring from
 * skb->queue_mapping via multi_tx_table and hands the skb to
 * igb_xmit_frame_ring_adv().
 */
3834 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3835 struct net_device *netdev)
3837 struct igb_adapter *adapter = netdev_priv(netdev);
3838 struct igb_ring *tx_ring;
3841 if (test_bit(__IGB_DOWN, &adapter->state)) {
3842 dev_kfree_skb_any(skb);
3843 return NETDEV_TX_OK;
3846 if (skb->len <= 0) {
3847 dev_kfree_skb_any(skb);
3848 return NETDEV_TX_OK;
/* mask keeps the index inside the multi_tx_table lookup range */
3851 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3852 tx_ring = adapter->multi_tx_table[r_idx];
3854 /* This goes back to the question of how to logically map a tx queue
3855 * to a flow. Right now, performance is impacted slightly negatively
3856 * if using multiple tx queues. If the stack breaks away from a
3857 * single qdisc implementation, we can look at this again. */
3858 return igb_xmit_frame_ring_adv(skb, tx_ring);
/*
 * Counts the timeout, requests a global device reset on 82580 parts, and
 * defers the actual reset to the reset_task work item.
 */
3862 * igb_tx_timeout - Respond to a Tx Hang
3863 * @netdev: network interface device structure
3865 static void igb_tx_timeout(struct net_device *netdev)
3867 struct igb_adapter *adapter = netdev_priv(netdev);
3868 struct e1000_hw *hw = &adapter->hw;
3870 /* Do the reset outside of interrupt context */
3871 adapter->tx_timeout_count++;
3873 if (hw->mac.type == e1000_82580)
3874 hw->dev_spec._82575.global_device_reset = true;
3876 schedule_work(&adapter->reset_task);
3878 (adapter->eims_enable_mask & ~adapter->eims_other));
/*
 * igb_reset_task - work handler for adapter->reset_task.
 *
 * Recovers the adapter from the embedded work_struct and re-initialises it
 * through igb_reinit_locked().
 */
3881 static void igb_reset_task(struct work_struct *work)
3883 struct igb_adapter *adapter;
3884 adapter = container_of(work, struct igb_adapter, reset_task);
3886 igb_reinit_locked(adapter);
/* Performs no hardware access; it simply hands back &netdev->stats. */
3890 * igb_get_stats - Get System Network Statistics
3891 * @netdev: network interface device structure
3893 * Returns the address of the device statistics structure.
3894 * The statistics are actually updated from the timer callback.
3896 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3898 /* only return the current stats */
3899 return &netdev->stats;
/*
 * The frame size is new_mtu plus the Ethernet header and FCS.  MTUs below 68
 * or frames above MAX_JUMBO_FRAME_SIZE / MAX_STD_JUMBO_FRAME_SIZE are
 * rejected with an error message.  The __IGB_RESETTING bit is taken for the
 * duration of the change and cleared at the end.  The rx buffer length is
 * chosen from the frame size and written to every rx ring.
 */
3903 * igb_change_mtu - Change the Maximum Transfer Unit
3904 * @netdev: network interface device structure
3905 * @new_mtu: new value for maximum frame size
3907 * Returns 0 on success, negative on failure
3909 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3911 struct igb_adapter *adapter = netdev_priv(netdev);
3912 struct pci_dev *pdev = adapter->pdev;
3913 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3914 u32 rx_buffer_len, i;
3916 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3917 dev_err(&pdev->dev, "Invalid MTU setting\n");
3921 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3922 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
/* loop until this caller owns the __IGB_RESETTING bit */
3926 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3929 /* igb_down has a dependency on max_frame_size */
3930 adapter->max_frame_size = max_frame;
3932 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3933 * means we reserve 2 more, this pushes us to allocate from the next
3935 * i.e. RXBUFFER_2048 --> size-4096 slab
3938 if (max_frame <= IGB_RXBUFFER_1024)
3939 rx_buffer_len = IGB_RXBUFFER_1024;
3940 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3941 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3943 rx_buffer_len = IGB_RXBUFFER_128;
3945 if (netif_running(netdev))
3948 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3949 netdev->mtu, new_mtu);
3950 netdev->mtu = new_mtu;
3952 for (i = 0; i < adapter->num_rx_queues; i++)
3953 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3955 if (netif_running(netdev))
3960 clear_bit(__IGB_RESETTING, &adapter->state);
/*
 * Skips the update when link_speed is 0 or the PCI channel is offline.
 * Otherwise it sums the per-ring rx/tx byte and packet counters into the
 * netdev stats, accumulates the hardware statistics registers into
 * adapter->stats, derives the OS-level error counters from them, and reads
 * the PHY idle-error count on copper links running at 1000 Mb/s.
 */
3966 * igb_update_stats - Update the board statistics counters
3967 * @adapter: board private structure
3970 void igb_update_stats(struct igb_adapter *adapter)
3972 struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3973 struct e1000_hw *hw = &adapter->hw;
3974 struct pci_dev *pdev = adapter->pdev;
3980 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3983 * Prevent stats update while adapter is being reset, or if the pci
3984 * connection is down.
3986 if (adapter->link_speed == 0)
3988 if (pci_channel_offline(pdev))
/* per-queue drop counts (low 12 bits of RQDPC) also feed rx_fifo_errors */
3993 for (i = 0; i < adapter->num_rx_queues; i++) {
3994 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3995 struct igb_ring *ring = adapter->rx_ring[i];
3996 ring->rx_stats.drops += rqdpc_tmp;
3997 net_stats->rx_fifo_errors += rqdpc_tmp;
3998 bytes += ring->rx_stats.bytes;
3999 packets += ring->rx_stats.packets;
4002 net_stats->rx_bytes = bytes;
4003 net_stats->rx_packets = packets;
4007 for (i = 0; i < adapter->num_tx_queues; i++) {
4008 struct igb_ring *ring = adapter->tx_ring[i];
4009 bytes += ring->tx_stats.bytes;
4010 packets += ring->tx_stats.packets;
4012 net_stats->tx_bytes = bytes;
4013 net_stats->tx_packets = packets;
4015 /* read stats registers */
4016 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4017 adapter->stats.gprc += rd32(E1000_GPRC);
4018 adapter->stats.gorc += rd32(E1000_GORCL);
4019 rd32(E1000_GORCH); /* clear GORCL */
4020 adapter->stats.bprc += rd32(E1000_BPRC);
4021 adapter->stats.mprc += rd32(E1000_MPRC);
4022 adapter->stats.roc += rd32(E1000_ROC);
4024 adapter->stats.prc64 += rd32(E1000_PRC64);
4025 adapter->stats.prc127 += rd32(E1000_PRC127);
4026 adapter->stats.prc255 += rd32(E1000_PRC255);
4027 adapter->stats.prc511 += rd32(E1000_PRC511);
4028 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4029 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4030 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4031 adapter->stats.sec += rd32(E1000_SEC);
4033 adapter->stats.mpc += rd32(E1000_MPC);
4034 adapter->stats.scc += rd32(E1000_SCC);
4035 adapter->stats.ecol += rd32(E1000_ECOL);
4036 adapter->stats.mcc += rd32(E1000_MCC);
4037 adapter->stats.latecol += rd32(E1000_LATECOL);
4038 adapter->stats.dc += rd32(E1000_DC);
4039 adapter->stats.rlec += rd32(E1000_RLEC);
4040 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4041 adapter->stats.xontxc += rd32(E1000_XONTXC);
4042 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4043 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4044 adapter->stats.fcruc += rd32(E1000_FCRUC);
4045 adapter->stats.gptc += rd32(E1000_GPTC);
4046 adapter->stats.gotc += rd32(E1000_GOTCL);
4047 rd32(E1000_GOTCH); /* clear GOTCL */
/* RNBC is added to both the driver counter and rx_fifo_errors */
4048 rnbc = rd32(E1000_RNBC);
4049 adapter->stats.rnbc += rnbc;
4050 net_stats->rx_fifo_errors += rnbc;
4051 adapter->stats.ruc += rd32(E1000_RUC);
4052 adapter->stats.rfc += rd32(E1000_RFC);
4053 adapter->stats.rjc += rd32(E1000_RJC);
4054 adapter->stats.tor += rd32(E1000_TORH);
4055 adapter->stats.tot += rd32(E1000_TOTH);
4056 adapter->stats.tpr += rd32(E1000_TPR);
4058 adapter->stats.ptc64 += rd32(E1000_PTC64);
4059 adapter->stats.ptc127 += rd32(E1000_PTC127);
4060 adapter->stats.ptc255 += rd32(E1000_PTC255);
4061 adapter->stats.ptc511 += rd32(E1000_PTC511);
4062 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4063 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4065 adapter->stats.mptc += rd32(E1000_MPTC);
4066 adapter->stats.bptc += rd32(E1000_BPTC);
4068 adapter->stats.tpt += rd32(E1000_TPT);
4069 adapter->stats.colc += rd32(E1000_COLC);
4071 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4072 /* read internal phy specific stats */
4073 reg = rd32(E1000_CTRL_EXT);
4074 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4075 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4076 adapter->stats.tncrs += rd32(E1000_TNCRS);
4079 adapter->stats.tsctc += rd32(E1000_TSCTC);
4080 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4082 adapter->stats.iac += rd32(E1000_IAC);
4083 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4084 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4085 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4086 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4087 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4088 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4089 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4090 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4092 /* Fill out the OS statistics structure */
4093 net_stats->multicast = adapter->stats.mprc;
4094 net_stats->collisions = adapter->stats.colc;
4098 /* RLEC on some newer hardware can be incorrect so build
4099 * our own version based on RUC and ROC */
4100 net_stats->rx_errors = adapter->stats.rxerrc +
4101 adapter->stats.crcerrs + adapter->stats.algnerrc +
4102 adapter->stats.ruc + adapter->stats.roc +
4103 adapter->stats.cexterr;
4104 net_stats->rx_length_errors = adapter->stats.ruc +
4106 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4107 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4108 net_stats->rx_missed_errors = adapter->stats.mpc;
4111 net_stats->tx_errors = adapter->stats.ecol +
4112 adapter->stats.latecol;
4113 net_stats->tx_aborted_errors = adapter->stats.ecol;
4114 net_stats->tx_window_errors = adapter->stats.latecol;
4115 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4117 /* Tx Dropped needs to be maintained elsewhere */
/* idle errors are only read on copper at 1000 Mb/s, if the PHY read succeeds */
4120 if (hw->phy.media_type == e1000_media_type_copper) {
4121 if ((adapter->link_speed == SPEED_1000) &&
4122 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4123 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4124 adapter->phy_stats.idle_errors += phy_tmp;
4128 /* Management Stats */
4129 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4130 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4131 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
/*
 * igb_msix_other - MSI-X handler for the non-queue ("other") interrupt causes.
 *
 * Reads ICR once and dispatches on its bits: a device-reset assertion
 * schedules reset_task, a DMA-out-of-sync report bumps stats.doosync, a
 * mailbox event runs igb_msg_task(), and a link-status change flags
 * get_link_status and kicks the watchdog timer unless the adapter is going
 * down.  The interrupt mask registers are rewritten before returning.
 */
4134 static irqreturn_t igb_msix_other(int irq, void *data)
4136 struct igb_adapter *adapter = data;
4137 struct e1000_hw *hw = &adapter->hw;
4138 u32 icr = rd32(E1000_ICR);
4139 /* reading ICR causes bit 31 of EICR to be cleared */
4141 if (icr & E1000_ICR_DRSTA)
4142 schedule_work(&adapter->reset_task);
4144 if (icr & E1000_ICR_DOUTSYNC) {
4145 /* HW is reporting DMA is out of sync */
4146 adapter->stats.doosync++;
4149 /* Check for a mailbox event */
4150 if (icr & E1000_ICR_VMMB)
4151 igb_msg_task(adapter);
4153 if (icr & E1000_ICR_LSC) {
4154 hw->mac.get_link_status = 1;
4155 /* guard against interrupt when we're going down */
4156 if (!test_bit(__IGB_DOWN, &adapter->state))
4157 mod_timer(&adapter->watchdog_timer, jiffies + 1);
/* the IMS value written depends on whether any VFs are allocated */
4160 if (adapter->vfs_allocated_count)
4161 wr32(E1000_IMS, E1000_IMS_LSC |
4163 E1000_IMS_DOUTSYNC);
4165 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4166 wr32(E1000_EIMS, adapter->eims_other);
/*
 * igb_write_itr - push a pending interrupt-throttle value to hardware.
 *
 * Acts only when q_vector->set_itr is non-zero: the stored itr_val is masked
 * with 0x7FFC, adjusted according to the MAC type, written to the vector's
 * ITR register, and set_itr is cleared.
 */
4171 static void igb_write_itr(struct igb_q_vector *q_vector)
4173 struct igb_adapter *adapter = q_vector->adapter;
4174 u32 itr_val = q_vector->itr_val & 0x7FFC;
4176 if (!q_vector->set_itr)
4182 if (adapter->hw.mac.type == e1000_82575)
4183 itr_val |= itr_val << 16;
4185 itr_val |= 0x8000000;
4187 writel(itr_val, q_vector->itr_register);
4188 q_vector->set_itr = 0;
/*
 * igb_msix_ring - MSI-X handler for a queue vector.
 *
 * Applies any pending ITR update and schedules the vector's NAPI context.
 */
4191 static irqreturn_t igb_msix_ring(int irq, void *data)
4193 struct igb_q_vector *q_vector = data;
4195 /* Write the ITR value calculated from the previous interrupt. */
4196 igb_write_itr(q_vector);
4198 napi_schedule(&q_vector->napi);
4203 #ifdef CONFIG_IGB_DCA
/*
 * igb_update_dca - retarget a queue vector's DCA tags at the current CPU.
 *
 * Compares the CPU returned by get_cpu() with the one cached in
 * q_vector->cpu.  For the vector's tx and rx rings (when present) the
 * DCA control register's CPU-id field is replaced with the tag from
 * dca3_get_tag() - unshifted on 82575, shifted on other MAC types - and the
 * DCA enable bits are set.  The new CPU is then cached in q_vector->cpu.
 */
4204 static void igb_update_dca(struct igb_q_vector *q_vector)
4206 struct igb_adapter *adapter = q_vector->adapter;
4207 struct e1000_hw *hw = &adapter->hw;
4208 int cpu = get_cpu();
4210 if (q_vector->cpu == cpu)
4213 if (q_vector->tx_ring) {
4214 int q = q_vector->tx_ring->reg_idx;
4215 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4216 if (hw->mac.type == e1000_82575) {
4217 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4218 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4220 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4221 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4222 E1000_DCA_TXCTRL_CPUID_SHIFT;
4224 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4225 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4227 if (q_vector->rx_ring) {
4228 int q = q_vector->rx_ring->reg_idx;
4229 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4230 if (hw->mac.type == e1000_82575) {
4231 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4232 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4234 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4235 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4236 E1000_DCA_RXCTRL_CPUID_SHIFT;
/* rx enables DCA for descriptors, headers and packet data */
4238 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4239 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4240 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4241 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4243 q_vector->cpu = cpu;
/*
 * igb_setup_dca - program DCA for every queue vector.
 *
 * Does nothing unless IGB_FLAG_DCA_ENABLED is set.  Selects CB2 mode in
 * DCA_CTRL, then invalidates each vector's cached CPU (-1) so that
 * igb_update_dca() reprograms its tags.
 */
4248 static void igb_setup_dca(struct igb_adapter *adapter)
4250 struct e1000_hw *hw = &adapter->hw;
4253 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4256 /* Always use CB2 mode, difference is masked in the CB driver. */
4257 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4259 for (i = 0; i < adapter->num_q_vectors; i++) {
4260 adapter->q_vector[i]->cpu = -1;
4261 igb_update_dca(adapter->q_vector[i]);
/*
 * __igb_notify_dca - per-device handler for a DCA provider event.
 *
 * @data points at the event code.  On DCA_PROVIDER_ADD the device registers
 * as a DCA requester (unless already enabled) and sets up DCA; on
 * DCA_PROVIDER_REMOVE an enabled device unregisters, clears
 * IGB_FLAG_DCA_ENABLED and disables DCA mode in DCA_CTRL.
 */
4265 static int __igb_notify_dca(struct device *dev, void *data)
4267 struct net_device *netdev = dev_get_drvdata(dev);
4268 struct igb_adapter *adapter = netdev_priv(netdev);
4269 struct pci_dev *pdev = adapter->pdev;
4270 struct e1000_hw *hw = &adapter->hw;
4271 unsigned long event = *(unsigned long *)data;
4274 case DCA_PROVIDER_ADD:
4275 /* if already enabled, don't do it again */
4276 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4278 if (dca_add_requester(dev) == 0) {
4279 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4280 dev_info(&pdev->dev, "DCA enabled\n");
4281 igb_setup_dca(adapter);
4284 /* Fall Through since DCA is disabled. */
4285 case DCA_PROVIDER_REMOVE:
4286 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4287 /* without this a class_device is left
4288 * hanging around in the sysfs model */
4289 dca_remove_requester(dev);
4290 dev_info(&pdev->dev, "DCA disabled\n");
4291 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4292 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
/*
 * igb_notify_dca - DCA notifier callback.
 *
 * Forwards @event to every device bound to igb_driver and maps a non-zero
 * result to NOTIFY_BAD, otherwise NOTIFY_DONE.
 */
4300 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4305 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4308 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4310 #endif /* CONFIG_IGB_DCA */
/*
 * igb_ping_all_vfs - send a PF control message to every allocated VF.
 *
 * The one-word message is E1000_PF_CONTROL_MSG, with the CTS bit added for
 * VFs whose IGB_VF_FLAG_CTS flag is set, written to each VF's mailbox.
 */
4312 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4314 struct e1000_hw *hw = &adapter->hw;
4318 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4319 ping = E1000_PF_CONTROL_MSG;
4320 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4321 ping |= E1000_VT_MSGTYPE_CTS;
4322 igb_write_mbx(hw, &ping, 1, i);
4326 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4328 struct e1000_hw *hw = &adapter->hw;
4329 u32 vmolr = rd32(E1000_VMOLR(vf));
4330 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4332 vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4333 IGB_VF_FLAG_MULTI_PROMISC);
4334 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4336 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4337 vmolr |= E1000_VMOLR_MPME;
4338 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4341 * if we have hashes and we are clearing a multicast promisc
4342 * flag we need to write the hashes to the MTA as this step
4343 * was previously skipped
4345 if (vf_data->num_vf_mc_hashes > 30) {
4346 vmolr |= E1000_VMOLR_MPME;
4347 } else if (vf_data->num_vf_mc_hashes) {
4349 vmolr |= E1000_VMOLR_ROMPE;
4350 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4351 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4355 wr32(E1000_VMOLR(vf), vmolr);
4357 /* there are flags left unprocessed, likely not supported */
4358 if (*msgbuf & E1000_VT_MSGINFO_MASK)
/*
 * igb_set_vf_multicasts - record a VF's multicast hash list.
 *
 * The hash count comes from the MSGINFO field of msgbuf[0] and the 16-bit
 * hashes start at msgbuf[1].  The count and hashes are stored in the VF's
 * vf_data, then igb_set_rx_mode() is called to rebuild the filter state.
 */
4365 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4366 u32 *msgbuf, u32 vf)
4368 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4369 u16 *hash_list = (u16 *)&msgbuf[1];
4370 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4373 /* salt away the number of multicast addresses assigned
4374 * to this VF for later use to restore when the PF multi cast
4377 vf_data->num_vf_mc_hashes = n;
4379 /* only up to 30 hash values supported */
4383 /* store the hashes for later use */
4384 for (i = 0; i < n; i++)
4385 vf_data->vf_mc_hashes[i] = hash_list[i];
4387 /* Flush and reset the mta with the new values */
4388 igb_set_rx_mode(adapter->netdev);
/*
 * igb_restore_vf_multicasts - rebuild multicast filtering for every VF.
 *
 * For each allocated VF the ROMPE/MPME bits of VMOLR are cleared and then
 * re-derived: MPME when the VF stored more than 30 hashes or has
 * IGB_VF_FLAG_MULTI_PROMISC set, otherwise ROMPE plus an MTA entry for each
 * stored hash.
 */
4393 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4395 struct e1000_hw *hw = &adapter->hw;
4396 struct vf_data_storage *vf_data;
4399 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4400 u32 vmolr = rd32(E1000_VMOLR(i));
4401 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4403 vf_data = &adapter->vf_data[i];
4405 if ((vf_data->num_vf_mc_hashes > 30) ||
4406 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4407 vmolr |= E1000_VMOLR_MPME;
4408 } else if (vf_data->num_vf_mc_hashes) {
4409 vmolr |= E1000_VMOLR_ROMPE;
4410 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4411 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4413 wr32(E1000_VMOLR(i), vmolr);
/*
 * igb_clear_vf_vfta - drop a VF from every VLVF pool entry.
 *
 * Walks all E1000_VLVF_ARRAY_SIZE entries; an enabled entry whose pool-select
 * field has become empty has its VLAN id removed from the VFTA.  Each entry
 * is written back and the VF's vlans_enabled count is reset to 0.
 */
4417 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4419 struct e1000_hw *hw = &adapter->hw;
4420 u32 pool_mask, reg, vid;
/* bit representing this VF inside the VLVF pool-select field */
4423 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4425 /* Find the vlan filter for this id */
4426 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4427 reg = rd32(E1000_VLVF(i));
4429 /* remove the vf from the pool */
4432 /* if pool is empty then remove entry from vfta */
4433 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4434 (reg & E1000_VLVF_VLANID_ENABLE)) {
4436 vid = reg & E1000_VLVF_VLANID_MASK;
4437 igb_vfta_set(hw, vid, false);
4440 wr32(E1000_VLVF(i), reg);
4443 adapter->vf_data[vf].vlans_enabled = 0;
/*
 * igb_vlvf_set - add or remove a pool (VF or PF) from a VLAN's VLVF entry.
 *
 * Only acts on 82576 and newer MACs with VFs allocated.  It looks for an
 * enabled VLVF entry matching @vid, falling back to a free entry.  The
 * pool bit for @vf is then set or cleared in that entry, the VFTA is updated
 * when the entry becomes enabled or its pool becomes empty, and - for real
 * VFs only - the VMOLR RLPML field is rewritten when the VF's vlans_enabled
 * count moves between zero and non-zero.
 */
4446 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4448 struct e1000_hw *hw = &adapter->hw;
4451 /* The vlvf table only exists on 82576 hardware and newer */
4452 if (hw->mac.type < e1000_82576)
4455 /* we only need to do this if VMDq is enabled */
4456 if (!adapter->vfs_allocated_count)
4459 /* Find the vlan filter for this id */
4460 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4461 reg = rd32(E1000_VLVF(i));
4462 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4463 vid == (reg & E1000_VLVF_VLANID_MASK))
4468 if (i == E1000_VLVF_ARRAY_SIZE) {
4469 /* Did not find a matching VLAN ID entry that was
4470 * enabled. Search for a free filter entry, i.e.
4471 * one without the enable bit set
4473 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4474 reg = rd32(E1000_VLVF(i));
4475 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4479 if (i < E1000_VLVF_ARRAY_SIZE) {
4480 /* Found an enabled/available entry */
4481 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4483 /* if !enabled we need to set this up in vfta */
4484 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4485 /* add VID to filter table */
4486 igb_vfta_set(hw, vid, true);
4487 reg |= E1000_VLVF_VLANID_ENABLE;
4489 reg &= ~E1000_VLVF_VLANID_MASK;
4491 wr32(E1000_VLVF(i), reg);
4493 /* do not modify RLPML for PF devices */
4494 if (vf >= adapter->vfs_allocated_count)
/* first VLAN for this VF: the RLPML field of VMOLR is rewritten */
4497 if (!adapter->vf_data[vf].vlans_enabled) {
4499 reg = rd32(E1000_VMOLR(vf));
4500 size = reg & E1000_VMOLR_RLPML_MASK;
4502 reg &= ~E1000_VMOLR_RLPML_MASK;
4504 wr32(E1000_VMOLR(vf), reg);
4507 adapter->vf_data[vf].vlans_enabled++;
4511 if (i < E1000_VLVF_ARRAY_SIZE) {
4512 /* remove vf from the pool */
4513 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4514 /* if pool is empty then remove entry from vfta */
4515 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4517 igb_vfta_set(hw, vid, false);
4519 wr32(E1000_VLVF(i), reg);
4521 /* do not modify RLPML for PF devices */
4522 if (vf >= adapter->vfs_allocated_count)
/* last VLAN removed from this VF: the RLPML field is rewritten again */
4525 adapter->vf_data[vf].vlans_enabled--;
4526 if (!adapter->vf_data[vf].vlans_enabled) {
4528 reg = rd32(E1000_VMOLR(vf));
4529 size = reg & E1000_VMOLR_RLPML_MASK;
4531 reg &= ~E1000_VMOLR_RLPML_MASK;
4533 wr32(E1000_VMOLR(vf), reg);
/*
 * igb_set_vmvir - program the VMVIR register of a VF.
 *
 * Writes either @vid combined with E1000_VMVIR_VLANA_DEFAULT, or 0, to
 * E1000_VMVIR(@vf).
 */
4540 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4542 struct e1000_hw *hw = &adapter->hw;
4545 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4547 wr32(E1000_VMVIR(vf), 0);
/*
 * igb_ndo_set_vf_vlan - PF-administered VLAN/QoS assignment for a VF.
 *
 * Arguments are range-checked first (vf below vfs_allocated_count,
 * vlan <= 4095, qos <= 7).  When setting, the VLAN is added to the VF's VLVF
 * pool, VMVIR is programmed with vlan | (qos << VLAN_PRIO_SHIFT), and the
 * values are cached in vf_data as pf_vlan / pf_qos; a warning is logged if
 * the PF is down.  When clearing, the cached pf_vlan is removed and the
 * cached values are reset to 0.
 */
4550 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4551 int vf, u16 vlan, u8 qos)
4554 struct igb_adapter *adapter = netdev_priv(netdev);
4556 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4559 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4562 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4563 igb_set_vmolr(adapter, vf, !vlan);
4564 adapter->vf_data[vf].pf_vlan = vlan;
4565 adapter->vf_data[vf].pf_qos = qos;
4566 dev_info(&adapter->pdev->dev,
4567 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4568 if (test_bit(__IGB_DOWN, &adapter->state)) {
4569 dev_warn(&adapter->pdev->dev,
4570 "The VF VLAN has been set,"
4571 " but the PF device is not up.\n");
4572 dev_warn(&adapter->pdev->dev,
4573 "Bring the PF device up before"
4574 " attempting to use the VF device.\n");
/* clearing path: undo the previously cached PF-assigned VLAN */
4577 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4579 igb_set_vmvir(adapter, vlan, vf);
4580 igb_set_vmolr(adapter, vf, true);
4581 adapter->vf_data[vf].pf_vlan = 0;
4582 adapter->vf_data[vf].pf_qos = 0;
/*
 * igb_set_vf_vlan - handle a VF's mailbox request to add/remove a VLAN.
 *
 * The add/remove selector is the MSGINFO field of msgbuf[0] and the VLAN id
 * is taken from msgbuf[1]; both are passed straight to igb_vlvf_set().
 */
4588 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4590 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4591 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4593 return igb_vlvf_set(adapter, vid, add, vf);
/*
 * igb_vf_reset - return a VF's PF-side state to its defaults.
 *
 * Clears IGB_VF_FLAG_PF_SET_MAC, restarts the NACK rate-limit timestamp,
 * resets VMOLR offloads, clears the VF's VLAN filters (re-applying the
 * PF-assigned VLAN/QoS when one is cached), forgets the VF's multicast
 * hashes and refreshes the rx mode.
 *
 * NOTE(review): this clears PF_SET_MAC on every reset, while
 * igb_vf_reset_event() tests that same flag to decide whether to keep the
 * VF MAC address - confirm that clearing it here is intended.
 */
4596 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4599 adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4600 adapter->vf_data[vf].last_nack = jiffies;
4602 /* reset offloads to defaults */
4603 igb_set_vmolr(adapter, vf, true);
4605 /* reset vlans for device */
4606 igb_clear_vf_vfta(adapter, vf);
4607 if (adapter->vf_data[vf].pf_vlan)
4608 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4609 adapter->vf_data[vf].pf_vlan,
4610 adapter->vf_data[vf].pf_qos);
4612 igb_clear_vf_vfta(adapter, vf);
4614 /* reset multicast table array for vf */
4615 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4617 /* Flush and reset the mta with the new values */
4618 igb_set_rx_mode(adapter->netdev);
/*
 * igb_vf_reset_event - handle a VF reset event.
 *
 * Assigns the VF a fresh random MAC address unless the PF set one
 * (IGB_VF_FLAG_PF_SET_MAC), then runs the common igb_vf_reset() steps.
 */
4621 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4623 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4625 /* generate a new mac address as we were hotplug removed/added */
4626 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4627 random_ether_addr(vf_mac);
4629 /* process remaining reset events */
4630 igb_vf_reset(adapter, vf);
/*
 * igb_vf_reset_msg - service a VF's E1000_VF_RESET mailbox request.
 *
 * Runs igb_vf_reset(), programs the VF MAC address into the receive-address
 * entry counted back from the end of the table (rar_entry_count - (vf + 1)),
 * sets the VF's bit in VFTE and VFRE, marks the VF clear-to-send, and
 * replies with an ACK that carries the MAC address.
 */
4633 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4635 struct e1000_hw *hw = &adapter->hw;
4636 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4637 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4639 u8 *addr = (u8 *)(&msgbuf[1]);
4641 /* process all the same items cleared in a function level reset */
4642 igb_vf_reset(adapter, vf);
4644 /* set vf mac address */
4645 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4647 /* enable transmit and receive for vf */
4648 reg = rd32(E1000_VFTE);
4649 wr32(E1000_VFTE, reg | (1 << vf));
4650 reg = rd32(E1000_VFRE);
4651 wr32(E1000_VFRE, reg | (1 << vf));
/* plain assignment: every other VF flag is dropped here */
4653 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4655 /* reply to reset with ack and vf mac address */
4656 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4657 memcpy(addr, vf_mac, 6);
4658 igb_write_mbx(hw, msgbuf, 3, vf);
/*
 * igb_set_vf_mac_addr - handle a VF's request to change its MAC address.
 *
 * The address is read from the message starting at msg[1] and is only
 * passed to igb_set_vf_mac() when is_valid_ether_addr() accepts it.
 */
4661 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4663 unsigned char *addr = (char *)&msg[1];
4666 if (is_valid_ether_addr(addr))
4667 err = igb_set_vf_mac(adapter, vf, addr);
/*
 * igb_rcv_ack_from_vf - react to an ACK from a VF.
 *
 * A VF that is not clear-to-send is answered with a NACK, rate-limited to
 * one every 2 * HZ jiffies through vf_data->last_nack.
 */
4672 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4674 struct e1000_hw *hw = &adapter->hw;
4675 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4676 u32 msg = E1000_VT_MSGTYPE_NACK;
4678 /* if device isn't clear to send it shouldn't be reading either */
4679 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4680 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4681 igb_write_mbx(hw, &msg, 1, vf);
4682 vf_data->last_nack = jiffies;
/*
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: index of the VF
 *
 * Reads the message with igb_read_mbx(), handles E1000_VF_RESET through
 * igb_vf_reset_msg(), dispatches the remaining request types on the low
 * 16 bits of msgbuf[0], and writes msgbuf[0] back to the VF with the
 * ACK/NACK (and CTS) type bits set.
 */
4686 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4688 struct pci_dev *pdev = adapter->pdev;
4689 u32 msgbuf[E1000_VFMAILBOX_SIZE];
4690 struct e1000_hw *hw = &adapter->hw;
4691 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
/* request up to a full mailbox worth of data */
4694 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4697 /* if receive failed revoke VF CTS stats and restart init */
4698 dev_err(&pdev->dev, "Error receiving message from VF\n");
4699 vf_data->flags &= ~IGB_VF_FLAG_CTS;
/* same 2 * HZ window relative to last_nack that igb_rcv_ack_from_vf() uses */
4700 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4705 /* this is a message we already processed, do nothing */
4706 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4710 * until the vf completes a reset it should not be
4711 * allowed to start any configuration.
/* exact match on the whole first word, not just its low 16 bits */
4714 if (msgbuf[0] == E1000_VF_RESET) {
4715 igb_vf_reset_msg(adapter, vf);
/* VF has not been marked clear-to-send yet */
4719 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4720 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
/* the request type is taken from the low 16 bits of the first word */
4726 switch ((msgbuf[0] & 0xFFFF)) {
4727 case E1000_VF_SET_MAC_ADDR:
4728 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4730 case E1000_VF_SET_PROMISC:
4731 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4733 case E1000_VF_SET_MULTICAST:
4734 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4736 case E1000_VF_SET_LPE:
/* only the second message word is passed for this request */
4737 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4739 case E1000_VF_SET_VLAN:
/* pf_vlan is tested before the request can reach igb_set_vf_vlan() */
4740 if (adapter->vf_data[vf].pf_vlan)
4743 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4746 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4751 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4753 /* notify the VF of the results of what it sent us */
4755 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4757 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
/* only msgbuf[0], now carrying the status bits, is written back */
4759 igb_write_mbx(hw, msgbuf, 1, vf);
/*
 * igb_msg_task - service the mailbox of every allocated VF
 * @adapter: board private structure
 *
 * For each VF below vfs_allocated_count, checks in order for a pending
 * reset, message and ack, and calls the matching handler. Each
 * igb_check_for_*() helper is treated as "pending" when it returns 0.
 */
4762 static void igb_msg_task(struct igb_adapter *adapter)
4764 struct e1000_hw *hw = &adapter->hw;
4767 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4768 /* process any reset requests */
4769 if (!igb_check_for_rst(hw, vf))
4770 igb_vf_reset_event(adapter, vf);
4772 /* process any messages pending */
4773 if (!igb_check_for_msg(hw, vf))
4774 igb_rcv_msg_from_vf(adapter, vf);
4776 /* process any acks */
4777 if (!igb_check_for_ack(hw, vf))
4778 igb_rcv_ack_from_vf(adapter, vf);
4783 * igb_set_uta - Set unicast filter table address
4784 * @adapter: board private structure
4786 * The unicast table address is a register array of 32-bit registers.
4787 * The table is meant to be used in a way similar to how the MTA is used
4788 * however due to certain limitations in the hardware it is necessary to
4789 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4790 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
4792 static void igb_set_uta(struct igb_adapter *adapter)
4794 struct e1000_hw *hw = &adapter->hw;
4797 /* The UTA table only exists on 82576 hardware and newer */
4798 if (hw->mac.type < e1000_82576)
4801 /* we only need to do this if VMDq is enabled */
/* vfs_allocated_count == 0 is the condition used for "not enabled" here */
4802 if (!adapter->vfs_allocated_count)
/* ~0 sets every bit in each of the uta_reg_count registers */
4805 for (i = 0; i < hw->mac.uta_reg_count; i++)
4806 array_wr32(E1000_UTA, i, ~0);
4810 * igb_intr_msi - Interrupt Handler
4811 * @irq: interrupt number
4812 * @data: pointer to a network interface device structure
4814 static irqreturn_t igb_intr_msi(int irq, void *data)
/* data is the adapter; this handler always works on q_vector[0] */
4816 struct igb_adapter *adapter = data;
4817 struct igb_q_vector *q_vector = adapter->q_vector[0];
4818 struct e1000_hw *hw = &adapter->hw;
4819 /* read ICR disables interrupts using IAM */
4820 u32 icr = rd32(E1000_ICR);
4822 igb_write_itr(q_vector);
/* DRSTA: hand the work to the adapter's reset_task work item */
4824 if (icr & E1000_ICR_DRSTA)
4825 schedule_work(&adapter->reset_task);
4827 if (icr & E1000_ICR_DOUTSYNC) {
4828 /* HW is reporting DMA is out of sync */
4829 adapter->stats.doosync++;
/* RXSEQ or LSC: request a link status refresh and kick the watchdog */
4832 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4833 hw->mac.get_link_status = 1;
/* the timer is not re-armed while the __IGB_DOWN bit is set */
4834 if (!test_bit(__IGB_DOWN, &adapter->state))
4835 mod_timer(&adapter->watchdog_timer, jiffies + 1);
/* packet processing itself is left to the NAPI poll routine */
4838 napi_schedule(&q_vector->napi);
4844 * igb_intr - Legacy Interrupt Handler
4845 * @irq: interrupt number
4846 * @data: pointer to a network interface device structure
4848 static irqreturn_t igb_intr(int irq, void *data)
/* data is the adapter; this handler always works on q_vector[0] */
4850 struct igb_adapter *adapter = data;
4851 struct igb_q_vector *q_vector = adapter->q_vector[0];
4852 struct e1000_hw *hw = &adapter->hw;
4853 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4854 * need for the IMC write */
4855 u32 icr = rd32(E1000_ICR);
4857 return IRQ_NONE; /* Not our interrupt */
4859 igb_write_itr(q_vector);
4861 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4862 * not set, then the adapter didn't send an interrupt */
4863 if (!(icr & E1000_ICR_INT_ASSERTED))
/* DRSTA: hand the work to the adapter's reset_task work item */
4866 if (icr & E1000_ICR_DRSTA)
4867 schedule_work(&adapter->reset_task);
4869 if (icr & E1000_ICR_DOUTSYNC) {
4870 /* HW is reporting DMA is out of sync */
4871 adapter->stats.doosync++;
/* RXSEQ or LSC: request a link status refresh and kick the watchdog */
4874 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4875 hw->mac.get_link_status = 1;
4876 /* guard against interrupt when we're going down */
4877 if (!test_bit(__IGB_DOWN, &adapter->state))
4878 mod_timer(&adapter->watchdog_timer, jiffies + 1);
/* packet processing itself is left to the NAPI poll routine */
4881 napi_schedule(&q_vector->napi);
/*
 * igb_ring_irq_enable - update the ITR and re-enable interrupts for a vector
 * @q_vector: vector whose interrupt is to be re-enabled
 *
 * Called from igb_poll() once polling is complete.
 */
4886 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4888 struct igb_adapter *adapter = q_vector->adapter;
4889 struct e1000_hw *hw = &adapter->hw;
/*
 * The ITR is only recalculated when the low two bits of the relevant
 * setting are non-zero: rx_itr_setting for vectors that have an rx ring,
 * tx_itr_setting for vectors without one.
 */
4891 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4892 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
/* no MSI-X entries: the ITR is computed for the adapter as a whole */
4893 if (!adapter->msix_entries)
4894 igb_set_itr(adapter);
4896 igb_update_ring_itr(q_vector);
/* interrupts stay disabled while the __IGB_DOWN bit is set */
4899 if (!test_bit(__IGB_DOWN, &adapter->state)) {
/* with MSI-X only this vector's EIMS bits are written */
4900 if (adapter->msix_entries)
4901 wr32(E1000_EIMS, q_vector->eims_value);
4903 igb_irq_enable(adapter);
4908 * igb_poll - NAPI Rx polling callback
4909 * @napi: napi polling structure
4910 * @budget: count of how many packets we should handle
4912 static int igb_poll(struct napi_struct *napi, int budget)
4914 struct igb_q_vector *q_vector = container_of(napi,
4915 struct igb_q_vector,
/* tx_clean_complete starts as 1 so a vector without a tx ring counts as clean */
4917 int tx_clean_complete = 1, work_done = 0;
4919 #ifdef CONFIG_IGB_DCA
4920 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4921 igb_update_dca(q_vector);
/* a vector may own a tx ring, an rx ring, or both; each is cleaned if present */
4923 if (q_vector->tx_ring)
4924 tx_clean_complete = igb_clean_tx_irq(q_vector);
/* work_done is advanced by the rx clean routine through the pointer */
4926 if (q_vector->rx_ring)
4927 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4929 if (!tx_clean_complete)
4932 /* If not enough Rx work done, exit the polling mode */
4933 if (work_done < budget) {
4934 napi_complete(napi);
4935 igb_ring_irq_enable(q_vector);
4942 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4943 * @adapter: board private structure
4944 * @shhwtstamps: timestamp structure to update
4945 * @regval: unsigned 64bit system time value.
4947 * We need to convert the system time value stored in the RX/TXSTMP registers
4948 * into a hwtstamp which can be used by the upper level timestamping functions
4950 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4951 struct skb_shared_hwtstamps *shhwtstamps,
4957 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4958 * 24 to match clock shift we setup earlier.
4960 if (adapter->hw.mac.type == e1000_82580)
4961 regval <<= IGB_82580_TSYNC_SHIFT;
/* translate the raw counter value into nanoseconds using the adapter's timecounter */
4963 ns = timecounter_cyc2time(&adapter->clock, regval);
4964 timecompare_update(&adapter->compare, ns);
/* start from a zeroed structure so only the two fields below are set */
4965 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4966 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4967 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4971 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4972 * @q_vector: pointer to q_vector containing needed info
4973 * @skb: packet that was just sent
4975 * If we were asked to do hardware stamping and such a time stamp is
4976 * available, then it must have been for this skb here because we
4977 * allow only one such packet into the queue.
4979 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4981 struct igb_adapter *adapter = q_vector->adapter;
4982 union skb_shared_tx *shtx = skb_tx(skb);
4983 struct e1000_hw *hw = &adapter->hw;
4984 struct skb_shared_hwtstamps shhwtstamps;
4987 /* if skb does not support hw timestamp or TX stamp not valid exit */
/* the register read is only reached when shtx->hardware is set */
4988 if (likely(!shtx->hardware) ||
4989 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
/* assemble the 64-bit value: TXSTMPL is the low half, TXSTMPH the high half */
4992 regval = rd32(E1000_TXSTMPL);
4993 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
/* convert and hand the timestamp to the stack for this skb */
4995 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4996 skb_tstamp_tx(skb, &shhwtstamps);
5000 * igb_clean_tx_irq - Reclaim resources after transmit completes
5001 * @q_vector: pointer to q_vector containing needed info
5002 * returns true if ring is completely cleaned
5004 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5006 struct igb_adapter *adapter = q_vector->adapter;
5007 struct igb_ring *tx_ring = q_vector->tx_ring;
5008 struct net_device *netdev = tx_ring->netdev;
5009 struct e1000_hw *hw = &adapter->hw;
5010 struct igb_buffer *buffer_info;
5011 struct sk_buff *skb;
5012 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5013 unsigned int total_bytes = 0, total_packets = 0;
5014 unsigned int i, eop, count = 0;
5015 bool cleaned = false;
/* start at next_to_clean; eop is the descriptor index recorded in its next_to_watch */
5017 i = tx_ring->next_to_clean;
5018 eop = tx_ring->buffer_info[i].next_to_watch;
5019 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
/*
 * Outer loop: continue while the watched descriptor has the DD status bit
 * set, bounded by one full pass over the ring (count < tx_ring->count).
 */
5021 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5022 (count < tx_ring->count)) {
/* inner loop: release every descriptor up to and including eop */
5023 for (cleaned = false; !cleaned; count++) {
5024 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5025 buffer_info = &tx_ring->buffer_info[i];
5026 cleaned = (i == eop);
5027 skb = buffer_info->skb;
5030 unsigned int segs, bytecount;
5031 /* gso_segs is currently only valid for tcp */
5032 segs = buffer_info->gso_segs;
5033 /* multiply data chunks by size of headers */
5034 bytecount = ((segs - 1) * skb_headlen(skb)) +
5036 total_packets += segs;
5037 total_bytes += bytecount;
5039 igb_tx_hwtstamp(q_vector, skb);
5042 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
/* clear the written-back status so the descriptor is not seen as done again */
5043 tx_desc->wb.status = 0;
5046 if (i == tx_ring->count)
/* look up the watched descriptor for the next position */
5049 eop = tx_ring->buffer_info[i].next_to_watch;
5050 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5053 tx_ring->next_to_clean = i;
/*
 * A stopped subqueue is only considered for wake-up when something was
 * cleaned, carrier is up and at least IGB_TX_QUEUE_WAKE descriptors are unused.
 */
5055 if (unlikely(count &&
5056 netif_carrier_ok(netdev) &&
5057 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5058 /* Make sure that anybody stopping the queue after this
5059 * sees the new next_to_clean.
5062 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5063 !(test_bit(__IGB_DOWN, &adapter->state))) {
5064 netif_wake_subqueue(netdev, tx_ring->queue_index);
5065 tx_ring->tx_stats.restart_queue++;
5069 if (tx_ring->detect_tx_hung) {
5070 /* Detect a transmit hang in hardware, this serializes the
5071 * check with the clearing of time_stamp and movement of i */
5072 tx_ring->detect_tx_hung = false;
/*
 * Hang criteria: the buffer at i has a time_stamp older than
 * tx_timeout_factor * HZ jiffies and the STATUS register does not
 * report TXOFF.
 */
5073 if (tx_ring->buffer_info[i].time_stamp &&
5074 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5075 (adapter->tx_timeout_factor * HZ)) &&
5076 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5078 /* detected Tx unit hang */
/*
 * NOTE(review): the message labels the values below as
 * buffer_info[next_to_clean], but the time_stamp argument is read from
 * buffer_info[eop] - confirm which index is intended.
 */
5079 dev_err(&tx_ring->pdev->dev,
5080 "Detected Tx Unit Hang\n"
5084 " next_to_use <%x>\n"
5085 " next_to_clean <%x>\n"
5086 "buffer_info[next_to_clean]\n"
5087 " time_stamp <%lx>\n"
5088 " next_to_watch <%x>\n"
5090 " desc.status <%x>\n",
5091 tx_ring->queue_index,
5092 readl(tx_ring->head),
5093 readl(tx_ring->tail),
5094 tx_ring->next_to_use,
5095 tx_ring->next_to_clean,
5096 tx_ring->buffer_info[eop].time_stamp,
5099 eop_desc->wb.status);
5100 netif_stop_subqueue(netdev, tx_ring->queue_index);
/* the same totals are added to the ring's total_* counters and to its tx_stats */
5103 tx_ring->total_bytes += total_bytes;
5104 tx_ring->total_packets += total_packets;
5105 tx_ring->tx_stats.bytes += total_bytes;
5106 tx_ring->tx_stats.packets += total_packets;
/* false when the loop stopped because a whole ring's worth was processed */
5107 return (count < tx_ring->count);
5111 * igb_receive_skb - helper function to handle rx indications
5112 * @q_vector: structure containing interrupt and ring information
5113 * @skb: packet to send up
5114 * @vlan_tag: vlan tag for packet
5116 static void igb_receive_skb(struct igb_q_vector *q_vector,
5117 struct sk_buff *skb,
5120 struct igb_adapter *adapter = q_vector->adapter;
/* two GRO entry points are used: the vlan-aware one with adapter->vlgrp, or the plain one */
5123 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5126 napi_gro_receive(&q_vector->napi, skb);
/*
 * igb_rx_checksum_adv - set skb->ip_summed from the rx descriptor status
 * @ring: ring the packet was received on
 * @status_err: status/error word taken from the descriptor
 * @skb: packet being processed
 *
 * ip_summed starts as CHECKSUM_NONE and is raised to CHECKSUM_UNNECESSARY
 * only when the TCPCS or UDPCS status bit is set.
 */
5129 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5130 u32 status_err, struct sk_buff *skb)
5132 skb->ip_summed = CHECKSUM_NONE;
5134 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5135 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5136 (status_err & E1000_RXD_STAT_IXSM))
5139 /* TCP/UDP checksum error bit is set */
5141 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5143 * work around errata with sctp packets where the TCPE aka
5144 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5145 * packets, (aka let the stack check the crc32c)
5147 if ((skb->len == 60) &&
5148 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5149 ring->rx_stats.csum_err++;
5151 /* let the stack verify checksum errors */
5154 /* It must be a TCP or UDP packet with a valid checksum */
5155 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5156 skb->ip_summed = CHECKSUM_UNNECESSARY;
5158 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
/*
 * igb_rx_hwtstamp - attach a hardware rx timestamp to an skb when one is latched
 * @q_vector: vector the packet was received on
 * @staterr: status/error word taken from the descriptor
 * @skb: packet being processed
 */
5161 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5162 struct sk_buff *skb)
5164 struct igb_adapter *adapter = q_vector->adapter;
5165 struct e1000_hw *hw = &adapter->hw;
5169 * If this bit is set, then the RX registers contain the time stamp. No
5170 * other packet will be time stamped until we read these registers, so
5171 * read the registers to make them available again. Because only one
5172 * packet can be time stamped at a time, we know that the register
5173 * values must belong to this one here and therefore we don't need to
5174 * compare any of the additional attributes stored for it.
5176 * If nothing went wrong, then it should have a skb_shared_tx that we
5177 * can turn into a skb_shared_hwtstamps.
/* common case: the descriptor carries no timestamp indication */
5179 if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
/* the VALID bit in TSYNCRXCTL is checked before the stamp registers are read */
5181 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
/* assemble the 64-bit value: RXSTMPL is the low half, RXSTMPH the high half */
5184 regval = rd32(E1000_RXSTMPL);
5185 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5187 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
/*
 * igb_get_hlen - header length reported by an rx descriptor
 * @rx_ring: ring the descriptor belongs to
 * @rx_desc: written-back descriptor
 *
 * Extracts the header buffer length from the descriptor's hdr_info field
 * (mask E1000_RXDADV_HDRBUFLEN_MASK, shift E1000_RXDADV_HDRBUFLEN_SHIFT)
 * and limits it to rx_ring->rx_buffer_len.
 */
5189 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5190 union e1000_adv_rx_desc *rx_desc)
5192 /* HW will not DMA in data larger than the given buffer, even if it
5193 * parses the (NFS, of course) header to be larger. In that case, it
5194 * fills the header buffer and spills the rest into the page.
5196 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5197 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5198 if (hlen > rx_ring->rx_buffer_len)
5199 hlen = rx_ring->rx_buffer_len;
/*
 * igb_clean_rx_irq_adv - process completed receive descriptors
 * @q_vector: vector owning the rx ring
 * @work_done: running count of processed packets, compared against @budget
 * @budget: upper bound for *work_done
 *
 * Walks the ring from next_to_clean while descriptors have the DD status
 * bit set, builds each skb from the header buffer and/or half-page buffer,
 * passes completed packets up through igb_receive_skb() and hands buffers
 * back to hardware in batches of IGB_RX_BUFFER_WRITE.
 */
5203 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5204 int *work_done, int budget)
5206 struct igb_ring *rx_ring = q_vector->rx_ring;
5207 struct net_device *netdev = rx_ring->netdev;
5208 struct pci_dev *pdev = rx_ring->pdev;
5209 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5210 struct igb_buffer *buffer_info , *next_buffer;
5211 struct sk_buff *skb;
5212 bool cleaned = false;
5213 int cleaned_count = 0;
/* used below to decide whether a receive page may be reused */
5214 int current_node = numa_node_id();
5215 unsigned int total_bytes = 0, total_packets = 0;
5221 i = rx_ring->next_to_clean;
5222 buffer_info = &rx_ring->buffer_info[i];
5223 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5224 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5226 while (staterr & E1000_RXD_STAT_DD) {
5227 if (*work_done >= budget)
/* detach the skb from the ring slot; the slot is refilled later */
5231 skb = buffer_info->skb;
5232 prefetch(skb->data - NET_IP_ALIGN);
5233 buffer_info->skb = NULL;
5236 if (i == rx_ring->count)
/* i now indexes the following slot; remember its descriptor and buffer */
5239 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5241 next_buffer = &rx_ring->buffer_info[i];
5243 length = le16_to_cpu(rx_desc->wb.upper.length);
/* a non-zero dma means the skb's own data buffer was mapped for this slot */
5247 if (buffer_info->dma) {
5248 pci_unmap_single(pdev, buffer_info->dma,
5249 rx_ring->rx_buffer_len,
5250 PCI_DMA_FROMDEVICE);
5251 buffer_info->dma = 0;
/* buffers of IGB_RXBUFFER_1024 or more hold the whole packet in the skb data */
5252 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5253 skb_put(skb, length);
/* smaller buffers only hold the header portion */
5256 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
/* page buffers are mapped half a page at a time */
5260 pci_unmap_page(pdev, buffer_info->page_dma,
5261 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5262 buffer_info->page_dma = 0;
5264 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5266 buffer_info->page_offset,
/* the page is given up unless its count is exactly 1 and it sits on the current NUMA node */
5269 if ((page_count(buffer_info->page) != 1) ||
5270 (page_to_nid(buffer_info->page) != current_node))
5271 buffer_info->page = NULL;
5273 get_page(buffer_info->page);
5276 skb->data_len += length;
5277 skb->truesize += length;
/*
 * Not the last descriptor of the packet: swap skb and dma with the next
 * slot so the partially built skb continues there.
 */
5280 if (!(staterr & E1000_RXD_STAT_EOP)) {
5281 buffer_info->skb = next_buffer->skb;
5282 buffer_info->dma = next_buffer->dma;
5283 next_buffer->skb = skb;
5284 next_buffer->dma = 0;
/* frames flagged with an error bit are freed instead of being passed up */
5288 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5289 dev_kfree_skb_irq(skb);
5293 igb_rx_hwtstamp(q_vector, staterr, skb);
5294 total_bytes += skb->len;
5297 igb_rx_checksum_adv(rx_ring, staterr, skb);
5299 skb->protocol = eth_type_trans(skb, netdev);
5300 skb_record_rx_queue(skb, rx_ring->queue_index);
/* the vlan field is only used when the VP status bit is set, otherwise tag 0 */
5302 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5303 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5305 igb_receive_skb(q_vector, skb, vlan_tag);
/* clear the status so this descriptor is not seen as done again */
5308 rx_desc->wb.upper.status_error = 0;
5310 /* return some buffers to hardware, one at a time is too slow */
5311 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5312 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5316 /* use prefetched values */
5318 buffer_info = next_buffer;
5319 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5322 rx_ring->next_to_clean = i;
/* recomputed from the ring state for the final refill */
5323 cleaned_count = igb_desc_unused(rx_ring);
5326 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
/* the same totals are added to the ring's total_* counters and to its rx_stats */
5328 rx_ring->total_packets += total_packets;
5329 rx_ring->total_bytes += total_bytes;
5330 rx_ring->rx_stats.packets += total_packets;
5331 rx_ring->rx_stats.bytes += total_bytes;
5336 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5337 * @rx_ring: receive ring whose used buffers are replaced
5339 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5341 struct net_device *netdev = rx_ring->netdev;
5342 union e1000_adv_rx_desc *rx_desc;
5343 struct igb_buffer *buffer_info;
5344 struct sk_buff *skb;
/* refilling starts at next_to_use */
5348 i = rx_ring->next_to_use;
5349 buffer_info = &rx_ring->buffer_info[i];
5351 bufsz = rx_ring->rx_buffer_len;
/* one iteration per buffer requested by the caller */
5353 while (cleaned_count--) {
5354 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
/*
 * Buffer sizes below IGB_RXBUFFER_1024 additionally use a half-page
 * buffer; it is (re)mapped only when page_dma is currently 0.
 */
5356 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5357 if (!buffer_info->page) {
5358 buffer_info->page = netdev_alloc_page(netdev);
5359 if (!buffer_info->page) {
5360 rx_ring->rx_stats.alloc_failed++;
/* a freshly allocated page starts with its first half */
5363 buffer_info->page_offset = 0;
/* an existing page flips to its other half */
5365 buffer_info->page_offset ^= PAGE_SIZE / 2;
5367 buffer_info->page_dma =
5368 pci_map_page(rx_ring->pdev, buffer_info->page,
5369 buffer_info->page_offset,
5371 PCI_DMA_FROMDEVICE);
/* on mapping failure page_dma is reset to 0 and the failure is counted */
5372 if (pci_dma_mapping_error(rx_ring->pdev,
5373 buffer_info->page_dma)) {
5374 buffer_info->page_dma = 0;
5375 rx_ring->rx_stats.alloc_failed++;
5380 skb = buffer_info->skb;
5382 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5384 rx_ring->rx_stats.alloc_failed++;
5388 buffer_info->skb = skb;
/* the skb data buffer is mapped only when no mapping is recorded yet */
5390 if (!buffer_info->dma) {
5391 buffer_info->dma = pci_map_single(rx_ring->pdev,
5394 PCI_DMA_FROMDEVICE);
5395 if (pci_dma_mapping_error(rx_ring->pdev,
5396 buffer_info->dma)) {
5397 buffer_info->dma = 0;
5398 rx_ring->rx_stats.alloc_failed++;
5402 /* Refresh the desc even if buffer_addrs didn't change because
5403 * each write-back erases this info. */
/* small buffers: packet address is the page, header address is the skb data */
5404 if (bufsz < IGB_RXBUFFER_1024) {
5405 rx_desc->read.pkt_addr =
5406 cpu_to_le64(buffer_info->page_dma);
5407 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
/* large buffers: the skb data is the packet buffer and no header buffer is given */
5409 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5410 rx_desc->read.hdr_addr = 0;
5414 if (i == rx_ring->count)
5416 buffer_info = &rx_ring->buffer_info[i];
/* the tail register is only written when next_to_use actually moved */
5420 if (rx_ring->next_to_use != i) {
5421 rx_ring->next_to_use = i;
5423 i = (rx_ring->count - 1);
5427 /* Force memory writes to complete before letting h/w
5428 * know there are new descriptors to fetch. (Only
5429 * applicable for weak-ordered memory model archs,
5430 * such as IA-64). */
5432 writel(i, rx_ring->tail);
/*
 * igb_mii_ioctl - handle MII ioctl requests
 * @netdev: network interface device structure
 * @ifr: request; its MII payload is obtained with if_mii()
 * @cmd: ioctl command
 */
5442 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5444 struct igb_adapter *adapter = netdev_priv(netdev);
5445 struct mii_ioctl_data *data = if_mii(ifr);
/* the media type is compared against copper before anything else is done */
5447 if (adapter->hw.phy.media_type != e1000_media_type_copper)
/* report the PHY address recorded in the hw structure */
5452 data->phy_id = adapter->hw.phy.addr;
/* the register number is limited to 5 bits (0..31) */
5455 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5467 * igb_hwtstamp_ioctl - control hardware time stamping
5472 * Outgoing time stamping can be enabled and disabled. Play nice and
5473 * disable it when requested, although it shouldn't cause any overhead
5474 * when no packet needs it. At most one packet in the queue may be
5475 * marked for time stamping, otherwise it would be impossible to tell
5476 * for sure to which packet the hardware time stamp belongs.
5478 * Incoming time stamping has to be configured via the hardware
5479 * filters. Not all combinations are supported, in particular event
5480 * type has to be specified. Matching the kind of event packet is
5481 * not supported, with the exception of "all V2 events regardless of
5485 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5486 struct ifreq *ifr, int cmd)
5488 struct igb_adapter *adapter = netdev_priv(netdev);
5489 struct e1000_hw *hw = &adapter->hw;
5490 struct hwtstamp_config config;
/* both directions start out as "enabled" before the request is examined */
5491 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5492 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5493 u32 tsync_rx_cfg = 0;
/* the request is a struct hwtstamp_config located at ifr->ifr_data in user space */
5498 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5501 /* reserved for future extensions */
5505 switch (config.tx_type) {
5506 case HWTSTAMP_TX_OFF:
5508 case HWTSTAMP_TX_ON:
/* config.rx_filter may be rewritten below to the filter that is actually applied */
5514 switch (config.rx_filter) {
5515 case HWTSTAMP_FILTER_NONE:
5518 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5519 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5520 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5521 case HWTSTAMP_FILTER_ALL:
5523 * register TSYNCRXCFG must be set, therefore it is not
5524 * possible to time stamp both Sync and Delay_Req messages
5525 * => fall back to time stamping all packets
5527 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5528 config.rx_filter = HWTSTAMP_FILTER_ALL;
5530 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5531 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5532 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5535 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5536 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5537 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5540 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5541 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5542 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5543 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
/* L2 and L4 requests share one hardware setting, reported back as FILTER_SOME */
5546 config.rx_filter = HWTSTAMP_FILTER_SOME;
5548 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5549 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5550 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5551 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5554 config.rx_filter = HWTSTAMP_FILTER_SOME;
5556 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5557 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5558 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5559 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5560 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
/* the 82575 gets separate treatment when either direction is still enabled */
5567 if (hw->mac.type == e1000_82575) {
5568 if (tsync_rx_ctl | tsync_tx_ctl)
5573 /* enable/disable TX */
/* read-modify-write: only the ENABLED bit is replaced */
5574 regval = rd32(E1000_TSYNCTXCTL);
5575 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5576 regval |= tsync_tx_ctl;
5577 wr32(E1000_TSYNCTXCTL, regval);
5579 /* enable/disable RX */
/* read-modify-write: the ENABLED bit and the TYPE field are replaced */
5580 regval = rd32(E1000_TSYNCRXCTL);
5581 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5582 regval |= tsync_rx_ctl;
5583 wr32(E1000_TSYNCRXCTL, regval);
5585 /* define which PTP packets are time stamped */
5586 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5588 /* define ethertype filter for timestamped packets */
5591 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5592 E1000_ETQF_1588 | /* enable timestamping */
5593 ETH_P_1588)); /* 1588 eth protocol type */
/* writing 0 leaves ethertype filter 3 with no bits set */
5595 wr32(E1000_ETQF(3), 0);
5597 #define PTP_PORT 319
5598 /* L4 Queue Filter[3]: filter by destination port and protocol */
5600 u32 ftqf = (IPPROTO_UDP /* UDP */
5601 | E1000_FTQF_VF_BP /* VF not compared */
5602 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5603 | E1000_FTQF_MASK); /* mask all inputs */
5604 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
/* the port number is written in network byte order */
5606 wr32(E1000_IMIR(3), htons(PTP_PORT));
5607 wr32(E1000_IMIREXT(3),
5608 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5609 if (hw->mac.type == e1000_82576) {
5610 /* enable source port check */
5611 wr32(E1000_SPQF(3), htons(PTP_PORT));
5612 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5614 wr32(E1000_FTQF(3), ftqf);
/* alternative setting: filter 3 is written with only the mask bits */
5616 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
/* remember the configuration that was applied */
5620 adapter->hwtstamp_config = config;
5622 /* clear TX/RX time stamp registers, just to be sure */
/* the values read are not used; the reads are done for their effect on the registers */
5623 regval = rd32(E1000_TXSTMPH);
5624 regval = rd32(E1000_RXSTMPH);
/* the possibly adjusted config is copied back to the caller */
5626 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
/*
 * igb_ioctl - ioctl entry point of the net device
 * @netdev: network interface device structure
 * @ifr: request data
 * @cmd: ioctl command
 *
 * Forwards the request to igb_mii_ioctl() or igb_hwtstamp_ioctl().
 */
5636 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5642 return igb_mii_ioctl(netdev, ifr, cmd);
5644 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
/*
 * igb_read_pcie_cap_reg - read a 16-bit register in the PCIe capability
 * @hw: hardware structure; hw->back points to the adapter
 * @reg: offset of the register relative to the start of the capability
 * @value: where the word that was read is stored
 *
 * Returns -E1000_ERR_CONFIG on the error path shown below.
 */
5650 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5652 struct igb_adapter *adapter = hw->back;
/* locate the PCI Express capability in config space */
5655 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5657 return -E1000_ERR_CONFIG;
5659 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
/*
 * igb_write_pcie_cap_reg - write a 16-bit register in the PCIe capability
 * @hw: hardware structure; hw->back points to the adapter
 * @reg: offset of the register relative to the start of the capability
 * @value: pointer to the word to write (dereferenced, not modified)
 *
 * Returns -E1000_ERR_CONFIG on the error path shown below.
 */
5664 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5666 struct igb_adapter *adapter = hw->back;
/* locate the PCI Express capability in config space */
5669 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5671 return -E1000_ERR_CONFIG;
5673 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
/*
 * igb_vlan_rx_register - record the VLAN group and set up tag handling
 * @netdev: network interface device structure
 * @grp: VLAN group to store in the adapter
 *
 * Runs with the adapter's interrupts disabled while the group pointer and
 * the CTRL/RCTL registers are updated.
 */
5678 static void igb_vlan_rx_register(struct net_device *netdev,
5679 struct vlan_group *grp)
5681 struct igb_adapter *adapter = netdev_priv(netdev);
5682 struct e1000_hw *hw = &adapter->hw;
5685 igb_irq_disable(adapter);
5686 adapter->vlgrp = grp;
5689 /* enable VLAN tag insert/strip */
5690 ctrl = rd32(E1000_CTRL);
5691 ctrl |= E1000_CTRL_VME;
5692 wr32(E1000_CTRL, ctrl);
5694 /* Disable CFI check */
5695 rctl = rd32(E1000_RCTL);
5696 rctl &= ~E1000_RCTL_CFIEN;
5697 wr32(E1000_RCTL, rctl);
5699 /* disable VLAN tag insert/strip */
5700 ctrl = rd32(E1000_CTRL);
5701 ctrl &= ~E1000_CTRL_VME;
5702 wr32(E1000_CTRL, ctrl);
/* done for both outcomes above */
5705 igb_rlpml_set(adapter);
/* interrupts are not re-enabled while the __IGB_DOWN bit is set */
5707 if (!test_bit(__IGB_DOWN, &adapter->state))
5708 igb_irq_enable(adapter);
/*
 * igb_vlan_rx_add_vid - start receiving a VLAN id
 * @netdev: network interface device structure
 * @vid: VLAN id to add
 */
5711 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5713 struct igb_adapter *adapter = netdev_priv(netdev);
5714 struct e1000_hw *hw = &adapter->hw;
/* the PF's pool index is the number of allocated VFs, i.e. the first index after them */
5715 int pf_id = adapter->vfs_allocated_count;
5717 /* attempt to add filter to vlvf array */
5718 igb_vlvf_set(adapter, vid, true, pf_id);
5720 /* add the filter since PF can receive vlans w/o entry in vlvf */
5721 igb_vfta_set(hw, vid, true);
/*
 * igb_vlan_rx_kill_vid - stop receiving a VLAN id
 * @netdev: network interface device structure
 * @vid: VLAN id to remove
 */
5724 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5726 struct igb_adapter *adapter = netdev_priv(netdev);
5727 struct e1000_hw *hw = &adapter->hw;
/* the PF's pool index is the number of allocated VFs, i.e. the first index after them */
5728 int pf_id = adapter->vfs_allocated_count;
/* the group entry is cleared with the adapter's interrupts disabled */
5731 igb_irq_disable(adapter);
5732 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5734 if (!test_bit(__IGB_DOWN, &adapter->state))
5735 igb_irq_enable(adapter);
5737 /* remove vlan from VLVF table array */
5738 err = igb_vlvf_set(adapter, vid, false, pf_id);
5740 /* if vid was not present in VLVF just remove it from table */
5742 igb_vfta_set(hw, vid, false);
/*
 * igb_restore_vlan - re-apply the VLAN configuration held in adapter->vlgrp
 * @adapter: board private structure
 *
 * Re-registers the stored group and, when a group exists, re-adds the ids
 * that have a device registered in it.
 */
5745 static void igb_restore_vlan(struct igb_adapter *adapter)
5747 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5749 if (adapter->vlgrp) {
/* scan every possible id in the group */
5751 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5752 if (!vlan_group_get_device(adapter->vlgrp, vid))
5754 igb_vlan_rx_add_vid(adapter->netdev, vid);
/*
 * igb_set_spd_dplx - configure a speed/duplex combination
 * @adapter: board private structure
 * @spddplx: requested setting, encoded as SPEED_* + DUPLEX_* (see case labels)
 */
5759 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5761 struct pci_dev *pdev = adapter->pdev;
5762 struct e1000_mac_info *mac = &adapter->hw.mac;
/* 10 and 100 Mb/s settings are stored as a forced speed/duplex */
5767 case SPEED_10 + DUPLEX_HALF:
5768 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5770 case SPEED_10 + DUPLEX_FULL:
5771 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5773 case SPEED_100 + DUPLEX_HALF:
5774 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5776 case SPEED_100 + DUPLEX_FULL:
5777 mac->forced_speed_duplex = ADVERTISE_100_FULL;
/* 1000 full is expressed through the PHY's advertisement mask instead */
5779 case SPEED_1000 + DUPLEX_FULL:
5781 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5783 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5785 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
/*
 * __igb_shutdown - common preparation for suspend and shutdown
 * @pdev: PCI device
 * @enable_wake: set to whether wake-up should be armed (wake filters in use
 *               or adapter->en_mng_pt set)
 *
 * Detaches the net device, tears down the interrupt scheme, programs the
 * wake-up registers from adapter->wol, releases hardware control and
 * disables the PCI device.
 */
5791 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5793 struct net_device *netdev = pci_get_drvdata(pdev);
5794 struct igb_adapter *adapter = netdev_priv(netdev);
5795 struct e1000_hw *hw = &adapter->hw;
5796 u32 ctrl, rctl, status;
/* wake-up filter bits start from the adapter's wol setting */
5797 u32 wufc = adapter->wol;
5802 netif_device_detach(netdev);
5804 if (netif_running(netdev))
5807 igb_clear_interrupt_scheme(adapter);
5810 retval = pci_save_state(pdev);
/* with link already up, waking on link change is removed from the filter */
5815 status = rd32(E1000_STATUS);
5816 if (status & E1000_STATUS_LU)
5817 wufc &= ~E1000_WUFC_LNKC;
5820 igb_setup_rctl(adapter);
5821 igb_set_rx_mode(netdev);
5823 /* turn on all-multi mode if wake on multicast is enabled */
5824 if (wufc & E1000_WUFC_MC) {
5825 rctl = rd32(E1000_RCTL);
5826 rctl |= E1000_RCTL_MPE;
5827 wr32(E1000_RCTL, rctl);
5830 ctrl = rd32(E1000_CTRL);
5831 /* advertise wake from D3Cold */
5832 #define E1000_CTRL_ADVD3WUC 0x00100000
5833 /* phy power management enable */
5834 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
/* only ADVD3WUC is set here; EN_PHY_PWR_MGMT is defined but not used in the lines shown */
5835 ctrl |= E1000_CTRL_ADVD3WUC;
5836 wr32(E1000_CTRL, ctrl);
5838 /* Allow time for pending master requests to run */
5839 igb_disable_pcie_master(hw);
/* enable PME and load the wake-up filter bits */
5841 wr32(E1000_WUC, E1000_WUC_PME_EN);
5842 wr32(E1000_WUFC, wufc);
/* alternative setting: no wake-up filters */
5845 wr32(E1000_WUFC, 0);
5848 *enable_wake = wufc || adapter->en_mng_pt;
5850 igb_power_down_link(adapter);
5852 igb_power_up_link(adapter);
5854 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5855 * would have already happened in close and is redundant. */
5856 igb_release_hw_control(adapter);
5858 pci_disable_device(pdev);
/*
 * igb_suspend - PCI suspend callback
 * @pdev: PCI device
 * @state: power management message (not referenced in the lines shown)
 *
 * Runs the common __igb_shutdown() path and then puts the device to sleep,
 * either through pci_prepare_to_sleep() or by disabling wake-up from D3
 * and entering D3hot.
 */
5864 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5869 retval = __igb_shutdown(pdev, &wake);
5874 pci_prepare_to_sleep(pdev);
5876 pci_wake_from_d3(pdev, false);
5877 pci_set_power_state(pdev, PCI_D3hot);
/*
 * igb_resume - PCI resume callback
 * @pdev: PCI device
 *
 * Returns the device to D0, restores its config space, re-enables it,
 * rebuilds the interrupt scheme, takes hardware control back from firmware,
 * writes all ones to the WUS register and, if the interface was running,
 * re-opens it before re-attaching the net device.
 */
5883 static int igb_resume(struct pci_dev *pdev)
5885 struct net_device *netdev = pci_get_drvdata(pdev);
5886 struct igb_adapter *adapter = netdev_priv(netdev);
5887 struct e1000_hw *hw = &adapter->hw;
5890 pci_set_power_state(pdev, PCI_D0);
5891 pci_restore_state(pdev);
/* save again right after restoring so a saved copy is available for later use */
5892 pci_save_state(pdev);
5894 err = pci_enable_device_mem(pdev);
5897 "igb: Cannot enable PCI device from suspend\n");
5900 pci_set_master(pdev);
/* wake-up is switched off for both D3 states now that the device is awake */
5902 pci_enable_wake(pdev, PCI_D3hot, 0);
5903 pci_enable_wake(pdev, PCI_D3cold, 0);
/* counterpart of igb_clear_interrupt_scheme() called in __igb_shutdown() */
5905 if (igb_init_interrupt_scheme(adapter)) {
5906 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5912 /* let the f/w know that the h/w is now under the control of the
5914 igb_get_hw_control(adapter);
5916 wr32(E1000_WUS, ~0);
5918 if (netif_running(netdev)) {
5919 err = igb_open(netdev);
5924 netif_device_attach(netdev);
/*
 * igb_shutdown - PCI shutdown callback
 * @pdev: PCI device
 *
 * Runs the common __igb_shutdown() path; the device is only moved to D3hot
 * (with wake-up armed as __igb_shutdown() reported) when the system is
 * powering off.
 */
5930 static void igb_shutdown(struct pci_dev *pdev)
5934 __igb_shutdown(pdev, &wake);
5936 if (system_state == SYSTEM_POWER_OFF) {
5937 pci_wake_from_d3(pdev, wake);
5938 pci_set_power_state(pdev, PCI_D3hot);
5942 #ifdef CONFIG_NET_POLL_CONTROLLER
5944 * Polling 'interrupt' - used by things like netconsole to send skbs
5945 * without having to re-enable interrupts. It's not called while
5946 * the interrupt routine is executing.
5948 static void igb_netpoll(struct net_device *netdev)
5950 struct igb_adapter *adapter = netdev_priv(netdev);
5951 struct e1000_hw *hw = &adapter->hw;
/* no MSI-X entries: mask the adapter's interrupts and poll q_vector[0] */
5954 if (!adapter->msix_entries) {
5955 struct igb_q_vector *q_vector = adapter->q_vector[0];
5956 igb_irq_disable(adapter);
5957 napi_schedule(&q_vector->napi);
/* otherwise each vector's EIMC bits are written and its NAPI context is scheduled */
5961 for (i = 0; i < adapter->num_q_vectors; i++) {
5962 struct igb_q_vector *q_vector = adapter->q_vector[i];
5963 wr32(E1000_EIMC, q_vector->eims_value);
5964 napi_schedule(&q_vector->napi);
5967 #endif /* CONFIG_NET_POLL_CONTROLLER */
/*
 * Summary of the visible flow: the net device is detached first; a
 * permanent channel failure returns PCI_ERS_RESULT_DISCONNECT right away;
 * otherwise the PCI device is disabled and PCI_ERS_RESULT_NEED_RESET is
 * returned to request a slot reset.
 */
5970 * igb_io_error_detected - called when PCI error is detected
5971 * @pdev: Pointer to PCI device
5972 * @state: The current pci connection state
5974 * This function is called after a PCI bus error affecting
5975 * this device has been detected.
5977 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5978 pci_channel_state_t state)
5980 struct net_device *netdev = pci_get_drvdata(pdev);
5981 struct igb_adapter *adapter = netdev_priv(netdev);
/* keep the stack away from the device while recovery is in progress */
5983 netif_device_detach(netdev);
/* nothing can be recovered after a permanent failure */
5985 if (state == pci_channel_io_perm_failure)
5986 return PCI_ERS_RESULT_DISCONNECT;
/*
 * NOTE(review): the statement guarded by the next test appears to be
 * missing from this excerpt (adapter is otherwise unused in the visible
 * lines); pci_disable_device() is presumably unconditional -- confirm.
 */
5988 if (netif_running(netdev))
5990 pci_disable_device(pdev);
5992 /* Request a slot reset. */
5993 return PCI_ERS_RESULT_NEED_RESET;
/*
 * Summary of the visible flow: if pci_enable_device_mem() fails, the
 * failure is logged and result becomes PCI_ERS_RESULT_DISCONNECT. The
 * success path turns bus mastering on, restores and re-saves PCI state,
 * disables wake-up for D3hot and D3cold, writes all-ones to E1000_WUS and
 * sets result to PCI_ERS_RESULT_RECOVERED. Afterwards the AER uncorrectable
 * error status is cleaned up; a failure there is only logged.
 *
 * NOTE(review): the else separating the two paths, the declaration of err,
 * the test on err and the final return are not visible in this excerpt.
 */
5997 * igb_io_slot_reset - called after the pci bus has been reset.
5998 * @pdev: Pointer to PCI device
6000 * Restart the card from scratch, as if from a cold-boot. Implementation
6001 * resembles the first-half of the igb_resume routine.
6003 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6005 struct net_device *netdev = pci_get_drvdata(pdev);
6006 struct igb_adapter *adapter = netdev_priv(netdev);
6007 struct e1000_hw *hw = &adapter->hw;
6008 pci_ers_result_t result;
/* non-zero means the device could not be re-enabled */
6011 if (pci_enable_device_mem(pdev)) {
6013 "Cannot re-enable PCI device after reset.\n");
6014 result = PCI_ERS_RESULT_DISCONNECT;
/* success path: rebuild the PCI-level state of the function */
6016 pci_set_master(pdev);
6017 pci_restore_state(pdev);
6018 pci_save_state(pdev);
/* third argument 0 means wake-up is being disabled for these states */
6020 pci_enable_wake(pdev, PCI_D3hot, 0);
6021 pci_enable_wake(pdev, PCI_D3cold, 0);
6024 wr32(E1000_WUS, ~0);
6025 result = PCI_ERS_RESULT_RECOVERED;
/* clear AER status regardless of which result was chosen above */
6028 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6030 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6031 "failed 0x%0x\n", err);
6032 /* non-fatal, continue */
/*
 * Summary of the visible flow: when the interface is marked running it is
 * brought up again with igb_up() and a failure is logged; the net device
 * is then re-attached and hardware control is taken with
 * igb_get_hw_control().
 *
 * NOTE(review): what happens after the igb_up() failure message
 * (presumably an early return) is not visible in this excerpt.
 */
6039 * igb_io_resume - called when traffic can start flowing again.
6040 * @pdev: Pointer to PCI device
6042 * This callback is called when the error recovery driver tells us that
6043 * its OK to resume normal operation. Implementation resembles the
6044 * second-half of the igb_resume routine.
6046 static void igb_io_resume(struct pci_dev *pdev)
6048 struct net_device *netdev = pci_get_drvdata(pdev);
6049 struct igb_adapter *adapter = netdev_priv(netdev);
/* only restart the data path if the interface was up */
6051 if (netif_running(netdev)) {
6052 if (igb_up(adapter)) {
6053 dev_err(&pdev->dev, "igb_up failed after reset\n");
6058 netif_device_attach(netdev);
6060 /* let the f/w know that the h/w is now under the control of the
6062 igb_get_hw_control(adapter);
/*
 * igb_rar_set_qsel - program one receive-address register pair.
 * @adapter: board private structure
 * @addr: six-byte MAC address, addr[0] being the first byte on the wire
 * @index: which RAL/RAH pair to write
 *
 * Packs addr[0..3] into the low word and addr[4..5] into the low 16 bits of
 * the high word, sets E1000_RAH_AV in the high word, folds the queue/pool
 * selector qsel into the high word, then writes E1000_RAL(index) followed
 * by E1000_RAH(index).
 *
 * NOTE(review): the end of the parameter list (where qsel is presumably
 * declared) is not visible in this excerpt.
 */
6065 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6068 u32 rar_low, rar_high;
6069 struct e1000_hw *hw = &adapter->hw;
6071 /* HW expects these in little endian so we reverse the byte order
6072 * from network order (big endian) to little endian
6074 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6075 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6076 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6078 /* Indicate to hardware the Address is Valid. */
6079 rar_high |= E1000_RAH_AV;
/*
 * Two encodings of qsel: on 82575 it scales E1000_RAH_POOL_1 (a numeric
 * field), in the other form it shifts E1000_RAH_POOL_1 left (one bit per
 * pool). NOTE(review): an else between the two assignments is presumably
 * missing from this excerpt -- confirm.
 */
6081 if (hw->mac.type == e1000_82575)
6082 rar_high |= E1000_RAH_POOL_1 * qsel;
6084 rar_high |= E1000_RAH_POOL_1 << qsel;
/* low word first, then the high word that carries the valid bit */
6086 wr32(E1000_RAL(index), rar_low);
6088 wr32(E1000_RAH(index), rar_high);
/*
 * igb_set_vf_mac - record and program the MAC address of one VF.
 * @adapter: board private structure
 * @vf: VF index; used unchecked as an index into adapter->vf_data, so the
 *      caller must have validated it
 * @mac_addr: ETH_ALEN bytes of MAC address
 *
 * Copies the address into vf_data[vf].vf_mac_addresses and programs it into
 * receive-address entry rar_entry_count - (vf + 1) with @vf as the queue
 * selector.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
6092 static int igb_set_vf_mac(struct igb_adapter *adapter,
6093 int vf, unsigned char *mac_addr)
6095 struct e1000_hw *hw = &adapter->hw;
6096 /* VF MAC addresses start at end of receive addresses and move
6097 * towards the first, as a result a collision should not be possible */
6098 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6100 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6102 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
/*
 * igb_ndo_set_vf_mac - set the MAC address of a VF on behalf of the PF.
 * @netdev: PF network interface
 * @vf: VF index
 * @mac: requested MAC address
 *
 * Checks the address with is_valid_ether_addr() and @vf against
 * adapter->vfs_allocated_count, marks the VF with IGB_VF_FLAG_PF_SET_MAC,
 * logs the change, warns when the PF is down (__IGB_DOWN set) and finally
 * returns the result of igb_set_vf_mac().
 *
 * NOTE(review): the statement executed when validation fails is not
 * visible in this excerpt. Only an upper bound on @vf is tested; no
 * explicit check for a negative value is visible -- confirm the type of
 * vfs_allocated_count.
 */
6107 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6109 struct igb_adapter *adapter = netdev_priv(netdev);
6110 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
/* remember that this address was chosen by the PF */
6112 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6113 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
/* NOTE(review): this message has no trailing newline, unlike its siblings */
6114 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6115 " change effective.");
/* the address is still programmed below even when the PF is down */
6116 if (test_bit(__IGB_DOWN, &adapter->state)) {
6117 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6118 " but the PF device is not up.\n");
6119 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6120 " attempting to use the VF device.\n");
6122 return igb_set_vf_mac(adapter, vf, mac);
/*
 * igb_ndo_set_vf_bw - handler taking a VF index and a transmit rate.
 * @netdev: PF network interface
 * @vf: VF index
 * @tx_rate: requested transmit rate
 *
 * NOTE(review): only the signature is visible in this excerpt; the body,
 * and therefore the return value, must be confirmed against the complete
 * source.
 */
6125 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
/*
 * igb_ndo_get_vf_config - report the stored configuration of one VF.
 * @netdev: PF network interface
 * @vf: VF index, compared against adapter->vfs_allocated_count
 * @ivi: output structure filled in by this function
 *
 * The visible statements copy the stored MAC address (ETH_ALEN bytes) into
 * ivi->mac and copy pf_vlan and pf_qos into ivi->vlan and ivi->qos.
 *
 * NOTE(review): the statement executed when the bound check fails, any
 * other ivi fields that are set, and the return statement are not visible
 * in this excerpt.
 */
6130 static int igb_ndo_get_vf_config(struct net_device *netdev,
6131 int vf, struct ifla_vf_info *ivi)
6133 struct igb_adapter *adapter = netdev_priv(netdev);
/* reject indices at or beyond the number of allocated VFs */
6134 if (vf >= adapter->vfs_allocated_count)
6137 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6139 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6140 ivi->qos = adapter->vf_data[vf].pf_qos;
/*
 * igb_vmm_control - configure VLAN handling, loopback and replication for
 * virtualization.
 * @adapter: board private structure
 *
 * Visible steps: test for the 82575 MAC type, set E1000_RPLOLR_STRVLAN in
 * E1000_RPLOLR and E1000_DTXCTL_VLAN_ADDED in E1000_DTXCTL (both as
 * read-modify-write), then switch PF loopback and replication on when
 * adapter->vfs_allocated_count is non-zero, with matching calls that
 * switch them off.
 *
 * NOTE(review): the declaration of reg, the statement guarded by the 82575
 * test (presumably an early return), the else between the enable and
 * disable calls, and the end of the function are not visible in this
 * excerpt.
 */
6144 static void igb_vmm_control(struct igb_adapter *adapter)
6146 struct e1000_hw *hw = &adapter->hw;
6149 /* replication is not supported for 82575 */
6150 if (hw->mac.type == e1000_82575)
6153 /* enable replication vlan tag stripping */
6154 reg = rd32(E1000_RPLOLR);
6155 reg |= E1000_RPLOLR_STRVLAN;
6156 wr32(E1000_RPLOLR, reg);
6158 /* notify HW that the MAC is adding vlan tags */
6159 reg = rd32(E1000_DTXCTL);
6160 reg |= E1000_DTXCTL_VLAN_ADDED;
6161 wr32(E1000_DTXCTL, reg);
/* loopback and replication follow whether any VFs are allocated */
6163 if (adapter->vfs_allocated_count) {
6164 igb_vmdq_set_loopback_pf(hw, true);
6165 igb_vmdq_set_replication_pf(hw, true);
6167 igb_vmdq_set_loopback_pf(hw, false);
6168 igb_vmdq_set_replication_pf(hw, false);