1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
79         /* required last entry */
80         {0, }
81 };
82
83 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
84
85 void igb_reset(struct igb_adapter *);
86 static int igb_setup_all_tx_resources(struct igb_adapter *);
87 static int igb_setup_all_rx_resources(struct igb_adapter *);
88 static void igb_free_all_tx_resources(struct igb_adapter *);
89 static void igb_free_all_rx_resources(struct igb_adapter *);
90 static void igb_setup_mrqc(struct igb_adapter *);
91 void igb_update_stats(struct igb_adapter *);
92 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
93 static void __devexit igb_remove(struct pci_dev *pdev);
94 static int igb_sw_init(struct igb_adapter *);
95 static int igb_open(struct net_device *);
96 static int igb_close(struct net_device *);
97 static void igb_configure_tx(struct igb_adapter *);
98 static void igb_configure_rx(struct igb_adapter *);
99 static void igb_clean_all_tx_rings(struct igb_adapter *);
100 static void igb_clean_all_rx_rings(struct igb_adapter *);
101 static void igb_clean_tx_ring(struct igb_ring *);
102 static void igb_clean_rx_ring(struct igb_ring *);
103 static void igb_set_rx_mode(struct net_device *);
104 static void igb_update_phy_info(unsigned long);
105 static void igb_watchdog(unsigned long);
106 static void igb_watchdog_task(struct work_struct *);
107 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
108 static struct net_device_stats *igb_get_stats(struct net_device *);
109 static int igb_change_mtu(struct net_device *, int);
110 static int igb_set_mac(struct net_device *, void *);
111 static void igb_set_uta(struct igb_adapter *adapter);
112 static irqreturn_t igb_intr(int irq, void *);
113 static irqreturn_t igb_intr_msi(int irq, void *);
114 static irqreturn_t igb_msix_other(int irq, void *);
115 static irqreturn_t igb_msix_ring(int irq, void *);
116 #ifdef CONFIG_IGB_DCA
117 static void igb_update_dca(struct igb_q_vector *);
118 static void igb_setup_dca(struct igb_adapter *);
119 #endif /* CONFIG_IGB_DCA */
120 static bool igb_clean_tx_irq(struct igb_q_vector *);
121 static int igb_poll(struct napi_struct *, int);
122 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
123 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
124 static void igb_tx_timeout(struct net_device *);
125 static void igb_reset_task(struct work_struct *);
126 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
127 static void igb_vlan_rx_add_vid(struct net_device *, u16);
128 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
129 static void igb_restore_vlan(struct igb_adapter *);
130 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
131 static void igb_ping_all_vfs(struct igb_adapter *);
132 static void igb_msg_task(struct igb_adapter *);
133 static void igb_vmm_control(struct igb_adapter *);
134 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
135 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
136 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
137 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
138                                int vf, u16 vlan, u8 qos);
139 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
140 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
141                                  struct ifla_vf_info *ivi);
142
143 #ifdef CONFIG_PM
144 static int igb_suspend(struct pci_dev *, pm_message_t);
145 static int igb_resume(struct pci_dev *);
146 #endif
147 static void igb_shutdown(struct pci_dev *);
148 #ifdef CONFIG_IGB_DCA
149 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
150 static struct notifier_block dca_notifier = {
151         .notifier_call  = igb_notify_dca,
152         .next           = NULL,
153         .priority       = 0
154 };
155 #endif
156 #ifdef CONFIG_NET_POLL_CONTROLLER
157 /* for netdump / net console */
158 static void igb_netpoll(struct net_device *);
159 #endif
160 #ifdef CONFIG_PCI_IOV
161 static unsigned int max_vfs = 0;
162 module_param(max_vfs, uint, 0);
163 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
164                  "per physical function");
165 #endif /* CONFIG_PCI_IOV */
166
167 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
168                      pci_channel_state_t);
169 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
170 static void igb_io_resume(struct pci_dev *);
171
172 static struct pci_error_handlers igb_err_handler = {
173         .error_detected = igb_io_error_detected,
174         .slot_reset = igb_io_slot_reset,
175         .resume = igb_io_resume,
176 };
177
178
179 static struct pci_driver igb_driver = {
180         .name     = igb_driver_name,
181         .id_table = igb_pci_tbl,
182         .probe    = igb_probe,
183         .remove   = __devexit_p(igb_remove),
184 #ifdef CONFIG_PM
185         /* Power Management Hooks */
186         .suspend  = igb_suspend,
187         .resume   = igb_resume,
188 #endif
189         .shutdown = igb_shutdown,
190         .err_handler = &igb_err_handler
191 };
192
193 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
194 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
195 MODULE_LICENSE("GPL");
196 MODULE_VERSION(DRV_VERSION);
197
198 /**
199  * igb_read_clock - read raw cycle counter (to be used by time counter)
200  */
201 static cycle_t igb_read_clock(const struct cyclecounter *tc)
202 {
203         struct igb_adapter *adapter =
204                 container_of(tc, struct igb_adapter, cycles);
205         struct e1000_hw *hw = &adapter->hw;
206         u64 stamp = 0;
207         int shift = 0;
208
209         /*
210          * The timestamp latches on lowest register read. For the 82580
211          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
212          * adjust TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
213          */
214         if (hw->mac.type == e1000_82580) {
215                 stamp = rd32(E1000_SYSTIMR) >> 8;
216                 shift = IGB_82580_TSYNC_SHIFT;
217         }
218
219         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
220         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
221         return stamp;
222 }
223
224 #ifdef DEBUG
225 /**
226  * igb_get_hw_dev_name - return device name string
227  * used by hardware layer to print debugging information
228  **/
229 char *igb_get_hw_dev_name(struct e1000_hw *hw)
230 {
231         struct igb_adapter *adapter = hw->back;
232         return adapter->netdev->name;
233 }
234
235 /**
236  * igb_get_time_str - format current NIC and system time as string
237  */
238 static char *igb_get_time_str(struct igb_adapter *adapter,
239                               char buffer[160])
240 {
241         cycle_t hw = adapter->cycles.read(&adapter->cycles);
242         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
243         struct timespec sys;
244         struct timespec delta;
245         getnstimeofday(&sys);
246
247         delta = timespec_sub(nic, sys);
248
249         sprintf(buffer,
250                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
251                 hw,
252                 (long)nic.tv_sec, nic.tv_nsec,
253                 (long)sys.tv_sec, sys.tv_nsec,
254                 (long)delta.tv_sec, delta.tv_nsec);
255
256         return buffer;
257 }
258 #endif
259
260 /**
261  * igb_init_module - Driver Registration Routine
262  *
263  * igb_init_module is the first routine called when the driver is
264  * loaded. All it does is register with the PCI subsystem.
265  **/
266 static int __init igb_init_module(void)
267 {
268         int ret;
269         printk(KERN_INFO "%s - version %s\n",
270                igb_driver_string, igb_driver_version);
271
272         printk(KERN_INFO "%s\n", igb_copyright);
273
274 #ifdef CONFIG_IGB_DCA
275         dca_register_notify(&dca_notifier);
276 #endif
277         ret = pci_register_driver(&igb_driver);
278         return ret;
279 }
280
281 module_init(igb_init_module);
282
283 /**
284  * igb_exit_module - Driver Exit Cleanup Routine
285  *
286  * igb_exit_module is called just before the driver is removed
287  * from memory.
288  **/
289 static void __exit igb_exit_module(void)
290 {
291 #ifdef CONFIG_IGB_DCA
292         dca_unregister_notify(&dca_notifier);
293 #endif
294         pci_unregister_driver(&igb_driver);
295 }
296
297 module_exit(igb_exit_module);
298
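/*
 * Q_IDX_82576 interleaves indices across the two halves of the 82576
 * queue space: 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, and so on.  This matches
 * the VF queue allocation described in igb_cache_ring_register below.
 */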
299 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
300 /**
301  * igb_cache_ring_register - Descriptor ring to register mapping
302  * @adapter: board private structure to initialize
303  *
304  * Once we know the feature-set enabled for the device, we'll cache
305  * the register offset the descriptor ring is assigned to.
306  **/
307 static void igb_cache_ring_register(struct igb_adapter *adapter)
308 {
309         int i = 0, j = 0;
310         u32 rbase_offset = adapter->vfs_allocated_count;
311
312         switch (adapter->hw.mac.type) {
313         case e1000_82576:
314                 /* The queues are allocated for virtualization such that VF 0
315                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
316                  * In order to avoid collision we start at the first free queue
317                  * and continue consuming queues in the same sequence
318                  */
319                 if (adapter->vfs_allocated_count) {
320                         for (; i < adapter->rss_queues; i++)
321                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
322                                                                Q_IDX_82576(i);
323                         for (; j < adapter->rss_queues; j++)
324                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
325                                                                Q_IDX_82576(j);
326                 }
327         case e1000_82575:
328         case e1000_82580:
329         default:
330                 for (; i < adapter->num_rx_queues; i++)
331                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
332                 for (; j < adapter->num_tx_queues; j++)
333                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
334                 break;
335         }
336 }
337
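/**
 * igb_free_queues - Free memory for all rings
 * @adapter: board private structure to clean up
 *
 * Frees the ring structures allocated by igb_alloc_queues and resets
 * the Rx/Tx queue counts to zero.
 **/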
338 static void igb_free_queues(struct igb_adapter *adapter)
339 {
340         int i;
341
342         for (i = 0; i < adapter->num_tx_queues; i++) {
343                 kfree(adapter->tx_ring[i]);
344                 adapter->tx_ring[i] = NULL;
345         }
346         for (i = 0; i < adapter->num_rx_queues; i++) {
347                 kfree(adapter->rx_ring[i]);
348                 adapter->rx_ring[i] = NULL;
349         }
350         adapter->num_rx_queues = 0;
351         adapter->num_tx_queues = 0;
352 }
353
354 /**
355  * igb_alloc_queues - Allocate memory for all rings
356  * @adapter: board private structure to initialize
357  *
358  * We allocate one ring per queue at run-time since we don't know the
359  * number of queues at compile-time.
360  **/
361 static int igb_alloc_queues(struct igb_adapter *adapter)
362 {
363         struct igb_ring *ring;
364         int i;
365
366         for (i = 0; i < adapter->num_tx_queues; i++) {
367                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
368                 if (!ring)
369                         goto err;
370                 ring->count = adapter->tx_ring_count;
371                 ring->queue_index = i;
372                 ring->pdev = adapter->pdev;
373                 ring->netdev = adapter->netdev;
374                 /* For 82575, context index must be unique per ring. */
375                 if (adapter->hw.mac.type == e1000_82575)
376                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
377                 adapter->tx_ring[i] = ring;
378         }
379
380         for (i = 0; i < adapter->num_rx_queues; i++) {
381                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
382                 if (!ring)
383                         goto err;
384                 ring->count = adapter->rx_ring_count;
385                 ring->queue_index = i;
386                 ring->pdev = adapter->pdev;
387                 ring->netdev = adapter->netdev;
388                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
389                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
390                 /* set flag indicating ring supports SCTP checksum offload */
391                 if (adapter->hw.mac.type >= e1000_82576)
392                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
393                 adapter->rx_ring[i] = ring;
394         }
395
396         igb_cache_ring_register(adapter);
397
398         return 0;
399
400 err:
401         igb_free_queues(adapter);
402
403         return -ENOMEM;
404 }
405
406 #define IGB_N0_QUEUE -1
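/**
 * igb_assign_vector - map a q_vector's rings to an MSI-X vector
 * @q_vector: pointer to the q_vector being mapped
 * @msix_vector: MSI-X vector number to assign
 *
 * Writes the MSIXBM or IVAR registers (depending on MAC type) so that
 * interrupts for the q_vector's Rx/Tx rings are routed to msix_vector,
 * and records the resulting EIMS value in the q_vector.
 **/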
407 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
408 {
409         u32 msixbm = 0;
410         struct igb_adapter *adapter = q_vector->adapter;
411         struct e1000_hw *hw = &adapter->hw;
412         u32 ivar, index;
413         int rx_queue = IGB_N0_QUEUE;
414         int tx_queue = IGB_N0_QUEUE;
415
416         if (q_vector->rx_ring)
417                 rx_queue = q_vector->rx_ring->reg_idx;
418         if (q_vector->tx_ring)
419                 tx_queue = q_vector->tx_ring->reg_idx;
420
421         switch (hw->mac.type) {
422         case e1000_82575:
423                 /* The 82575 assigns vectors using a bitmask, which matches the
424                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
425                    or more queues to a vector, we write the appropriate bits
426                    into the MSIXBM register for that vector. */
427                 if (rx_queue > IGB_N0_QUEUE)
428                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
429                 if (tx_queue > IGB_N0_QUEUE)
430                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
431                 if (!adapter->msix_entries && msix_vector == 0)
432                         msixbm |= E1000_EIMS_OTHER;
433                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
434                 q_vector->eims_value = msixbm;
435                 break;
436         case e1000_82576:
437                 /* 82576 uses a table-based method for assigning vectors.
438                    Each queue has a single entry in the table to which we write
439                    a vector number along with a "valid" bit.  Sadly, the layout
440                    of the table is somewhat counterintuitive. */
441                 if (rx_queue > IGB_N0_QUEUE) {
442                         index = (rx_queue & 0x7);
443                         ivar = array_rd32(E1000_IVAR0, index);
444                         if (rx_queue < 8) {
445                                 /* vector goes into low byte of register */
446                                 ivar = ivar & 0xFFFFFF00;
447                                 ivar |= msix_vector | E1000_IVAR_VALID;
448                         } else {
449                                 /* vector goes into third byte of register */
450                                 ivar = ivar & 0xFF00FFFF;
451                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
452                         }
453                         array_wr32(E1000_IVAR0, index, ivar);
454                 }
455                 if (tx_queue > IGB_N0_QUEUE) {
456                         index = (tx_queue & 0x7);
457                         ivar = array_rd32(E1000_IVAR0, index);
458                         if (tx_queue < 8) {
459                                 /* vector goes into second byte of register */
460                                 ivar = ivar & 0xFFFF00FF;
461                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
462                         } else {
463                                 /* vector goes into high byte of register */
464                                 ivar = ivar & 0x00FFFFFF;
465                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
466                         }
467                         array_wr32(E1000_IVAR0, index, ivar);
468                 }
469                 q_vector->eims_value = 1 << msix_vector;
470                 break;
471         case e1000_82580:
472                 /* 82580 uses the same table-based approach as 82576 but has fewer
473                    entries as a result we carry over for queues greater than 4. */
474                 if (rx_queue > IGB_N0_QUEUE) {
475                         index = (rx_queue >> 1);
476                         ivar = array_rd32(E1000_IVAR0, index);
477                         if (rx_queue & 0x1) {
478                                 /* vector goes into third byte of register */
479                                 ivar = ivar & 0xFF00FFFF;
480                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
481                         } else {
482                                 /* vector goes into low byte of register */
483                                 ivar = ivar & 0xFFFFFF00;
484                                 ivar |= msix_vector | E1000_IVAR_VALID;
485                         }
486                         array_wr32(E1000_IVAR0, index, ivar);
487                 }
488                 if (tx_queue > IGB_N0_QUEUE) {
489                         index = (tx_queue >> 1);
490                         ivar = array_rd32(E1000_IVAR0, index);
491                         if (tx_queue & 0x1) {
492                                 /* vector goes into high byte of register */
493                                 ivar = ivar & 0x00FFFFFF;
494                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
495                         } else {
496                                 /* vector goes into second byte of register */
497                                 ivar = ivar & 0xFFFF00FF;
498                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
499                         }
500                         array_wr32(E1000_IVAR0, index, ivar);
501                 }
502                 q_vector->eims_value = 1 << msix_vector;
503                 break;
504         default:
505                 BUG();
506                 break;
507         }
508
509         /* add q_vector eims value to global eims_enable_mask */
510         adapter->eims_enable_mask |= q_vector->eims_value;
511
512         /* configure q_vector to set itr on first interrupt */
513         q_vector->set_itr = 1;
514 }
515
516 /**
517  * igb_configure_msix - Configure MSI-X hardware
518  *
519  * igb_configure_msix sets up the hardware to properly
520  * generate MSI-X interrupts.
521  **/
522 static void igb_configure_msix(struct igb_adapter *adapter)
523 {
524         u32 tmp;
525         int i, vector = 0;
526         struct e1000_hw *hw = &adapter->hw;
527
528         adapter->eims_enable_mask = 0;
529
530         /* set vector for other causes, i.e. link changes */
531         switch (hw->mac.type) {
532         case e1000_82575:
533                 tmp = rd32(E1000_CTRL_EXT);
534                 /* enable MSI-X PBA support*/
535                 tmp |= E1000_CTRL_EXT_PBA_CLR;
536
537                 /* Auto-Mask interrupts upon ICR read. */
538                 tmp |= E1000_CTRL_EXT_EIAME;
539                 tmp |= E1000_CTRL_EXT_IRCA;
540
541                 wr32(E1000_CTRL_EXT, tmp);
542
543                 /* enable msix_other interrupt */
544                 array_wr32(E1000_MSIXBM(0), vector++,
545                                       E1000_EIMS_OTHER);
546                 adapter->eims_other = E1000_EIMS_OTHER;
547
548                 break;
549
550         case e1000_82576:
551         case e1000_82580:
552                 /* Turn on MSI-X capability first, or our settings
553                  * won't stick.  And it will take days to debug. */
554                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
555                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
556                                 E1000_GPIE_NSICR);
557
558                 /* enable msix_other interrupt */
559                 adapter->eims_other = 1 << vector;
560                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
561
562                 wr32(E1000_IVAR_MISC, tmp);
563                 break;
564         default:
565                 /* do nothing, since nothing else supports MSI-X */
566                 break;
567         } /* switch (hw->mac.type) */
568
569         adapter->eims_enable_mask |= adapter->eims_other;
570
571         for (i = 0; i < adapter->num_q_vectors; i++)
572                 igb_assign_vector(adapter->q_vector[i], vector++);
573
574         wrfl();
575 }
576
577 /**
578  * igb_request_msix - Initialize MSI-X interrupts
579  *
580  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
581  * kernel.
582  **/
583 static int igb_request_msix(struct igb_adapter *adapter)
584 {
585         struct net_device *netdev = adapter->netdev;
586         struct e1000_hw *hw = &adapter->hw;
587         int i, err = 0, vector = 0;
588
589         err = request_irq(adapter->msix_entries[vector].vector,
590                           igb_msix_other, 0, netdev->name, adapter);
591         if (err)
592                 goto out;
593         vector++;
594
595         for (i = 0; i < adapter->num_q_vectors; i++) {
596                 struct igb_q_vector *q_vector = adapter->q_vector[i];
597
598                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
599
600                 if (q_vector->rx_ring && q_vector->tx_ring)
601                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
602                                 q_vector->rx_ring->queue_index);
603                 else if (q_vector->tx_ring)
604                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
605                                 q_vector->tx_ring->queue_index);
606                 else if (q_vector->rx_ring)
607                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
608                                 q_vector->rx_ring->queue_index);
609                 else
610                         sprintf(q_vector->name, "%s-unused", netdev->name);
611
612                 err = request_irq(adapter->msix_entries[vector].vector,
613                                   igb_msix_ring, 0, q_vector->name,
614                                   q_vector);
615                 if (err)
616                         goto out;
617                 vector++;
618         }
619
620         igb_configure_msix(adapter);
621         return 0;
622 out:
623         return err;
624 }
625
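/**
 * igb_reset_interrupt_capability - release MSI-X/MSI resources
 * @adapter: board private structure
 *
 * Disables MSI-X and frees the msix_entries array if MSI-X was in use,
 * otherwise disables MSI if it had been enabled.
 **/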
626 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
627 {
628         if (adapter->msix_entries) {
629                 pci_disable_msix(adapter->pdev);
630                 kfree(adapter->msix_entries);
631                 adapter->msix_entries = NULL;
632         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
633                 pci_disable_msi(adapter->pdev);
634         }
635 }
636
637 /**
638  * igb_free_q_vectors - Free memory allocated for interrupt vectors
639  * @adapter: board private structure to initialize
640  *
641  * This function frees the memory allocated to the q_vectors.  In addition if
642  * NAPI is enabled it will delete any references to the NAPI struct prior
643  * to freeing the q_vector.
644  **/
645 static void igb_free_q_vectors(struct igb_adapter *adapter)
646 {
647         int v_idx;
648
649         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
650                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
651                 adapter->q_vector[v_idx] = NULL;
652                 netif_napi_del(&q_vector->napi);
653                 kfree(q_vector);
654         }
655         adapter->num_q_vectors = 0;
656 }
657
658 /**
659  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
660  *
661  * This function resets the device so that it has 0 rx queues, tx queues, and
662  * MSI-X interrupts allocated.
663  */
664 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
665 {
666         igb_free_queues(adapter);
667         igb_free_q_vectors(adapter);
668         igb_reset_interrupt_capability(adapter);
669 }
670
671 /**
672  * igb_set_interrupt_capability - set MSI or MSI-X if supported
673  *
674  * Attempt to configure interrupts using the best available
675  * capabilities of the hardware and kernel.
676  **/
677 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
678 {
679         int err;
680         int numvecs, i;
681
682         /* Number of supported queues. */
683         adapter->num_rx_queues = adapter->rss_queues;
684         adapter->num_tx_queues = adapter->rss_queues;
685
686         /* start with one vector for every rx queue */
687         numvecs = adapter->num_rx_queues;
688
689         /* if tx handler is separate add 1 for every tx queue */
690         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
691                 numvecs += adapter->num_tx_queues;
692
693         /* store the number of vectors reserved for queues */
694         adapter->num_q_vectors = numvecs;
695
696         /* add 1 vector for link status interrupts */
697         numvecs++;
698         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
699                                         GFP_KERNEL);
700         if (!adapter->msix_entries)
701                 goto msi_only;
702
703         for (i = 0; i < numvecs; i++)
704                 adapter->msix_entries[i].entry = i;
705
706         err = pci_enable_msix(adapter->pdev,
707                               adapter->msix_entries,
708                               numvecs);
709         if (err == 0)
710                 goto out;
711
712         igb_reset_interrupt_capability(adapter);
713
714         /* If we can't do MSI-X, try MSI */
715 msi_only:
716 #ifdef CONFIG_PCI_IOV
717         /* disable SR-IOV for non MSI-X configurations */
718         if (adapter->vf_data) {
719                 struct e1000_hw *hw = &adapter->hw;
720                 /* disable iov and allow time for transactions to clear */
721                 pci_disable_sriov(adapter->pdev);
722                 msleep(500);
723
724                 kfree(adapter->vf_data);
725                 adapter->vf_data = NULL;
726                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
727                 msleep(100);
728                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
729         }
730 #endif
731         adapter->vfs_allocated_count = 0;
732         adapter->rss_queues = 1;
733         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
734         adapter->num_rx_queues = 1;
735         adapter->num_tx_queues = 1;
736         adapter->num_q_vectors = 1;
737         if (!pci_enable_msi(adapter->pdev))
738                 adapter->flags |= IGB_FLAG_HAS_MSI;
739 out:
740         /* Notify the stack of the (possibly) reduced Tx Queue count. */
741         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
742         return;
743 }
744
745 /**
746  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
747  * @adapter: board private structure to initialize
748  *
749  * We allocate one q_vector per queue interrupt.  If allocation fails we
750  * return -ENOMEM.
751  **/
752 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
753 {
754         struct igb_q_vector *q_vector;
755         struct e1000_hw *hw = &adapter->hw;
756         int v_idx;
757
758         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
759                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
760                 if (!q_vector)
761                         goto err_out;
762                 q_vector->adapter = adapter;
763                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
764                 q_vector->itr_val = IGB_START_ITR;
765                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
766                 adapter->q_vector[v_idx] = q_vector;
767         }
768         return 0;
769
770 err_out:
771         while (v_idx) {
772                 v_idx--;
773                 q_vector = adapter->q_vector[v_idx];
774                 netif_napi_del(&q_vector->napi);
775                 kfree(q_vector);
776                 adapter->q_vector[v_idx] = NULL;
777         }
778         return -ENOMEM;
779 }
780
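/**
 * igb_map_rx_ring_to_vector - attach an Rx ring to a q_vector
 * @adapter: board private structure
 * @ring_idx: index of the Rx ring to map
 * @v_idx: index of the q_vector that will service the ring
 **/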
781 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
782                                       int ring_idx, int v_idx)
783 {
784         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
785
786         q_vector->rx_ring = adapter->rx_ring[ring_idx];
787         q_vector->rx_ring->q_vector = q_vector;
788         q_vector->itr_val = adapter->rx_itr_setting;
789         if (q_vector->itr_val && q_vector->itr_val <= 3)
790                 q_vector->itr_val = IGB_START_ITR;
791 }
792
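/**
 * igb_map_tx_ring_to_vector - attach a Tx ring to a q_vector
 * @adapter: board private structure
 * @ring_idx: index of the Tx ring to map
 * @v_idx: index of the q_vector that will service the ring
 **/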
793 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
794                                       int ring_idx, int v_idx)
795 {
796         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
797
798         q_vector->tx_ring = adapter->tx_ring[ring_idx];
799         q_vector->tx_ring->q_vector = q_vector;
800         q_vector->itr_val = adapter->tx_itr_setting;
801         if (q_vector->itr_val && q_vector->itr_val <= 3)
802                 q_vector->itr_val = IGB_START_ITR;
803 }
804
805 /**
806  * igb_map_ring_to_vector - maps allocated queues to vectors
807  *
808  * This function maps the recently allocated queues to vectors.
809  **/
810 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
811 {
812         int i;
813         int v_idx = 0;
814
815         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
816             (adapter->num_q_vectors < adapter->num_tx_queues))
817                 return -ENOMEM;
818
819         if (adapter->num_q_vectors >=
820             (adapter->num_rx_queues + adapter->num_tx_queues)) {
821                 for (i = 0; i < adapter->num_rx_queues; i++)
822                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
823                 for (i = 0; i < adapter->num_tx_queues; i++)
824                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
825         } else {
826                 for (i = 0; i < adapter->num_rx_queues; i++) {
827                         if (i < adapter->num_tx_queues)
828                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
829                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
830                 }
831                 for (; i < adapter->num_tx_queues; i++)
832                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
833         }
834         return 0;
835 }
836
837 /**
838  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
839  *
840  * This function initializes the interrupts and allocates all of the queues.
841  **/
842 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
843 {
844         struct pci_dev *pdev = adapter->pdev;
845         int err;
846
847         igb_set_interrupt_capability(adapter);
848
849         err = igb_alloc_q_vectors(adapter);
850         if (err) {
851                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
852                 goto err_alloc_q_vectors;
853         }
854
855         err = igb_alloc_queues(adapter);
856         if (err) {
857                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
858                 goto err_alloc_queues;
859         }
860
861         err = igb_map_ring_to_vector(adapter);
862         if (err) {
863                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
864                 goto err_map_queues;
865         }
866
867
868         return 0;
869 err_map_queues:
870         igb_free_queues(adapter);
871 err_alloc_queues:
872         igb_free_q_vectors(adapter);
873 err_alloc_q_vectors:
874         igb_reset_interrupt_capability(adapter);
875         return err;
876 }
877
878 /**
879  * igb_request_irq - initialize interrupts
880  *
881  * Attempts to configure interrupts using the best available
882  * capabilities of the hardware and kernel.
883  **/
884 static int igb_request_irq(struct igb_adapter *adapter)
885 {
886         struct net_device *netdev = adapter->netdev;
887         struct pci_dev *pdev = adapter->pdev;
888         int err = 0;
889
890         if (adapter->msix_entries) {
891                 err = igb_request_msix(adapter);
892                 if (!err)
893                         goto request_done;
894                 /* fall back to MSI */
895                 igb_clear_interrupt_scheme(adapter);
896                 if (!pci_enable_msi(adapter->pdev))
897                         adapter->flags |= IGB_FLAG_HAS_MSI;
898                 igb_free_all_tx_resources(adapter);
899                 igb_free_all_rx_resources(adapter);
900                 adapter->num_tx_queues = 1;
901                 adapter->num_rx_queues = 1;
902                 adapter->num_q_vectors = 1;
903                 err = igb_alloc_q_vectors(adapter);
904                 if (err) {
905                         dev_err(&pdev->dev,
906                                 "Unable to allocate memory for vectors\n");
907                         goto request_done;
908                 }
909                 err = igb_alloc_queues(adapter);
910                 if (err) {
911                         dev_err(&pdev->dev,
912                                 "Unable to allocate memory for queues\n");
913                         igb_free_q_vectors(adapter);
914                         goto request_done;
915                 }
916                 igb_setup_all_tx_resources(adapter);
917                 igb_setup_all_rx_resources(adapter);
918         } else {
919                 igb_assign_vector(adapter->q_vector[0], 0);
920         }
921
922         if (adapter->flags & IGB_FLAG_HAS_MSI) {
923                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
924                                   netdev->name, adapter);
925                 if (!err)
926                         goto request_done;
927
928                 /* fall back to legacy interrupts */
929                 igb_reset_interrupt_capability(adapter);
930                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
931         }
932
933         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
934                           netdev->name, adapter);
935
936         if (err)
937                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
938                         err);
939
940 request_done:
941         return err;
942 }
943
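/**
 * igb_free_irq - free the interrupts requested by igb_request_irq
 * @adapter: board private structure
 **/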
944 static void igb_free_irq(struct igb_adapter *adapter)
945 {
946         if (adapter->msix_entries) {
947                 int vector = 0, i;
948
949                 free_irq(adapter->msix_entries[vector++].vector, adapter);
950
951                 for (i = 0; i < adapter->num_q_vectors; i++) {
952                         struct igb_q_vector *q_vector = adapter->q_vector[i];
953                         free_irq(adapter->msix_entries[vector++].vector,
954                                  q_vector);
955                 }
956         } else {
957                 free_irq(adapter->pdev->irq, adapter);
958         }
959 }
960
961 /**
962  * igb_irq_disable - Mask off interrupt generation on the NIC
963  * @adapter: board private structure
964  **/
965 static void igb_irq_disable(struct igb_adapter *adapter)
966 {
967         struct e1000_hw *hw = &adapter->hw;
968
969         /*
970          * we need to be careful when disabling interrupts.  The VFs are also
971          * mapped into these registers, and clearing the bits can cause
972          * issues for the VF drivers, so we only need to clear what we set
973          */
974         if (adapter->msix_entries) {
975                 u32 regval = rd32(E1000_EIAM);
976                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
977                 wr32(E1000_EIMC, adapter->eims_enable_mask);
978                 regval = rd32(E1000_EIAC);
979                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
980         }
981
982         wr32(E1000_IAM, 0);
983         wr32(E1000_IMC, ~0);
984         wrfl();
985         synchronize_irq(adapter->pdev->irq);
986 }
987
988 /**
989  * igb_irq_enable - Enable default interrupt generation settings
990  * @adapter: board private structure
991  **/
992 static void igb_irq_enable(struct igb_adapter *adapter)
993 {
994         struct e1000_hw *hw = &adapter->hw;
995
996         if (adapter->msix_entries) {
997                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
998                 u32 regval = rd32(E1000_EIAC);
999                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1000                 regval = rd32(E1000_EIAM);
1001                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1002                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1003                 if (adapter->vfs_allocated_count) {
1004                         wr32(E1000_MBVFIMR, 0xFF);
1005                         ims |= E1000_IMS_VMMB;
1006                 }
1007                 if (adapter->hw.mac.type == e1000_82580)
1008                         ims |= E1000_IMS_DRSTA;
1009
1010                 wr32(E1000_IMS, ims);
1011         } else {
1012                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1013                                 E1000_IMS_DRSTA);
1014                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1015                                 E1000_IMS_DRSTA);
1016         }
1017 }
1018
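/**
 * igb_update_mng_vlan - update the management VLAN filter entry
 * @adapter: board private structure
 *
 * Adds the manageability VLAN id to the VLAN filter table when the
 * firmware DHCP cookie specifies one, and removes the previous entry
 * if it is no longer in use.
 **/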
1019 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1020 {
1021         struct e1000_hw *hw = &adapter->hw;
1022         u16 vid = adapter->hw.mng_cookie.vlan_id;
1023         u16 old_vid = adapter->mng_vlan_id;
1024
1025         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1026                 /* add VID to filter table */
1027                 igb_vfta_set(hw, vid, true);
1028                 adapter->mng_vlan_id = vid;
1029         } else {
1030                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1031         }
1032
1033         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1034             (vid != old_vid) &&
1035             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1036                 /* remove VID from filter table */
1037                 igb_vfta_set(hw, old_vid, false);
1038         }
1039 }
1040
1041 /**
1042  * igb_release_hw_control - release control of the h/w to f/w
1043  * @adapter: address of board private structure
1044  *
1045  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1046  * For ASF and Pass Through versions of f/w this means that the
1047  * driver is no longer loaded.
1048  *
1049  **/
1050 static void igb_release_hw_control(struct igb_adapter *adapter)
1051 {
1052         struct e1000_hw *hw = &adapter->hw;
1053         u32 ctrl_ext;
1054
1055         /* Let firmware take over control of h/w */
1056         ctrl_ext = rd32(E1000_CTRL_EXT);
1057         wr32(E1000_CTRL_EXT,
1058                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1059 }
1060
1061 /**
1062  * igb_get_hw_control - get control of the h/w from f/w
1063  * @adapter: address of board private structure
1064  *
1065  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1066  * For ASF and Pass Through versions of f/w this means that
1067  * the driver is loaded.
1068  *
1069  **/
1070 static void igb_get_hw_control(struct igb_adapter *adapter)
1071 {
1072         struct e1000_hw *hw = &adapter->hw;
1073         u32 ctrl_ext;
1074
1075         /* Let firmware know the driver has taken over */
1076         ctrl_ext = rd32(E1000_CTRL_EXT);
1077         wr32(E1000_CTRL_EXT,
1078                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1079 }
1080
1081 /**
1082  * igb_configure - configure the hardware for RX and TX
1083  * @adapter: private board structure
1084  **/
1085 static void igb_configure(struct igb_adapter *adapter)
1086 {
1087         struct net_device *netdev = adapter->netdev;
1088         int i;
1089
1090         igb_get_hw_control(adapter);
1091         igb_set_rx_mode(netdev);
1092
1093         igb_restore_vlan(adapter);
1094
1095         igb_setup_tctl(adapter);
1096         igb_setup_mrqc(adapter);
1097         igb_setup_rctl(adapter);
1098
1099         igb_configure_tx(adapter);
1100         igb_configure_rx(adapter);
1101
1102         igb_rx_fifo_flush_82575(&adapter->hw);
1103
1104         /* call igb_desc_unused which always leaves
1105          * at least 1 descriptor unused to make sure
1106          * next_to_use != next_to_clean */
1107         for (i = 0; i < adapter->num_rx_queues; i++) {
1108                 struct igb_ring *ring = adapter->rx_ring[i];
1109                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1110         }
1111
1112
1113         adapter->tx_queue_len = netdev->tx_queue_len;
1114 }
1115
1116 /**
1117  * igb_power_up_link - Power up the phy/serdes link
1118  * @adapter: address of board private structure
1119  **/
1120 void igb_power_up_link(struct igb_adapter *adapter)
1121 {
1122         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1123                 igb_power_up_phy_copper(&adapter->hw);
1124         else
1125                 igb_power_up_serdes_link_82575(&adapter->hw);
1126 }
1127
1128 /**
1129  * igb_power_down_link - Power down the phy/serdes link
1130  * @adapter: address of board private structure
1131  */
1132 static void igb_power_down_link(struct igb_adapter *adapter)
1133 {
1134         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1135                 igb_power_down_phy_copper_82575(&adapter->hw);
1136         else
1137                 igb_shutdown_serdes_link_82575(&adapter->hw);
1138 }
1139
1140 /**
1141  * igb_up - Open the interface and prepare it to handle traffic
1142  * @adapter: board private structure
1143  **/
1144 int igb_up(struct igb_adapter *adapter)
1145 {
1146         struct e1000_hw *hw = &adapter->hw;
1147         int i;
1148
1149         /* hardware has been reset, we need to reload some things */
1150         igb_configure(adapter);
1151
1152         clear_bit(__IGB_DOWN, &adapter->state);
1153
1154         for (i = 0; i < adapter->num_q_vectors; i++) {
1155                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1156                 napi_enable(&q_vector->napi);
1157         }
1158         if (adapter->msix_entries)
1159                 igb_configure_msix(adapter);
1160         else
1161                 igb_assign_vector(adapter->q_vector[0], 0);
1162
1163         /* Clear any pending interrupts. */
1164         rd32(E1000_ICR);
1165         igb_irq_enable(adapter);
1166
1167         /* notify VFs that reset has been completed */
1168         if (adapter->vfs_allocated_count) {
1169                 u32 reg_data = rd32(E1000_CTRL_EXT);
1170                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1171                 wr32(E1000_CTRL_EXT, reg_data);
1172         }
1173
1174         netif_tx_start_all_queues(adapter->netdev);
1175
1176         /* start the watchdog. */
1177         hw->mac.get_link_status = 1;
1178         schedule_work(&adapter->watchdog_task);
1179
1180         return 0;
1181 }
1182
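/**
 * igb_down - Quiesce the interface in preparation for reset or close
 * @adapter: board private structure
 *
 * Stops transmit and receive, disables interrupts, cancels the watchdog
 * and phy-info timers, and resets the hardware unless the PCI channel
 * is offline.
 **/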
1183 void igb_down(struct igb_adapter *adapter)
1184 {
1185         struct net_device *netdev = adapter->netdev;
1186         struct e1000_hw *hw = &adapter->hw;
1187         u32 tctl, rctl;
1188         int i;
1189
1190         /* signal that we're down so the interrupt handler does not
1191          * reschedule our watchdog timer */
1192         set_bit(__IGB_DOWN, &adapter->state);
1193
1194         /* disable receives in the hardware */
1195         rctl = rd32(E1000_RCTL);
1196         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1197         /* flush and sleep below */
1198
1199         netif_tx_stop_all_queues(netdev);
1200
1201         /* disable transmits in the hardware */
1202         tctl = rd32(E1000_TCTL);
1203         tctl &= ~E1000_TCTL_EN;
1204         wr32(E1000_TCTL, tctl);
1205         /* flush both disables and wait for them to finish */
1206         wrfl();
1207         msleep(10);
1208
1209         for (i = 0; i < adapter->num_q_vectors; i++) {
1210                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1211                 napi_disable(&q_vector->napi);
1212         }
1213
1214         igb_irq_disable(adapter);
1215
1216         del_timer_sync(&adapter->watchdog_timer);
1217         del_timer_sync(&adapter->phy_info_timer);
1218
1219         netdev->tx_queue_len = adapter->tx_queue_len;
1220         netif_carrier_off(netdev);
1221
1222         /* record the stats before reset*/
1223         igb_update_stats(adapter);
1224
1225         adapter->link_speed = 0;
1226         adapter->link_duplex = 0;
1227
1228         if (!pci_channel_offline(adapter->pdev))
1229                 igb_reset(adapter);
1230         igb_clean_all_tx_rings(adapter);
1231         igb_clean_all_rx_rings(adapter);
1232 #ifdef CONFIG_IGB_DCA
1233
1234         /* since we reset the hardware DCA settings were cleared */
1235         igb_setup_dca(adapter);
1236 #endif
1237 }
1238
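/**
 * igb_reinit_locked - restart the interface under the resetting bit
 * @adapter: board private structure
 *
 * Serializes against concurrent resets via __IGB_RESETTING, then brings
 * the interface down and back up.
 **/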
1239 void igb_reinit_locked(struct igb_adapter *adapter)
1240 {
1241         WARN_ON(in_interrupt());
1242         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1243                 msleep(1);
1244         igb_down(adapter);
1245         igb_up(adapter);
1246         clear_bit(__IGB_RESETTING, &adapter->state);
1247 }
1248
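/**
 * igb_reset - bring the hardware into a known good state
 * @adapter: board private structure
 *
 * Repartitions the packet buffer, recalculates the flow control
 * watermarks, resets and re-initializes the hardware, and restores the
 * management VLAN and VLAN ethertype settings.
 **/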
1249 void igb_reset(struct igb_adapter *adapter)
1250 {
1251         struct pci_dev *pdev = adapter->pdev;
1252         struct e1000_hw *hw = &adapter->hw;
1253         struct e1000_mac_info *mac = &hw->mac;
1254         struct e1000_fc_info *fc = &hw->fc;
1255         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1256         u16 hwm;
1257
1258         /* Repartition PBA for MTUs greater than 9k.
1259          * CTRL.RST is required for the change to take effect.
1260          */
1261         switch (mac->type) {
1262         case e1000_82580:
1263                 pba = rd32(E1000_RXPBS);
1264                 pba = igb_rxpbs_adjust_82580(pba);
1265                 break;
1266         case e1000_82576:
1267                 pba = rd32(E1000_RXPBS);
1268                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1269                 break;
1270         case e1000_82575:
1271         default:
1272                 pba = E1000_PBA_34K;
1273                 break;
1274         }
1275
1276         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1277             (mac->type < e1000_82576)) {
1278                 /* adjust PBA for jumbo frames */
1279                 wr32(E1000_PBA, pba);
1280
1281                 /* To maintain wire speed transmits, the Tx FIFO should be
1282                  * large enough to accommodate two full transmit packets,
1283                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1284                  * the Rx FIFO should be large enough to accommodate at least
1285                  * one full receive packet and is similarly rounded up and
1286                  * expressed in KB. */
1287                 pba = rd32(E1000_PBA);
1288                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1289                 tx_space = pba >> 16;
1290                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1291                 pba &= 0xffff;
1292                 /* the tx fifo also stores 16 bytes of information about the tx packet,
1293                  * but we don't include the ethernet FCS because hardware appends it */
1294                 min_tx_space = (adapter->max_frame_size +
1295                                 sizeof(union e1000_adv_tx_desc) -
1296                                 ETH_FCS_LEN) * 2;
1297                 min_tx_space = ALIGN(min_tx_space, 1024);
1298                 min_tx_space >>= 10;
1299                 /* software strips receive CRC, so leave room for it */
1300                 min_rx_space = adapter->max_frame_size;
1301                 min_rx_space = ALIGN(min_rx_space, 1024);
1302                 min_rx_space >>= 10;
1303
1304                 /* If current Tx allocation is less than the min Tx FIFO size,
1305                  * and the min Tx FIFO size is less than the current Rx FIFO
1306                  * allocation, take space away from current Rx allocation */
1307                 if (tx_space < min_tx_space &&
1308                     ((min_tx_space - tx_space) < pba)) {
1309                         pba = pba - (min_tx_space - tx_space);
1310
1311                         /* if short on rx space, rx wins and must trump tx
1312                          * adjustment */
1313                         if (pba < min_rx_space)
1314                                 pba = min_rx_space;
1315                 }
1316                 wr32(E1000_PBA, pba);
1317         }
1318
1319         /* flow control settings */
1320         /* The high water mark must be low enough to fit one full frame
1321          * (or the size used for early receive) above it in the Rx FIFO.
1322          * Set it to the lower of:
1323          * - 90% of the Rx FIFO size, or
1324          * - the full Rx FIFO size minus one full frame */
1325         hwm = min(((pba << 10) * 9 / 10),
1326                         ((pba << 10) - 2 * adapter->max_frame_size));
1327
1328         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1329         fc->low_water = fc->high_water - 16;
1330         fc->pause_time = 0xFFFF;
1331         fc->send_xon = 1;
1332         fc->current_mode = fc->requested_mode;
1333
1334         /* disable receive for all VFs and wait one second */
1335         if (adapter->vfs_allocated_count) {
1336                 int i;
1337                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1338                         adapter->vf_data[i].flags = 0;
1339
1340                 /* ping all the active vfs to let them know we are going down */
1341                 igb_ping_all_vfs(adapter);
1342
1343                 /* disable transmits and receives */
1344                 wr32(E1000_VFRE, 0);
1345                 wr32(E1000_VFTE, 0);
1346         }
1347
1348         /* Allow time for pending master requests to run */
1349         hw->mac.ops.reset_hw(hw);
1350         wr32(E1000_WUC, 0);
1351
1352         if (hw->mac.ops.init_hw(hw))
1353                 dev_err(&pdev->dev, "Hardware Error\n");
1354
1355         if (hw->mac.type == e1000_82580) {
1356                 u32 reg = rd32(E1000_PCIEMISC);
1357                 wr32(E1000_PCIEMISC,
1358                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1359         }
1360         if (!netif_running(adapter->netdev))
1361                 igb_power_down_link(adapter);
1362
1363         igb_update_mng_vlan(adapter);
1364
1365         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1366         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1367
1368         igb_get_phy_info(hw);
1369 }
1370
1371 static const struct net_device_ops igb_netdev_ops = {
1372         .ndo_open               = igb_open,
1373         .ndo_stop               = igb_close,
1374         .ndo_start_xmit         = igb_xmit_frame_adv,
1375         .ndo_get_stats          = igb_get_stats,
1376         .ndo_set_rx_mode        = igb_set_rx_mode,
1377         .ndo_set_multicast_list = igb_set_rx_mode,
1378         .ndo_set_mac_address    = igb_set_mac,
1379         .ndo_change_mtu         = igb_change_mtu,
1380         .ndo_do_ioctl           = igb_ioctl,
1381         .ndo_tx_timeout         = igb_tx_timeout,
1382         .ndo_validate_addr      = eth_validate_addr,
1383         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1384         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1385         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1386         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1387         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1388         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1389         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1390 #ifdef CONFIG_NET_POLL_CONTROLLER
1391         .ndo_poll_controller    = igb_netpoll,
1392 #endif
1393 };
1394
1395 /**
1396  * igb_probe - Device Initialization Routine
1397  * @pdev: PCI device information struct
1398  * @ent: entry in igb_pci_tbl
1399  *
1400  * Returns 0 on success, negative on failure
1401  *
1402  * igb_probe initializes an adapter identified by a pci_dev structure.
1403  * The OS initialization, configuring of the adapter private structure,
1404  * and a hardware reset occur.
1405  **/
1406 static int __devinit igb_probe(struct pci_dev *pdev,
1407                                const struct pci_device_id *ent)
1408 {
1409         struct net_device *netdev;
1410         struct igb_adapter *adapter;
1411         struct e1000_hw *hw;
1412         u16 eeprom_data = 0;
1413         static int global_quad_port_a; /* global quad port a indication */
1414         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1415         unsigned long mmio_start, mmio_len;
1416         int err, pci_using_dac;
1417         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1418         u32 part_num;
1419
1420         err = pci_enable_device_mem(pdev);
1421         if (err)
1422                 return err;
1423
1424         pci_using_dac = 0;
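             /* try a 64-bit DMA mask first; if it is refused fall back to
              * 32-bit addressing, and only advertise HIGHDMA (pci_using_dac)
              * when both 64-bit masks are accepted */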
1425         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1426         if (!err) {
1427                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1428                 if (!err)
1429                         pci_using_dac = 1;
1430         } else {
1431                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1432                 if (err) {
1433                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1434                         if (err) {
1435                                 dev_err(&pdev->dev, "No usable DMA "
1436                                         "configuration, aborting\n");
1437                                 goto err_dma;
1438                         }
1439                 }
1440         }
1441
1442         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1443                                            IORESOURCE_MEM),
1444                                            igb_driver_name);
1445         if (err)
1446                 goto err_pci_reg;
1447
1448         pci_enable_pcie_error_reporting(pdev);
1449
1450         pci_set_master(pdev);
1451         pci_save_state(pdev);
1452
1453         err = -ENOMEM;
1454         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1455                                    IGB_ABS_MAX_TX_QUEUES);
1456         if (!netdev)
1457                 goto err_alloc_etherdev;
1458
1459         SET_NETDEV_DEV(netdev, &pdev->dev);
1460
1461         pci_set_drvdata(pdev, netdev);
1462         adapter = netdev_priv(netdev);
1463         adapter->netdev = netdev;
1464         adapter->pdev = pdev;
1465         hw = &adapter->hw;
1466         hw->back = adapter;
1467         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1468
1469         mmio_start = pci_resource_start(pdev, 0);
1470         mmio_len = pci_resource_len(pdev, 0);
1471
1472         err = -EIO;
1473         hw->hw_addr = ioremap(mmio_start, mmio_len);
1474         if (!hw->hw_addr)
1475                 goto err_ioremap;
1476
1477         netdev->netdev_ops = &igb_netdev_ops;
1478         igb_set_ethtool_ops(netdev);
1479         netdev->watchdog_timeo = 5 * HZ;
1480
1481         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1482
1483         netdev->mem_start = mmio_start;
1484         netdev->mem_end = mmio_start + mmio_len;
1485
1486         /* PCI config space info */
1487         hw->vendor_id = pdev->vendor;
1488         hw->device_id = pdev->device;
1489         hw->revision_id = pdev->revision;
1490         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1491         hw->subsystem_device_id = pdev->subsystem_device;
1492
1493         /* Copy the default MAC, PHY and NVM function pointers */
1494         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1495         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1496         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1497         /* Initialize skew-specific constants */
1498         err = ei->get_invariants(hw);
1499         if (err)
1500                 goto err_sw_init;
1501
1502         /* setup the private structure */
1503         err = igb_sw_init(adapter);
1504         if (err)
1505                 goto err_sw_init;
1506
1507         igb_get_bus_info_pcie(hw);
1508
1509         hw->phy.autoneg_wait_to_complete = false;
1510
1511         /* Copper options */
1512         if (hw->phy.media_type == e1000_media_type_copper) {
1513                 hw->phy.mdix = AUTO_ALL_MODES;
1514                 hw->phy.disable_polarity_correction = false;
1515                 hw->phy.ms_type = e1000_ms_hw_default;
1516         }
1517
1518         if (igb_check_reset_block(hw))
1519                 dev_info(&pdev->dev,
1520                         "PHY reset is blocked due to SOL/IDER session.\n");
1521
1522         netdev->features = NETIF_F_SG |
1523                            NETIF_F_IP_CSUM |
1524                            NETIF_F_HW_VLAN_TX |
1525                            NETIF_F_HW_VLAN_RX |
1526                            NETIF_F_HW_VLAN_FILTER;
1527
1528         netdev->features |= NETIF_F_IPV6_CSUM;
1529         netdev->features |= NETIF_F_TSO;
1530         netdev->features |= NETIF_F_TSO6;
1531         netdev->features |= NETIF_F_GRO;
1532
1533         netdev->vlan_features |= NETIF_F_TSO;
1534         netdev->vlan_features |= NETIF_F_TSO6;
1535         netdev->vlan_features |= NETIF_F_IP_CSUM;
1536         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1537         netdev->vlan_features |= NETIF_F_SG;
1538
1539         if (pci_using_dac)
1540                 netdev->features |= NETIF_F_HIGHDMA;
1541
1542         if (hw->mac.type >= e1000_82576)
1543                 netdev->features |= NETIF_F_SCTP_CSUM;
1544
1545         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1546
1547         /* before reading the NVM, reset the controller to put the device in a
1548          * known good starting state */
1549         hw->mac.ops.reset_hw(hw);
1550
1551         /* make sure the NVM is good */
1552         if (igb_validate_nvm_checksum(hw) < 0) {
1553                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1554                 err = -EIO;
1555                 goto err_eeprom;
1556         }
1557
1558         /* copy the MAC address out of the NVM */
1559         if (hw->mac.ops.read_mac_addr(hw))
1560                 dev_err(&pdev->dev, "NVM Read Error\n");
1561
1562         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1563         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1564
1565         if (!is_valid_ether_addr(netdev->perm_addr)) {
1566                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1567                 err = -EIO;
1568                 goto err_eeprom;
1569         }
1570
1571         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1572                     (unsigned long) adapter);
1573         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1574                     (unsigned long) adapter);
1575
1576         INIT_WORK(&adapter->reset_task, igb_reset_task);
1577         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1578
1579         /* Initialize link properties that are user-changeable */
1580         adapter->fc_autoneg = true;
1581         hw->mac.autoneg = true;
1582         hw->phy.autoneg_advertised = 0x2f;
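             /* 0x2f advertises 10 and 100 Mb/s in half and full duplex plus
              * 1000 Mb/s full duplex; 1000 Mb/s half duplex is not offered */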
1583
1584         hw->fc.requested_mode = e1000_fc_default;
1585         hw->fc.current_mode = e1000_fc_default;
1586
1587         igb_validate_mdi_setting(hw);
1588
1589         /* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
1590          * enable the ACPI Magic Packet filter
1591          */
1592
1593         if (hw->bus.func == 0)
1594                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1595         else if (hw->mac.type == e1000_82580)
1596                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1597                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1598                                  &eeprom_data);
1599         else if (hw->bus.func == 1)
1600                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1601
1602         if (eeprom_data & eeprom_apme_mask)
1603                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1604
1605         /* now that we have the eeprom settings, apply the special cases where
1606          * the eeprom may be wrong or the board simply won't support wake on
1607          * lan on a particular port */
1608         switch (pdev->device) {
1609         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1610                 adapter->eeprom_wol = 0;
1611                 break;
1612         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1613         case E1000_DEV_ID_82576_FIBER:
1614         case E1000_DEV_ID_82576_SERDES:
1615                 /* Wake events only supported on port A for dual fiber
1616                  * regardless of eeprom setting */
1617                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1618                         adapter->eeprom_wol = 0;
1619                 break;
1620         case E1000_DEV_ID_82576_QUAD_COPPER:
1621                 /* if quad port adapter, disable WoL on all but port A */
1622                 if (global_quad_port_a != 0)
1623                         adapter->eeprom_wol = 0;
1624                 else
1625                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1626                 /* Reset for multiple quad port adapters */
1627                 if (++global_quad_port_a == 4)
1628                         global_quad_port_a = 0;
1629                 break;
1630         }
1631
1632         /* initialize the wol settings based on the eeprom settings */
1633         adapter->wol = adapter->eeprom_wol;
1634         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1635
1636         /* reset the hardware with the new settings */
1637         igb_reset(adapter);
1638
1639         /* let the f/w know that the h/w is now under the control of the
1640          * driver. */
1641         igb_get_hw_control(adapter);
1642
1643         strcpy(netdev->name, "eth%d");
1644         err = register_netdev(netdev);
1645         if (err)
1646                 goto err_register;
1647
1648         /* carrier off reporting is important to ethtool even BEFORE open */
1649         netif_carrier_off(netdev);
1650
1651 #ifdef CONFIG_IGB_DCA
1652         if (dca_add_requester(&pdev->dev) == 0) {
1653                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1654                 dev_info(&pdev->dev, "DCA enabled\n");
1655                 igb_setup_dca(adapter);
1656         }
1657
1658 #endif
1659         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1660         /* print bus type/speed/width info */
1661         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1662                  netdev->name,
1663                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1664                                                             "unknown"),
1665                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1666                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1667                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1668                    "unknown"),
1669                  netdev->dev_addr);
1670
1671         igb_read_part_num(hw, &part_num);
1672         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1673                 (part_num >> 8), (part_num & 0xff));
1674
1675         dev_info(&pdev->dev,
1676                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1677                 adapter->msix_entries ? "MSI-X" :
1678                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1679                 adapter->num_rx_queues, adapter->num_tx_queues);
1680
1681         return 0;
1682
1683 err_register:
1684         igb_release_hw_control(adapter);
1685 err_eeprom:
1686         if (!igb_check_reset_block(hw))
1687                 igb_reset_phy(hw);
1688
1689         if (hw->flash_address)
1690                 iounmap(hw->flash_address);
1691 err_sw_init:
1692         igb_clear_interrupt_scheme(adapter);
1693         iounmap(hw->hw_addr);
1694 err_ioremap:
1695         free_netdev(netdev);
1696 err_alloc_etherdev:
1697         pci_release_selected_regions(pdev,
1698                                      pci_select_bars(pdev, IORESOURCE_MEM));
1699 err_pci_reg:
1700 err_dma:
1701         pci_disable_device(pdev);
1702         return err;
1703 }
1704
1705 /**
1706  * igb_remove - Device Removal Routine
1707  * @pdev: PCI device information struct
1708  *
1709  * igb_remove is called by the PCI subsystem to alert the driver
1710  * that it should release a PCI device.  This could be caused by a
1711  * Hot-Plug event, or because the driver is going to be removed from
1712  * memory.
1713  **/
1714 static void __devexit igb_remove(struct pci_dev *pdev)
1715 {
1716         struct net_device *netdev = pci_get_drvdata(pdev);
1717         struct igb_adapter *adapter = netdev_priv(netdev);
1718         struct e1000_hw *hw = &adapter->hw;
1719
1720         /* flush_scheduled_work() may reschedule our watchdog task, so
1721          * explicitly disable the watchdog task from being rescheduled */
1722         set_bit(__IGB_DOWN, &adapter->state);
1723         del_timer_sync(&adapter->watchdog_timer);
1724         del_timer_sync(&adapter->phy_info_timer);
1725
1726         flush_scheduled_work();
1727
1728 #ifdef CONFIG_IGB_DCA
1729         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1730                 dev_info(&pdev->dev, "DCA disabled\n");
1731                 dca_remove_requester(&pdev->dev);
1732                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1733                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1734         }
1735 #endif
1736
1737         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1738          * would have already happened in close and is redundant. */
1739         igb_release_hw_control(adapter);
1740
1741         unregister_netdev(netdev);
1742
1743         igb_clear_interrupt_scheme(adapter);
1744
1745 #ifdef CONFIG_PCI_IOV
1746         /* reclaim resources allocated to VFs */
1747         if (adapter->vf_data) {
1748                 /* disable iov and allow time for transactions to clear */
1749                 pci_disable_sriov(pdev);
1750                 msleep(500);
1751
1752                 kfree(adapter->vf_data);
1753                 adapter->vf_data = NULL;
1754                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1755                 msleep(100);
1756                 dev_info(&pdev->dev, "IOV Disabled\n");
1757         }
1758 #endif
1759
1760         iounmap(hw->hw_addr);
1761         if (hw->flash_address)
1762                 iounmap(hw->flash_address);
1763         pci_release_selected_regions(pdev,
1764                                      pci_select_bars(pdev, IORESOURCE_MEM));
1765
1766         free_netdev(netdev);
1767
1768         pci_disable_pcie_error_reporting(pdev);
1769
1770         pci_disable_device(pdev);
1771 }
1772
1773 /**
1774  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1775  * @adapter: board private structure to initialize
1776  *
1777  * This function initializes the vf specific data storage and then attempts to
1778  * allocate the VFs.  The reason for this ordering is that it is much
1779  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1780  * the memory for the VFs.
1781  **/
1782 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1783 {
1784 #ifdef CONFIG_PCI_IOV
1785         struct pci_dev *pdev = adapter->pdev;
1786
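             /* the hardware provides eight VM pools and the PF keeps one for
              * itself, so at most seven VFs can be supported */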
1787         if (adapter->vfs_allocated_count > 7)
1788                 adapter->vfs_allocated_count = 7;
1789
1790         if (adapter->vfs_allocated_count) {
1791                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1792                                            sizeof(struct vf_data_storage),
1793                                            GFP_KERNEL);
1794                 /* if allocation failed then we do not support SR-IOV */
1795                 if (!adapter->vf_data) {
1796                         adapter->vfs_allocated_count = 0;
1797                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1798                                 "Data Storage\n");
1799                 }
1800         }
1801
1802         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1803                 kfree(adapter->vf_data);
1804                 adapter->vf_data = NULL;
1805 #endif /* CONFIG_PCI_IOV */
1806                 adapter->vfs_allocated_count = 0;
1807 #ifdef CONFIG_PCI_IOV
1808         } else {
1809                 unsigned char mac_addr[ETH_ALEN];
1810                 int i;
1811                 dev_info(&pdev->dev, "%d vfs allocated\n",
1812                          adapter->vfs_allocated_count);
1813                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1814                         random_ether_addr(mac_addr);
1815                         igb_set_vf_mac(adapter, i, mac_addr);
1816                 }
1817         }
1818 #endif /* CONFIG_PCI_IOV */
1819 }
1820
1821
1822 /**
1823  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1824  * @adapter: board private structure to initialize
1825  *
1826  * igb_init_hw_timer initializes the function pointers and values for the
1827  * hardware timer.
1828  **/
1829 static void igb_init_hw_timer(struct igb_adapter *adapter)
1830 {
1831         struct e1000_hw *hw = &adapter->hw;
1832
1833         switch (hw->mac.type) {
1834         case e1000_82580:
1835                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1836                 adapter->cycles.read = igb_read_clock;
1837                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1838                 adapter->cycles.mult = 1;
1839                 /*
1840                  * The 82580 timesync updates the system timer every 8ns by 8ns
1841                  * and the value cannot be shifted.  Instead we need to shift
1842                  * the registers to generate a 64bit timer value.  As a result
1843                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1844                  * 24 in order to generate a larger value for synchronization.
1845                  */
1846                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1847                 /* disable system timer temporarily by setting bit 31 */
1848                 wr32(E1000_TSAUXC, 0x80000000);
1849                 wrfl();
1850
1851                 /* Set registers so that rollover occurs soon to test this. */
1852                 wr32(E1000_SYSTIMR, 0x00000000);
1853                 wr32(E1000_SYSTIML, 0x80000000);
1854                 wr32(E1000_SYSTIMH, 0x000000FF);
1855                 wrfl();
1856
1857                 /* enable system timer by clearing bit 31 */
1858                 wr32(E1000_TSAUXC, 0x0);
1859                 wrfl();
1860
1861                 timecounter_init(&adapter->clock,
1862                                  &adapter->cycles,
1863                                  ktime_to_ns(ktime_get_real()));
1864                 /*
1865                  * Synchronize our NIC clock against system wall clock. NIC
1866                  * time stamp reading requires ~3us per sample, each sample
1867                  * was pretty stable even under load => only require 10
1868                  * samples for each offset comparison.
1869                  */
1870                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1871                 adapter->compare.source = &adapter->clock;
1872                 adapter->compare.target = ktime_get_real;
1873                 adapter->compare.num_samples = 10;
1874                 timecompare_update(&adapter->compare, 0);
1875                 break;
1876         case e1000_82576:
1877                 /*
1878                  * Initialize hardware timer: we keep it running just in case
1879                  * that some program needs it later on.
1880                  */
1881                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1882                 adapter->cycles.read = igb_read_clock;
1883                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1884                 adapter->cycles.mult = 1;
1885                 /*
1886                  * Scale the NIC clock cycle by a large factor so that
1887                  * relatively small clock corrections can be added or
1888                  * subtracted at each clock tick. The drawbacks of a large
1889                  * factor are a) that the clock register overflows more quickly
1890                  * (not such a big deal) and b) that the increment per tick has
1891                  * to fit into 24 bits.  As a result we need to use a shift of
1892                  * 19 so we can fit a value of 16 into the TIMINCA register.
1893                  */
1894                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1895                 wr32(E1000_TIMINCA,
1896                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1897                                 (16 << IGB_82576_TSYNC_SHIFT));
1898
1899                 /* Set registers so that rollover occurs soon to test this. */
1900                 wr32(E1000_SYSTIML, 0x00000000);
1901                 wr32(E1000_SYSTIMH, 0xFF800000);
1902                 wrfl();
1903
1904                 timecounter_init(&adapter->clock,
1905                                  &adapter->cycles,
1906                                  ktime_to_ns(ktime_get_real()));
1907                 /*
1908                  * Synchronize our NIC clock against system wall clock. NIC
1909                  * time stamp reading requires ~3us per sample, each sample
1910                  * was pretty stable even under load => only require 10
1911                  * samples for each offset comparison.
1912                  */
1913                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1914                 adapter->compare.source = &adapter->clock;
1915                 adapter->compare.target = ktime_get_real;
1916                 adapter->compare.num_samples = 10;
1917                 timecompare_update(&adapter->compare, 0);
1918                 break;
1919         case e1000_82575:
1920                 /* 82575 does not support timesync */
1921         default:
1922                 break;
1923         }
1924
1925 }
1926
1927 /**
1928  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1929  * @adapter: board private structure to initialize
1930  *
1931  * igb_sw_init initializes the Adapter private data structure.
1932  * Fields are initialized based on PCI device information and
1933  * OS network device settings (MTU size).
1934  **/
1935 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1936 {
1937         struct e1000_hw *hw = &adapter->hw;
1938         struct net_device *netdev = adapter->netdev;
1939         struct pci_dev *pdev = adapter->pdev;
1940
1941         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1942
1943         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1944         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1945         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1946         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1947
1948         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1949         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1950
1951 #ifdef CONFIG_PCI_IOV
1952         if (hw->mac.type == e1000_82576)
1953                 adapter->vfs_allocated_count = max_vfs;
1954
1955 #endif /* CONFIG_PCI_IOV */
1956         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1957
1958         /*
1959          * if more than 4 RSS queues are in use, or more than 6 VFs are
1960          * allocated while multiple RSS queues are enabled, combine the
1961          * queues into pairs to conserve the limited supply of interrupt vectors
1962          */
1963         if ((adapter->rss_queues > 4) ||
1964             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1965                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1966
1967         /* This call may decrease the number of queues */
1968         if (igb_init_interrupt_scheme(adapter)) {
1969                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1970                 return -ENOMEM;
1971         }
1972
1973         igb_init_hw_timer(adapter);
1974         igb_probe_vfs(adapter);
1975
1976         /* Explicitly disable IRQ since the NIC can be in any state. */
1977         igb_irq_disable(adapter);
1978
1979         set_bit(__IGB_DOWN, &adapter->state);
1980         return 0;
1981 }
1982
1983 /**
1984  * igb_open - Called when a network interface is made active
1985  * @netdev: network interface device structure
1986  *
1987  * Returns 0 on success, negative value on failure
1988  *
1989  * The open entry point is called when a network interface is made
1990  * active by the system (IFF_UP).  At this point all resources needed
1991  * for transmit and receive operations are allocated, the interrupt
1992  * handler is registered with the OS, the watchdog timer is started,
1993  * and the stack is notified that the interface is ready.
1994  **/
1995 static int igb_open(struct net_device *netdev)
1996 {
1997         struct igb_adapter *adapter = netdev_priv(netdev);
1998         struct e1000_hw *hw = &adapter->hw;
1999         int err;
2000         int i;
2001
2002         /* disallow open during test */
2003         if (test_bit(__IGB_TESTING, &adapter->state))
2004                 return -EBUSY;
2005
2006         netif_carrier_off(netdev);
2007
2008         /* allocate transmit descriptors */
2009         err = igb_setup_all_tx_resources(adapter);
2010         if (err)
2011                 goto err_setup_tx;
2012
2013         /* allocate receive descriptors */
2014         err = igb_setup_all_rx_resources(adapter);
2015         if (err)
2016                 goto err_setup_rx;
2017
2018         igb_power_up_link(adapter);
2019
2020         /* before we allocate an interrupt, we must be ready to handle it.
2021          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2022          * as soon as we call pci_request_irq, so we have to set up our
2023          * clean_rx handler before we do so.  */
2024         igb_configure(adapter);
2025
2026         err = igb_request_irq(adapter);
2027         if (err)
2028                 goto err_req_irq;
2029
2030         /* From here on the code is the same as igb_up() */
2031         clear_bit(__IGB_DOWN, &adapter->state);
2032
2033         for (i = 0; i < adapter->num_q_vectors; i++) {
2034                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2035                 napi_enable(&q_vector->napi);
2036         }
2037
2038         /* Clear any pending interrupts. */
2039         rd32(E1000_ICR);
2040
2041         igb_irq_enable(adapter);
2042
2043         /* notify VFs that reset has been completed */
2044         if (adapter->vfs_allocated_count) {
2045                 u32 reg_data = rd32(E1000_CTRL_EXT);
2046                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2047                 wr32(E1000_CTRL_EXT, reg_data);
2048         }
2049
2050         netif_tx_start_all_queues(netdev);
2051
2052         /* start the watchdog. */
2053         hw->mac.get_link_status = 1;
2054         schedule_work(&adapter->watchdog_task);
2055
2056         return 0;
2057
2058 err_req_irq:
2059         igb_release_hw_control(adapter);
2060         igb_power_down_link(adapter);
2061         igb_free_all_rx_resources(adapter);
2062 err_setup_rx:
2063         igb_free_all_tx_resources(adapter);
2064 err_setup_tx:
2065         igb_reset(adapter);
2066
2067         return err;
2068 }
2069
2070 /**
2071  * igb_close - Disables a network interface
2072  * @netdev: network interface device structure
2073  *
2074  * Returns 0, this is not allowed to fail
2075  *
2076  * The close entry point is called when an interface is de-activated
2077  * by the OS.  The hardware is still under the driver's control, but
2078  * needs to be disabled.  A global MAC reset is issued to stop the
2079  * hardware, and all transmit and receive resources are freed.
2080  **/
2081 static int igb_close(struct net_device *netdev)
2082 {
2083         struct igb_adapter *adapter = netdev_priv(netdev);
2084
2085         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2086         igb_down(adapter);
2087
2088         igb_free_irq(adapter);
2089
2090         igb_free_all_tx_resources(adapter);
2091         igb_free_all_rx_resources(adapter);
2092
2093         return 0;
2094 }
2095
2096 /**
2097  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2098  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2099  *
2100  * Return 0 on success, negative on failure
2101  **/
2102 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2103 {
2104         struct pci_dev *pdev = tx_ring->pdev;
2105         int size;
2106
2107         size = sizeof(struct igb_buffer) * tx_ring->count;
2108         tx_ring->buffer_info = vmalloc(size);
2109         if (!tx_ring->buffer_info)
2110                 goto err;
2111         memset(tx_ring->buffer_info, 0, size);
2112
2113         /* round up to nearest 4K */
2114         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2115         tx_ring->size = ALIGN(tx_ring->size, 4096);
2116
2117         tx_ring->desc = pci_alloc_consistent(pdev,
2118                                              tx_ring->size,
2119                                              &tx_ring->dma);
2120
2121         if (!tx_ring->desc)
2122                 goto err;
2123
2124         tx_ring->next_to_use = 0;
2125         tx_ring->next_to_clean = 0;
2126         return 0;
2127
2128 err:
2129         vfree(tx_ring->buffer_info);
2130         dev_err(&pdev->dev,
2131                 "Unable to allocate memory for the transmit descriptor ring\n");
2132         return -ENOMEM;
2133 }
2134
2135 /**
2136  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2137  *                                (Descriptors) for all queues
2138  * @adapter: board private structure
2139  *
2140  * Return 0 on success, negative on failure
2141  **/
2142 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2143 {
2144         struct pci_dev *pdev = adapter->pdev;
2145         int i, err = 0;
2146
2147         for (i = 0; i < adapter->num_tx_queues; i++) {
2148                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2149                 if (err) {
2150                         dev_err(&pdev->dev,
2151                                 "Allocation for Tx Queue %u failed\n", i);
2152                         for (i--; i >= 0; i--)
2153                                 igb_free_tx_resources(adapter->tx_ring[i]);
2154                         break;
2155                 }
2156         }
2157
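             /* map every possible transmit queue index onto one of the rings
              * that were actually allocated so queue selection never lands on
              * an unallocated ring */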
2158         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2159                 int r_idx = i % adapter->num_tx_queues;
2160                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2161         }
2162         return err;
2163 }
2164
2165 /**
2166  * igb_setup_tctl - configure the transmit control registers
2167  * @adapter: Board private structure
2168  **/
2169 void igb_setup_tctl(struct igb_adapter *adapter)
2170 {
2171         struct e1000_hw *hw = &adapter->hw;
2172         u32 tctl;
2173
2174         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2175         wr32(E1000_TXDCTL(0), 0);
2176
2177         /* Program the Transmit Control Register */
2178         tctl = rd32(E1000_TCTL);
2179         tctl &= ~E1000_TCTL_CT;
2180         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2181                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2182
2183         igb_config_collision_dist(hw);
2184
2185         /* Enable transmits */
2186         tctl |= E1000_TCTL_EN;
2187
2188         wr32(E1000_TCTL, tctl);
2189 }
2190
2191 /**
2192  * igb_configure_tx_ring - Configure transmit ring after Reset
2193  * @adapter: board private structure
2194  * @ring: tx ring to configure
2195  *
2196  * Configure a transmit ring after a reset.
2197  **/
2198 void igb_configure_tx_ring(struct igb_adapter *adapter,
2199                            struct igb_ring *ring)
2200 {
2201         struct e1000_hw *hw = &adapter->hw;
2202         u32 txdctl;
2203         u64 tdba = ring->dma;
2204         int reg_idx = ring->reg_idx;
2205
2206         /* disable the queue */
2207         txdctl = rd32(E1000_TXDCTL(reg_idx));
2208         wr32(E1000_TXDCTL(reg_idx),
2209                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2210         wrfl();
2211         mdelay(10);
2212
2213         wr32(E1000_TDLEN(reg_idx),
2214                         ring->count * sizeof(union e1000_adv_tx_desc));
2215         wr32(E1000_TDBAL(reg_idx),
2216                         tdba & 0x00000000ffffffffULL);
2217         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2218
2219         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2220         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2221         writel(0, ring->head);
2222         writel(0, ring->tail);
2223
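             /* program the prefetch, host and write-back thresholds into
              * their byte-aligned fields of TXDCTL */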
2224         txdctl |= IGB_TX_PTHRESH;
2225         txdctl |= IGB_TX_HTHRESH << 8;
2226         txdctl |= IGB_TX_WTHRESH << 16;
2227
2228         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2229         wr32(E1000_TXDCTL(reg_idx), txdctl);
2230 }
2231
2232 /**
2233  * igb_configure_tx - Configure transmit Unit after Reset
2234  * @adapter: board private structure
2235  *
2236  * Configure the Tx unit of the MAC after a reset.
2237  **/
2238 static void igb_configure_tx(struct igb_adapter *adapter)
2239 {
2240         int i;
2241
2242         for (i = 0; i < adapter->num_tx_queues; i++)
2243                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2244 }
2245
2246 /**
2247  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2248  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2249  *
2250  * Returns 0 on success, negative on failure
2251  **/
2252 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2253 {
2254         struct pci_dev *pdev = rx_ring->pdev;
2255         int size, desc_len;
2256
2257         size = sizeof(struct igb_buffer) * rx_ring->count;
2258         rx_ring->buffer_info = vmalloc(size);
2259         if (!rx_ring->buffer_info)
2260                 goto err;
2261         memset(rx_ring->buffer_info, 0, size);
2262
2263         desc_len = sizeof(union e1000_adv_rx_desc);
2264
2265         /* Round up to nearest 4K */
2266         rx_ring->size = rx_ring->count * desc_len;
2267         rx_ring->size = ALIGN(rx_ring->size, 4096);
2268
2269         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2270                                              &rx_ring->dma);
2271
2272         if (!rx_ring->desc)
2273                 goto err;
2274
2275         rx_ring->next_to_clean = 0;
2276         rx_ring->next_to_use = 0;
2277
2278         return 0;
2279
2280 err:
2281         vfree(rx_ring->buffer_info);
2282         rx_ring->buffer_info = NULL;
2283         dev_err(&pdev->dev, "Unable to allocate memory for "
2284                 "the receive descriptor ring\n");
2285         return -ENOMEM;
2286 }
2287
2288 /**
2289  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2290  *                                (Descriptors) for all queues
2291  * @adapter: board private structure
2292  *
2293  * Return 0 on success, negative on failure
2294  **/
2295 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2296 {
2297         struct pci_dev *pdev = adapter->pdev;
2298         int i, err = 0;
2299
2300         for (i = 0; i < adapter->num_rx_queues; i++) {
2301                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2302                 if (err) {
2303                         dev_err(&pdev->dev,
2304                                 "Allocation for Rx Queue %u failed\n", i);
2305                         for (i--; i >= 0; i--)
2306                                 igb_free_rx_resources(adapter->rx_ring[i]);
2307                         break;
2308                 }
2309         }
2310
2311         return err;
2312 }
2313
2314 /**
2315  * igb_setup_mrqc - configure the multiple receive queue control registers
2316  * @adapter: Board private structure
2317  **/
2318 static void igb_setup_mrqc(struct igb_adapter *adapter)
2319 {
2320         struct e1000_hw *hw = &adapter->hw;
2321         u32 mrqc, rxcsum;
2322         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2323         union e1000_reta {
2324                 u32 dword;
2325                 u8  bytes[4];
2326         } reta;
2327         static const u8 rsshash[40] = {
2328                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2329                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2330                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2331                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2332
2333         /* Fill out hash function seeds */
2334         for (j = 0; j < 10; j++) {
2335                 u32 rsskey = rsshash[(j * 4)];
2336                 rsskey |= rsshash[(j * 4) + 1] << 8;
2337                 rsskey |= rsshash[(j * 4) + 2] << 16;
2338                 rsskey |= rsshash[(j * 4) + 3] << 24;
2339                 array_wr32(E1000_RSSRK(0), j, rsskey);
2340         }
2341
2342         num_rx_queues = adapter->rss_queues;
2343
2344         if (adapter->vfs_allocated_count) {
2345                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2346                 switch (hw->mac.type) {
2347                 case e1000_82580:
2348                         num_rx_queues = 1;
2349                         shift = 0;
2350                         break;
2351                 case e1000_82576:
2352                         shift = 3;
2353                         num_rx_queues = 2;
2354                         break;
2355                 case e1000_82575:
2356                         shift = 2;
2357                         shift2 = 6;
2358                 default:
2359                         break;
2360                 }
2361         } else {
2362                 if (hw->mac.type == e1000_82575)
2363                         shift = 6;
2364         }
2365
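             /* write the 128-entry redirection table one dword at a time; each
              * byte steers an RSS hash bucket to a queue index, e.g. with four
              * queues and shift = 0 the pattern repeats 0, 1, 2, 3, ... */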
2366         for (j = 0; j < (32 * 4); j++) {
2367                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2368                 if (shift2)
2369                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2370                 if ((j & 3) == 3)
2371                         wr32(E1000_RETA(j >> 2), reta.dword);
2372         }
2373
2374         /*
2375          * Disable raw packet checksumming so that RSS hash is placed in
2376          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2377          * offloads as they are enabled by default
2378          */
2379         rxcsum = rd32(E1000_RXCSUM);
2380         rxcsum |= E1000_RXCSUM_PCSD;
2381
2382         if (adapter->hw.mac.type >= e1000_82576)
2383                 /* Enable Receive Checksum Offload for SCTP */
2384                 rxcsum |= E1000_RXCSUM_CRCOFL;
2385
2386         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2387         wr32(E1000_RXCSUM, rxcsum);
2388
2389         /* If VMDq is enabled then we set the appropriate mode for that, else
2390          * we default to RSS so that an RSS hash is calculated per packet even
2391          * if we are only using one queue */
2392         if (adapter->vfs_allocated_count) {
2393                 if (hw->mac.type > e1000_82575) {
2394                         /* Set the default pool for the PF's first queue */
2395                         u32 vtctl = rd32(E1000_VT_CTL);
2396                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2397                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2398                         vtctl |= adapter->vfs_allocated_count <<
2399                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2400                         wr32(E1000_VT_CTL, vtctl);
2401                 }
2402                 if (adapter->rss_queues > 1)
2403                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2404                 else
2405                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2406         } else {
2407                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2408         }
2409         igb_vmm_control(adapter);
2410
2411         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2412                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2413         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2414                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2415         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2416                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2417         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2418                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2419
2420         wr32(E1000_MRQC, mrqc);
2421 }
2422
2423 /**
2424  * igb_setup_rctl - configure the receive control registers
2425  * @adapter: Board private structure
2426  **/
2427 void igb_setup_rctl(struct igb_adapter *adapter)
2428 {
2429         struct e1000_hw *hw = &adapter->hw;
2430         u32 rctl;
2431
2432         rctl = rd32(E1000_RCTL);
2433
2434         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2435         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2436
2437         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2438                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2439
2440         /*
2441          * enable stripping of CRC. It's unlikely this will break BMC
2442          * redirection as it did with e1000. Newer features require
2443          * that the HW strips the CRC.
2444          */
2445         rctl |= E1000_RCTL_SECRC;
2446
2447         /* disable store bad packets and clear size bits. */
2448         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2449
2450         /* enable LPE to prevent packets larger than max_frame_size */
2451         rctl |= E1000_RCTL_LPE;
2452
2453         /* disable queue 0 to prevent tail write w/o re-config */
2454         wr32(E1000_RXDCTL(0), 0);
2455
2456         /* Attention!!!  For SR-IOV PF driver operations you must enable
2457          * queue drop for all VF and PF queues to prevent head of line blocking
2458          * if an untrusted VF does not provide descriptors to hardware.
2459          */
2460         if (adapter->vfs_allocated_count) {
2461                 /* set all queue drop enable bits */
2462                 wr32(E1000_QDE, ALL_QUEUES);
2463         }
2464
2465         wr32(E1000_RCTL, rctl);
2466 }
2467
2468 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2469                                    int vfn)
2470 {
2471         struct e1000_hw *hw = &adapter->hw;
2472         u32 vmolr;
2473
2474         /* if it isn't the PF, check to see if VFs are enabled and
2475          * increase the size to support vlan tags */
2476         if (vfn < adapter->vfs_allocated_count &&
2477             adapter->vf_data[vfn].vlans_enabled)
2478                 size += VLAN_TAG_SIZE;
2479
2480         vmolr = rd32(E1000_VMOLR(vfn));
2481         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2482         vmolr |= size | E1000_VMOLR_LPE;
2483         wr32(E1000_VMOLR(vfn), vmolr);
2484
2485         return 0;
2486 }
2487
2488 /**
2489  * igb_rlpml_set - set maximum receive packet size
2490  * @adapter: board private structure
2491  *
2492  * Configure maximum receivable packet size.
2493  **/
2494 static void igb_rlpml_set(struct igb_adapter *adapter)
2495 {
2496         u32 max_frame_size = adapter->max_frame_size;
2497         struct e1000_hw *hw = &adapter->hw;
2498         u16 pf_id = adapter->vfs_allocated_count;
2499
2500         if (adapter->vlgrp)
2501                 max_frame_size += VLAN_TAG_SIZE;
2502
2503         /* if VFs are enabled, we set RLPML to the largest possible request
2504          * size and set the VMOLR RLPML to the size we need */
2505         if (pf_id) {
2506                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2507                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2508         }
2509
2510         wr32(E1000_RLPML, max_frame_size);
2511 }
2512
2513 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2514                                  int vfn, bool aupe)
2515 {
2516         struct e1000_hw *hw = &adapter->hw;
2517         u32 vmolr;
2518
2519         /*
2520          * This register exists only on 82576 and newer, so on older
2521          * hardware we simply exit and do nothing
2522          */
2523         if (hw->mac.type < e1000_82576)
2524                 return;
2525
2526         vmolr = rd32(E1000_VMOLR(vfn));
2527         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2528         if (aupe)
2529                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2530         else
2531                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2532
2533         /* clear the bits that are conditionally set below */
2534         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2535
2536         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2537                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2538         /*
2539          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2540          * multicast packets
2541          */
2542         if (vfn <= adapter->vfs_allocated_count)
2543                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2544
2545         wr32(E1000_VMOLR(vfn), vmolr);
2546 }
2547
2548 /**
2549  * igb_configure_rx_ring - Configure a receive ring after Reset
2550  * @adapter: board private structure
2551  * @ring: receive ring to be configured
2552  *
2553  * Configure the Rx unit of the MAC after a reset.
2554  **/
2555 void igb_configure_rx_ring(struct igb_adapter *adapter,
2556                            struct igb_ring *ring)
2557 {
2558         struct e1000_hw *hw = &adapter->hw;
2559         u64 rdba = ring->dma;
2560         int reg_idx = ring->reg_idx;
2561         u32 srrctl, rxdctl;
2562
2563         /* disable the queue */
2564         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2565         wr32(E1000_RXDCTL(reg_idx),
2566                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2567
2568         /* Set DMA base address registers */
2569         wr32(E1000_RDBAL(reg_idx),
2570              rdba & 0x00000000ffffffffULL);
2571         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2572         wr32(E1000_RDLEN(reg_idx),
2573                        ring->count * sizeof(union e1000_adv_rx_desc));
2574
2575         /* initialize head and tail */
2576         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2577         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2578         writel(0, ring->head);
2579         writel(0, ring->tail);
2580
2581         /* set descriptor configuration */
2582         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2583                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2584                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2585 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2586                 srrctl |= IGB_RXBUFFER_16384 >>
2587                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2588 #else
2589                 srrctl |= (PAGE_SIZE / 2) >>
2590                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2591 #endif
2592                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2593         } else {
2594                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2595                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2596                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2597         }
2598         /* Only set Drop Enable if we are supporting multiple queues */
2599         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2600                 srrctl |= E1000_SRRCTL_DROP_EN;
2601
2602         wr32(E1000_SRRCTL(reg_idx), srrctl);
2603
2604         /* set filtering for VMDQ pools */
2605         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2606
2607         /* enable receive descriptor fetching */
2608         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2609         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
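             /* clear the existing threshold fields (low 20 bits) and program
              * the prefetch, host and write-back defaults */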
2610         rxdctl &= 0xFFF00000;
2611         rxdctl |= IGB_RX_PTHRESH;
2612         rxdctl |= IGB_RX_HTHRESH << 8;
2613         rxdctl |= IGB_RX_WTHRESH << 16;
2614         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2615 }
2616
2617 /**
2618  * igb_configure_rx - Configure receive Unit after Reset
2619  * @adapter: board private structure
2620  *
2621  * Configure the Rx unit of the MAC after a reset.
2622  **/
2623 static void igb_configure_rx(struct igb_adapter *adapter)
2624 {
2625         int i;
2626
2627         /* set UTA to appropriate mode */
2628         igb_set_uta(adapter);
2629
2630         /* set the correct pool for the PF default MAC address in entry 0 */
2631         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2632                          adapter->vfs_allocated_count);
2633
2634         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2635          * the Base and Length of the Rx Descriptor Ring */
2636         for (i = 0; i < adapter->num_rx_queues; i++)
2637                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2638 }
2639
2640 /**
2641  * igb_free_tx_resources - Free Tx Resources per Queue
2642  * @tx_ring: Tx descriptor ring for a specific queue
2643  *
2644  * Free all transmit software resources
2645  **/
2646 void igb_free_tx_resources(struct igb_ring *tx_ring)
2647 {
2648         igb_clean_tx_ring(tx_ring);
2649
2650         vfree(tx_ring->buffer_info);
2651         tx_ring->buffer_info = NULL;
2652
2653         /* if not set, then don't free */
2654         if (!tx_ring->desc)
2655                 return;
2656
2657         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2658                             tx_ring->desc, tx_ring->dma);
2659
2660         tx_ring->desc = NULL;
2661 }
2662
2663 /**
2664  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2665  * @adapter: board private structure
2666  *
2667  * Free all transmit software resources
2668  **/
2669 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2670 {
2671         int i;
2672
2673         for (i = 0; i < adapter->num_tx_queues; i++)
2674                 igb_free_tx_resources(adapter->tx_ring[i]);
2675 }
2676
2677 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2678                                     struct igb_buffer *buffer_info)
2679 {
2680         if (buffer_info->dma) {
2681                 if (buffer_info->mapped_as_page)
2682                         pci_unmap_page(tx_ring->pdev,
2683                                         buffer_info->dma,
2684                                         buffer_info->length,
2685                                         PCI_DMA_TODEVICE);
2686                 else
2687                         pci_unmap_single(tx_ring->pdev,
2688                                         buffer_info->dma,
2689                                         buffer_info->length,
2690                                         PCI_DMA_TODEVICE);
2691                 buffer_info->dma = 0;
2692         }
2693         if (buffer_info->skb) {
2694                 dev_kfree_skb_any(buffer_info->skb);
2695                 buffer_info->skb = NULL;
2696         }
2697         buffer_info->time_stamp = 0;
2698         buffer_info->length = 0;
2699         buffer_info->next_to_watch = 0;
2700         buffer_info->mapped_as_page = false;
2701 }
2702
2703 /**
2704  * igb_clean_tx_ring - Free Tx Buffers
2705  * @tx_ring: ring to be cleaned
2706  **/
2707 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2708 {
2709         struct igb_buffer *buffer_info;
2710         unsigned long size;
2711         unsigned int i;
2712
2713         if (!tx_ring->buffer_info)
2714                 return;
2715         /* Free all the Tx ring sk_buffs */
2716
2717         for (i = 0; i < tx_ring->count; i++) {
2718                 buffer_info = &tx_ring->buffer_info[i];
2719                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2720         }
2721
2722         size = sizeof(struct igb_buffer) * tx_ring->count;
2723         memset(tx_ring->buffer_info, 0, size);
2724
2725         /* Zero out the descriptor ring */
2726         memset(tx_ring->desc, 0, tx_ring->size);
2727
2728         tx_ring->next_to_use = 0;
2729         tx_ring->next_to_clean = 0;
2730 }
2731
2732 /**
2733  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2734  * @adapter: board private structure
2735  **/
2736 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2737 {
2738         int i;
2739
2740         for (i = 0; i < adapter->num_tx_queues; i++)
2741                 igb_clean_tx_ring(adapter->tx_ring[i]);
2742 }
2743
2744 /**
2745  * igb_free_rx_resources - Free Rx Resources
2746  * @rx_ring: ring to clean the resources from
2747  *
2748  * Free all receive software resources
2749  **/
2750 void igb_free_rx_resources(struct igb_ring *rx_ring)
2751 {
2752         igb_clean_rx_ring(rx_ring);
2753
2754         vfree(rx_ring->buffer_info);
2755         rx_ring->buffer_info = NULL;
2756
2757         /* if not set, then don't free */
2758         if (!rx_ring->desc)
2759                 return;
2760
2761         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2762                             rx_ring->desc, rx_ring->dma);
2763
2764         rx_ring->desc = NULL;
2765 }
2766
2767 /**
2768  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2769  * @adapter: board private structure
2770  *
2771  * Free all receive software resources
2772  **/
2773 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2774 {
2775         int i;
2776
2777         for (i = 0; i < adapter->num_rx_queues; i++)
2778                 igb_free_rx_resources(adapter->rx_ring[i]);
2779 }
2780
2781 /**
2782  * igb_clean_rx_ring - Free Rx Buffers per Queue
2783  * @rx_ring: ring to free buffers from
2784  **/
2785 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2786 {
2787         struct igb_buffer *buffer_info;
2788         unsigned long size;
2789         unsigned int i;
2790
2791         if (!rx_ring->buffer_info)
2792                 return;
2793
2794         /* Free all the Rx ring sk_buffs */
2795         for (i = 0; i < rx_ring->count; i++) {
2796                 buffer_info = &rx_ring->buffer_info[i];
2797                 if (buffer_info->dma) {
2798                         pci_unmap_single(rx_ring->pdev,
2799                                          buffer_info->dma,
2800                                          rx_ring->rx_buffer_len,
2801                                          PCI_DMA_FROMDEVICE);
2802                         buffer_info->dma = 0;
2803                 }
2804
2805                 if (buffer_info->skb) {
2806                         dev_kfree_skb(buffer_info->skb);
2807                         buffer_info->skb = NULL;
2808                 }
2809                 if (buffer_info->page_dma) {
2810                         pci_unmap_page(rx_ring->pdev,
2811                                        buffer_info->page_dma,
2812                                        PAGE_SIZE / 2,
2813                                        PCI_DMA_FROMDEVICE);
2814                         buffer_info->page_dma = 0;
2815                 }
2816                 if (buffer_info->page) {
2817                         put_page(buffer_info->page);
2818                         buffer_info->page = NULL;
2819                         buffer_info->page_offset = 0;
2820                 }
2821         }
2822
2823         size = sizeof(struct igb_buffer) * rx_ring->count;
2824         memset(rx_ring->buffer_info, 0, size);
2825
2826         /* Zero out the descriptor ring */
2827         memset(rx_ring->desc, 0, rx_ring->size);
2828
2829         rx_ring->next_to_clean = 0;
2830         rx_ring->next_to_use = 0;
2831 }
2832
2833 /**
2834  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2835  * @adapter: board private structure
2836  **/
2837 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2838 {
2839         int i;
2840
2841         for (i = 0; i < adapter->num_rx_queues; i++)
2842                 igb_clean_rx_ring(adapter->rx_ring[i]);
2843 }
2844
2845 /**
2846  * igb_set_mac - Change the Ethernet Address of the NIC
2847  * @netdev: network interface device structure
2848  * @p: pointer to an address structure
2849  *
2850  * Returns 0 on success, negative on failure
2851  **/
2852 static int igb_set_mac(struct net_device *netdev, void *p)
2853 {
2854         struct igb_adapter *adapter = netdev_priv(netdev);
2855         struct e1000_hw *hw = &adapter->hw;
2856         struct sockaddr *addr = p;
2857
2858         if (!is_valid_ether_addr(addr->sa_data))
2859                 return -EADDRNOTAVAIL;
2860
2861         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2862         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2863
2864         /* set the correct pool for the new PF MAC address in entry 0 */
2865         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2866                          adapter->vfs_allocated_count);
2867
2868         return 0;
2869 }
2870
2871 /**
2872  * igb_write_mc_addr_list - write multicast addresses to MTA
2873  * @netdev: network interface device structure
2874  *
2875  * Writes multicast address list to the MTA hash table.
2876  * Returns: -ENOMEM on failure
2877  *                0 on no addresses written
2878  *                X on writing X addresses to MTA
2879  **/
2880 static int igb_write_mc_addr_list(struct net_device *netdev)
2881 {
2882         struct igb_adapter *adapter = netdev_priv(netdev);
2883         struct e1000_hw *hw = &adapter->hw;
2884         struct dev_mc_list *mc_ptr = netdev->mc_list;
2885         u8  *mta_list;
2886         int i;
2887
2888         if (netdev_mc_empty(netdev)) {
2889                 /* nothing to program, so clear mc list */
2890                 igb_update_mc_addr_list(hw, NULL, 0);
2891                 igb_restore_vf_multicasts(adapter);
2892                 return 0;
2893         }
2894
2895         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2896         if (!mta_list)
2897                 return -ENOMEM;
2898
2899         /* The shared function expects a packed array of only addresses. */
2900         mc_ptr = netdev->mc_list;
2901
2902         for (i = 0; i < netdev_mc_count(netdev); i++) {
2903                 if (!mc_ptr)
2904                         break;
2905                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2906                 mc_ptr = mc_ptr->next;
2907         }
2908         igb_update_mc_addr_list(hw, mta_list, i);
2909         kfree(mta_list);
2910
2911         return netdev_mc_count(netdev);
2912 }
2913
2914 /**
2915  * igb_write_uc_addr_list - write unicast addresses to RAR table
2916  * @netdev: network interface device structure
2917  *
2918  * Writes unicast address list to the RAR table.
2919  * Returns: -ENOMEM on failure/insufficient address space
2920  *                0 on no addresses written
2921  *                X on writing X addresses to the RAR table
2922  **/
2923 static int igb_write_uc_addr_list(struct net_device *netdev)
2924 {
2925         struct igb_adapter *adapter = netdev_priv(netdev);
2926         struct e1000_hw *hw = &adapter->hw;
2927         unsigned int vfn = adapter->vfs_allocated_count;
2928         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2929         int count = 0;
2930
2931         /* return ENOMEM indicating insufficient memory for addresses */
2932         if (netdev_uc_count(netdev) > rar_entries)
2933                 return -ENOMEM;
2934
2935         if (!netdev_uc_empty(netdev) && rar_entries) {
2936                 struct netdev_hw_addr *ha;
2937
2938                 netdev_for_each_uc_addr(ha, netdev) {
2939                         if (!rar_entries)
2940                                 break;
2941                         igb_rar_set_qsel(adapter, ha->addr,
2942                                          rar_entries--,
2943                                          vfn);
2944                         count++;
2945                 }
2946         }
2947         /* clear any remaining RAR entries, in reverse order to avoid write combining */
2948         for (; rar_entries > 0 ; rar_entries--) {
2949                 wr32(E1000_RAH(rar_entries), 0);
2950                 wr32(E1000_RAL(rar_entries), 0);
2951         }
2952         wrfl();
2953
2954         return count;
2955 }
2956
2957 /**
2958  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2959  * @netdev: network interface device structure
2960  *
2961  * The set_rx_mode entry point is called whenever the unicast or multicast
2962  * address lists or the network interface flags are updated.  This routine is
2963  * responsible for configuring the hardware for proper unicast, multicast,
2964  * promiscuous mode, and all-multi behavior.
2965  **/
2966 static void igb_set_rx_mode(struct net_device *netdev)
2967 {
2968         struct igb_adapter *adapter = netdev_priv(netdev);
2969         struct e1000_hw *hw = &adapter->hw;
2970         unsigned int vfn = adapter->vfs_allocated_count;
2971         u32 rctl, vmolr = 0;
2972         int count;
2973
2974         /* Check for Promiscuous and All Multicast modes */
2975         rctl = rd32(E1000_RCTL);
2976
2977         /* clear the affected bits */
2978         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2979
2980         if (netdev->flags & IFF_PROMISC) {
2981                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2982                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2983         } else {
2984                 if (netdev->flags & IFF_ALLMULTI) {
2985                         rctl |= E1000_RCTL_MPE;
2986                         vmolr |= E1000_VMOLR_MPME;
2987                 } else {
2988                         /*
2989                          * Write addresses to the MTA; if the attempt fails,
2990                          * then we should just turn on promiscuous mode so
2991                          * that we can at least receive multicast traffic
2992                          */
2993                         count = igb_write_mc_addr_list(netdev);
2994                         if (count < 0) {
2995                                 rctl |= E1000_RCTL_MPE;
2996                                 vmolr |= E1000_VMOLR_MPME;
2997                         } else if (count) {
2998                                 vmolr |= E1000_VMOLR_ROMPE;
2999                         }
3000                 }
3001                 /*
3002                  * Write addresses to available RAR registers; if there is not
3003                  * sufficient space to store all the addresses, then enable
3004                  * unicast promiscuous mode
3005                  */
3006                 count = igb_write_uc_addr_list(netdev);
3007                 if (count < 0) {
3008                         rctl |= E1000_RCTL_UPE;
3009                         vmolr |= E1000_VMOLR_ROPE;
3010                 }
3011                 rctl |= E1000_RCTL_VFE;
3012         }
3013         wr32(E1000_RCTL, rctl);
3014
3015         /*
3016          * In order to support SR-IOV and eventually VMDq it is necessary to set
3017          * the VMOLR to enable the appropriate modes.  Without this workaround
3018          * we will have issues with VLAN tag stripping not being done for frames
3019          * that are only arriving because we are the default pool
3020          */
3021         if (hw->mac.type < e1000_82576)
3022                 return;
3023
3024         vmolr |= rd32(E1000_VMOLR(vfn)) &
3025                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3026         wr32(E1000_VMOLR(vfn), vmolr);
3027         igb_restore_vf_multicasts(adapter);
3028 }
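
/* Editor's note: a quick summary of the filtering decisions made in
 * igb_set_rx_mode() above, derived from its logic (not part of the
 * original driver source):
 *
 *   IFF_PROMISC  -> RCTL.UPE | RCTL.MPE        (VMOLR: ROPE | MPME)
 *   IFF_ALLMULTI -> RCTL.MPE                   (VMOLR: MPME)
 *   otherwise    -> program the MTA; on failure fall back to RCTL.MPE,
 *                   on success set VMOLR.ROMPE; program the RARs, and if
 *                   there are more unicast addresses than RAR entries
 *                   fall back to RCTL.UPE (VMOLR.ROPE); enable RCTL.VFE
 *
 * On 82576 and later the accumulated VMOLR bits are then merged into the
 * PF's per-pool VMOLR register.
 */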
3029
3030 /* Need to wait a few seconds after link up to get diagnostic information from
3031  * the phy */
3032 static void igb_update_phy_info(unsigned long data)
3033 {
3034         struct igb_adapter *adapter = (struct igb_adapter *) data;
3035         igb_get_phy_info(&adapter->hw);
3036 }
3037
3038 /**
3039  * igb_has_link - check shared code for link and determine up/down
3040  * @adapter: pointer to driver private info
3041  **/
3042 bool igb_has_link(struct igb_adapter *adapter)
3043 {
3044         struct e1000_hw *hw = &adapter->hw;
3045         bool link_active = false;
3046         s32 ret_val = 0;
3047
3048         /* get_link_status is set on LSC (link status) interrupt or
3049          * rx sequence error interrupt.  get_link_status will stay
3050          * true (and the link reported as down) until
3051          * e1000_check_for_link establishes link, for copper adapters ONLY
3052          */
3053         switch (hw->phy.media_type) {
3054         case e1000_media_type_copper:
3055                 if (hw->mac.get_link_status) {
3056                         ret_val = hw->mac.ops.check_for_link(hw);
3057                         link_active = !hw->mac.get_link_status;
3058                 } else {
3059                         link_active = true;
3060                 }
3061                 break;
3062         case e1000_media_type_internal_serdes:
3063                 ret_val = hw->mac.ops.check_for_link(hw);
3064                 link_active = hw->mac.serdes_has_link;
3065                 break;
3066         default:
3067         case e1000_media_type_unknown:
3068                 break;
3069         }
3070
3071         return link_active;
3072 }
3073
3074 /**
3075  * igb_watchdog - Timer Call-back
3076  * @data: pointer to adapter cast into an unsigned long
3077  **/
3078 static void igb_watchdog(unsigned long data)
3079 {
3080         struct igb_adapter *adapter = (struct igb_adapter *)data;
3081         /* Do the rest outside of interrupt context */
3082         schedule_work(&adapter->watchdog_task);
3083 }
3084
3085 static void igb_watchdog_task(struct work_struct *work)
3086 {
3087         struct igb_adapter *adapter = container_of(work,
3088                                                    struct igb_adapter,
3089                                                    watchdog_task);
3090         struct e1000_hw *hw = &adapter->hw;
3091         struct net_device *netdev = adapter->netdev;
3092         u32 link;
3093         int i;
3094
3095         link = igb_has_link(adapter);
3096         if (link) {
3097                 if (!netif_carrier_ok(netdev)) {
3098                         u32 ctrl;
3099                         hw->mac.ops.get_speed_and_duplex(hw,
3100                                                          &adapter->link_speed,
3101                                                          &adapter->link_duplex);
3102
3103                         ctrl = rd32(E1000_CTRL);
3104                         /* Link status message must follow this format */
3105                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3106                                  "Flow Control: %s\n",
3107                                netdev->name,
3108                                adapter->link_speed,
3109                                adapter->link_duplex == FULL_DUPLEX ?
3110                                  "Full Duplex" : "Half Duplex",
3111                                ((ctrl & E1000_CTRL_TFCE) &&
3112                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3113                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3114                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3115
3116                         /* tweak tx_queue_len according to speed/duplex and
3117                          * adjust the timeout factor */
3118                         netdev->tx_queue_len = adapter->tx_queue_len;
3119                         adapter->tx_timeout_factor = 1;
3120                         switch (adapter->link_speed) {
3121                         case SPEED_10:
3122                                 netdev->tx_queue_len = 10;
3123                                 adapter->tx_timeout_factor = 14;
3124                                 break;
3125                         case SPEED_100:
3126                                 netdev->tx_queue_len = 100;
3127                                 /* maybe add some timeout factor ? */
3128                                 break;
3129                         }
3130
3131                         netif_carrier_on(netdev);
3132
3133                         igb_ping_all_vfs(adapter);
3134
3135                         /* link state has changed, schedule phy info update */
3136                         if (!test_bit(__IGB_DOWN, &adapter->state))
3137                                 mod_timer(&adapter->phy_info_timer,
3138                                           round_jiffies(jiffies + 2 * HZ));
3139                 }
3140         } else {
3141                 if (netif_carrier_ok(netdev)) {
3142                         adapter->link_speed = 0;
3143                         adapter->link_duplex = 0;
3144                         /* Link status message must follow this format */
3145                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3146                                netdev->name);
3147                         netif_carrier_off(netdev);
3148
3149                         igb_ping_all_vfs(adapter);
3150
3151                         /* link state has changed, schedule phy info update */
3152                         if (!test_bit(__IGB_DOWN, &adapter->state))
3153                                 mod_timer(&adapter->phy_info_timer,
3154                                           round_jiffies(jiffies + 2 * HZ));
3155                 }
3156         }
3157
3158         igb_update_stats(adapter);
3159
3160         for (i = 0; i < adapter->num_tx_queues; i++) {
3161                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3162                 if (!netif_carrier_ok(netdev)) {
3163                         /* We've lost link, so the controller stops DMA,
3164                          * but we've got queued Tx work that's never going
3165                          * to get done, so reset controller to flush Tx.
3166                          * (Do the reset outside of interrupt context). */
3167                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3168                                 adapter->tx_timeout_count++;
3169                                 schedule_work(&adapter->reset_task);
3170                                 /* return immediately since reset is imminent */
3171                                 return;
3172                         }
3173                 }
3174
3175                 /* Force detection of hung controller every watchdog period */
3176                 tx_ring->detect_tx_hung = true;
3177         }
3178
3179         /* Cause software interrupt to ensure rx ring is cleaned */
3180         if (adapter->msix_entries) {
3181                 u32 eics = 0;
3182                 for (i = 0; i < adapter->num_q_vectors; i++) {
3183                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3184                         eics |= q_vector->eims_value;
3185                 }
3186                 wr32(E1000_EICS, eics);
3187         } else {
3188                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3189         }
3190
3191         /* Reset the timer */
3192         if (!test_bit(__IGB_DOWN, &adapter->state))
3193                 mod_timer(&adapter->watchdog_timer,
3194                           round_jiffies(jiffies + 2 * HZ));
3195 }
3196
3197 enum latency_range {
3198         lowest_latency = 0,
3199         low_latency = 1,
3200         bulk_latency = 2,
3201         latency_invalid = 255
3202 };
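
/* Editor's note (not part of the original source): these latency ranges map
 * to the ITR values programmed in igb_set_itr() below -- 56, 196 and 980
 * ticks, i.e. roughly 70,000, 20,000 and 4,000 interrupts per second
 * respectively, per the comments in that function.
 */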
3203
3204 /**
3205  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3206  *
3207  *      Stores a new ITR value based strictly on packet size.  This
3208  *      algorithm is less sophisticated than that used in igb_update_itr,
3209  *      due to the difficulty of synchronizing statistics across multiple
3210  *      receive rings.  The divisors and thresholds used by this function
3211  *      were determined based on theoretical maximum wire speed and testing
3212  *      data, in order to minimize response time while increasing bulk
3213  *      throughput.
3214  *      This functionality is controlled by the InterruptThrottleRate module
3215  *      parameter (see igb_param.c)
3216  *      NOTE:  This function is called only when operating in a multiqueue
3217  *             receive environment.
3218  * @q_vector: pointer to q_vector
3219  **/
3220 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3221 {
3222         int new_val = q_vector->itr_val;
3223         int avg_wire_size = 0;
3224         struct igb_adapter *adapter = q_vector->adapter;
3225
3226         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3227          * ints/sec - ITR timer value of 976 ticks (~250 us).
3228          */
3229         if (adapter->link_speed != SPEED_1000) {
3230                 new_val = 976;
3231                 goto set_itr_val;
3232         }
3233
3234         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3235                 struct igb_ring *ring = q_vector->rx_ring;
3236                 avg_wire_size = ring->total_bytes / ring->total_packets;
3237         }
3238
3239         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3240                 struct igb_ring *ring = q_vector->tx_ring;
3241                 avg_wire_size = max_t(u32, avg_wire_size,
3242                                       (ring->total_bytes /
3243                                        ring->total_packets));
3244         }
3245
3246         /* if avg_wire_size isn't set no work was done */
3247         if (!avg_wire_size)
3248                 goto clear_counts;
3249
3250         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3251         avg_wire_size += 24;
3252
3253         /* Don't starve jumbo frames */
3254         avg_wire_size = min(avg_wire_size, 3000);
3255
3256         /* Give a little boost to mid-size frames */
3257         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3258                 new_val = avg_wire_size / 3;
3259         else
3260                 new_val = avg_wire_size / 2;
3261
3262         /* when in itr mode 3 do not exceed 20K ints/sec */
3263         if (adapter->rx_itr_setting == 3 && new_val < 196)
3264                 new_val = 196;
3265
3266 set_itr_val:
3267         if (new_val != q_vector->itr_val) {
3268                 q_vector->itr_val = new_val;
3269                 q_vector->set_itr = 1;
3270         }
3271 clear_counts:
3272         if (q_vector->rx_ring) {
3273                 q_vector->rx_ring->total_bytes = 0;
3274                 q_vector->rx_ring->total_packets = 0;
3275         }
3276         if (q_vector->tx_ring) {
3277                 q_vector->tx_ring->total_bytes = 0;
3278                 q_vector->tx_ring->total_packets = 0;
3279         }
3280 }
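
/* Editor's note: a worked example of the calculation above (not part of the
 * original source), at gigabit link speed and assuming the ~256 ns per-tick
 * ITR granularity implied by the "56 == ~70,000 ints/sec" mapping in
 * igb_set_itr() below:
 *
 *   avg_wire_size = 600  -> 600 + 24 = 624, inside the 300..1200 band, so
 *                           new_val = 624 / 3 = 208 (~53 us, ~19,000 ints/s)
 *   avg_wire_size = 1500 -> 1500 + 24 = 1524, outside the band, so
 *                           new_val = 1524 / 2 = 762 (~195 us, ~5,100 ints/s)
 */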
3281
3282 /**
3283  * igb_update_itr - update the dynamic ITR value based on statistics
3284  *      Stores a new ITR value based on packets and byte
3285  *      counts during the last interrupt.  The advantage of per interrupt
3286  *      computation is faster updates and more accurate ITR for the current
3287  *      traffic pattern.  Constants in this function were computed
3288  *      based on theoretical maximum wire speed and thresholds were set based
3289  *      on testing data as well as attempting to minimize response time
3290  *      while increasing bulk throughput.
3291  *      This functionality is controlled by the InterruptThrottleRate module
3292  *      parameter (see igb_param.c)
3293  *      NOTE:  These calculations are only valid when operating in a single-
3294  *             queue environment.
3295  * @adapter: pointer to adapter
3296  * @itr_setting: current latency range value (adapter->rx_itr or adapter->tx_itr)
3297  * @packets: the number of packets during this measurement interval
3298  * @bytes: the number of bytes during this measurement interval
3299  **/
3300 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3301                                    int packets, int bytes)
3302 {
3303         unsigned int retval = itr_setting;
3304
3305         if (packets == 0)
3306                 goto update_itr_done;
3307
3308         switch (itr_setting) {
3309         case lowest_latency:
3310                 /* handle TSO and jumbo frames */
3311                 if (bytes/packets > 8000)
3312                         retval = bulk_latency;
3313                 else if ((packets < 5) && (bytes > 512))
3314                         retval = low_latency;
3315                 break;
3316         case low_latency:  /* 50 usec aka 20000 ints/s */
3317                 if (bytes > 10000) {
3318                         /* this if handles the TSO accounting */
3319                         if (bytes/packets > 8000) {
3320                                 retval = bulk_latency;
3321                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3322                                 retval = bulk_latency;
3323                         } else if ((packets > 35)) {
3324                                 retval = lowest_latency;
3325                         }
3326                 } else if (bytes/packets > 2000) {
3327                         retval = bulk_latency;
3328                 } else if (packets <= 2 && bytes < 512) {
3329                         retval = lowest_latency;
3330                 }
3331                 break;
3332         case bulk_latency: /* 250 usec aka 4000 ints/s */
3333                 if (bytes > 25000) {
3334                         if (packets > 35)
3335                                 retval = low_latency;
3336                 } else if (bytes < 1500) {
3337                         retval = low_latency;
3338                 }
3339                 break;
3340         }
3341
3342 update_itr_done:
3343         return retval;
3344 }
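
/* Editor's note: example transition through the state machine above (not
 * part of the original source): starting from low_latency with 40 packets
 * totalling 12,000 bytes in the interval, bytes > 10000 and bytes/packets
 * = 300 (neither > 8000 nor > 1200) while packets > 35, so the function
 * steps up to lowest_latency for the next interval.
 */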
3345
3346 static void igb_set_itr(struct igb_adapter *adapter)
3347 {
3348         struct igb_q_vector *q_vector = adapter->q_vector[0];
3349         u16 current_itr;
3350         u32 new_itr = q_vector->itr_val;
3351
3352         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3353         if (adapter->link_speed != SPEED_1000) {
3354                 current_itr = 0;
3355                 new_itr = 4000;
3356                 goto set_itr_now;
3357         }
3358
3359         adapter->rx_itr = igb_update_itr(adapter,
3360                                     adapter->rx_itr,
3361                                     q_vector->rx_ring->total_packets,
3362                                     q_vector->rx_ring->total_bytes);
3363
3364         adapter->tx_itr = igb_update_itr(adapter,
3365                                     adapter->tx_itr,
3366                                     q_vector->tx_ring->total_packets,
3367                                     q_vector->tx_ring->total_bytes);
3368         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3369
3370         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3371         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3372                 current_itr = low_latency;
3373
3374         switch (current_itr) {
3375         /* counts and packets in update_itr are dependent on these numbers */
3376         case lowest_latency:
3377                 new_itr = 56;  /* aka 70,000 ints/sec */
3378                 break;
3379         case low_latency:
3380                 new_itr = 196; /* aka 20,000 ints/sec */
3381                 break;
3382         case bulk_latency:
3383                 new_itr = 980; /* aka 4,000 ints/sec */
3384                 break;
3385         default:
3386                 break;
3387         }
3388
3389 set_itr_now:
3390         q_vector->rx_ring->total_bytes = 0;
3391         q_vector->rx_ring->total_packets = 0;
3392         q_vector->tx_ring->total_bytes = 0;
3393         q_vector->tx_ring->total_packets = 0;
3394
3395         if (new_itr != q_vector->itr_val) {
3396                 /* this attempts to bias the interrupt rate towards Bulk
3397                  * by adding intermediate steps when interrupt rate is
3398                  * increasing */
3399                 new_itr = new_itr > q_vector->itr_val ?
3400                              max((new_itr * q_vector->itr_val) /
3401                                  (new_itr + (q_vector->itr_val >> 2)),
3402                                  new_itr) :
3403                              new_itr;
3404                 /* Don't write the value here; it resets the adapter's
3405                  * internal timer, and causes us to delay far longer than
3406                  * we should between interrupts.  Instead, we write the ITR
3407                  * value at the beginning of the next interrupt so the timing
3408                  * ends up being correct.
3409                  */
3410                 q_vector->itr_val = new_itr;
3411                 q_vector->set_itr = 1;
3412         }
3413
3414         return;
3415 }
3416
3417 #define IGB_TX_FLAGS_CSUM               0x00000001
3418 #define IGB_TX_FLAGS_VLAN               0x00000002
3419 #define IGB_TX_FLAGS_TSO                0x00000004
3420 #define IGB_TX_FLAGS_IPV4               0x00000008
3421 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3422 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3423 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
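
/* Editor's note (not part of the original source): the VLAN tag is carried
 * in the upper 16 bits of tx_flags.  For example, for VLAN ID 100 on a
 * tagged frame, igb_xmit_frame_ring_adv() below builds:
 *
 *   tx_flags = IGB_TX_FLAGS_VLAN | (100 << IGB_TX_FLAGS_VLAN_SHIFT)
 *            = 0x00640002
 */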
3424
3425 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3426                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3427 {
3428         struct e1000_adv_tx_context_desc *context_desc;
3429         unsigned int i;
3430         int err;
3431         struct igb_buffer *buffer_info;
3432         u32 info = 0, tu_cmd = 0;
3433         u32 mss_l4len_idx;
3434         u8 l4len;
3435
3436         if (skb_header_cloned(skb)) {
3437                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3438                 if (err)
3439                         return err;
3440         }
3441
3442         l4len = tcp_hdrlen(skb);
3443         *hdr_len += l4len;
3444
3445         if (skb->protocol == htons(ETH_P_IP)) {
3446                 struct iphdr *iph = ip_hdr(skb);
3447                 iph->tot_len = 0;
3448                 iph->check = 0;
3449                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3450                                                          iph->daddr, 0,
3451                                                          IPPROTO_TCP,
3452                                                          0);
3453         } else if (skb_is_gso_v6(skb)) {
3454                 ipv6_hdr(skb)->payload_len = 0;
3455                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3456                                                        &ipv6_hdr(skb)->daddr,
3457                                                        0, IPPROTO_TCP, 0);
3458         }
3459
3460         i = tx_ring->next_to_use;
3461
3462         buffer_info = &tx_ring->buffer_info[i];
3463         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3464         /* VLAN MACLEN IPLEN */
3465         if (tx_flags & IGB_TX_FLAGS_VLAN)
3466                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3467         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3468         *hdr_len += skb_network_offset(skb);
3469         info |= skb_network_header_len(skb);
3470         *hdr_len += skb_network_header_len(skb);
3471         context_desc->vlan_macip_lens = cpu_to_le32(info);
3472
3473         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3474         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3475
3476         if (skb->protocol == htons(ETH_P_IP))
3477                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3478         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3479
3480         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3481
3482         /* MSS L4LEN IDX */
3483         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3484         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3485
3486         /* For 82575, context index must be unique per ring. */
3487         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3488                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3489
3490         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3491         context_desc->seqnum_seed = 0;
3492
3493         buffer_info->time_stamp = jiffies;
3494         buffer_info->next_to_watch = i;
3495         buffer_info->dma = 0;
3496         i++;
3497         if (i == tx_ring->count)
3498                 i = 0;
3499
3500         tx_ring->next_to_use = i;
3501
3502         return true;
3503 }
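
/* Editor's note: worked example of the header-length accounting above (not
 * part of the original source): for a standard TSO TCP/IPv4 frame,
 * skb_network_offset() = 14 (Ethernet header), skb_network_header_len() =
 * 20 (IPv4 header without options) and l4len = 20 (TCP header without
 * options), so *hdr_len comes back as 14 + 20 + 20 = 54 bytes; the MSS
 * written into the context descriptor is skb_shinfo(skb)->gso_size,
 * typically 1460 for a 1500-byte MTU.
 */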
3504
3505 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3506                                    struct sk_buff *skb, u32 tx_flags)
3507 {
3508         struct e1000_adv_tx_context_desc *context_desc;
3509         struct pci_dev *pdev = tx_ring->pdev;
3510         struct igb_buffer *buffer_info;
3511         u32 info = 0, tu_cmd = 0;
3512         unsigned int i;
3513
3514         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3515             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3516                 i = tx_ring->next_to_use;
3517                 buffer_info = &tx_ring->buffer_info[i];
3518                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3519
3520                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3521                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3522
3523                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3524                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3525                         info |= skb_network_header_len(skb);
3526
3527                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3528
3529                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3530
3531                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3532                         __be16 protocol;
3533
3534                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3535                                 const struct vlan_ethhdr *vhdr =
3536                                           (const struct vlan_ethhdr*)skb->data;
3537
3538                                 protocol = vhdr->h_vlan_encapsulated_proto;
3539                         } else {
3540                                 protocol = skb->protocol;
3541                         }
3542
3543                         switch (protocol) {
3544                         case cpu_to_be16(ETH_P_IP):
3545                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3546                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3547                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3548                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3549                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3550                                 break;
3551                         case cpu_to_be16(ETH_P_IPV6):
3552                                 /* XXX what about other V6 headers?? */
3553                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3554                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3555                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3556                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3557                                 break;
3558                         default:
3559                                 if (unlikely(net_ratelimit()))
3560                                         dev_warn(&pdev->dev,
3561                                             "partial checksum but proto=%x!\n",
3562                                             skb->protocol);
3563                                 break;
3564                         }
3565                 }
3566
3567                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3568                 context_desc->seqnum_seed = 0;
3569                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3570                         context_desc->mss_l4len_idx =
3571                                 cpu_to_le32(tx_ring->reg_idx << 4);
3572
3573                 buffer_info->time_stamp = jiffies;
3574                 buffer_info->next_to_watch = i;
3575                 buffer_info->dma = 0;
3576
3577                 i++;
3578                 if (i == tx_ring->count)
3579                         i = 0;
3580                 tx_ring->next_to_use = i;
3581
3582                 return true;
3583         }
3584         return false;
3585 }
3586
3587 #define IGB_MAX_TXD_PWR 16
3588 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3589
3590 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3591                                  unsigned int first)
3592 {
3593         struct igb_buffer *buffer_info;
3594         struct pci_dev *pdev = tx_ring->pdev;
3595         unsigned int len = skb_headlen(skb);
3596         unsigned int count = 0, i;
3597         unsigned int f;
3598
3599         i = tx_ring->next_to_use;
3600
3601         buffer_info = &tx_ring->buffer_info[i];
3602         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3603         buffer_info->length = len;
3604         /* set time_stamp *before* dma to help avoid a possible race */
3605         buffer_info->time_stamp = jiffies;
3606         buffer_info->next_to_watch = i;
3607         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3608                                           PCI_DMA_TODEVICE);
3609         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3610                 goto dma_error;
3611
3612         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3613                 struct skb_frag_struct *frag;
3614
3615                 count++;
3616                 i++;
3617                 if (i == tx_ring->count)
3618                         i = 0;
3619
3620                 frag = &skb_shinfo(skb)->frags[f];
3621                 len = frag->size;
3622
3623                 buffer_info = &tx_ring->buffer_info[i];
3624                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3625                 buffer_info->length = len;
3626                 buffer_info->time_stamp = jiffies;
3627                 buffer_info->next_to_watch = i;
3628                 buffer_info->mapped_as_page = true;
3629                 buffer_info->dma = pci_map_page(pdev,
3630                                                 frag->page,
3631                                                 frag->page_offset,
3632                                                 len,
3633                                                 PCI_DMA_TODEVICE);
3634                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3635                         goto dma_error;
3636
3637         }
3638
3639         tx_ring->buffer_info[i].skb = skb;
3640         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3641         tx_ring->buffer_info[first].next_to_watch = i;
3642
3643         return ++count;
3644
3645 dma_error:
3646         dev_err(&pdev->dev, "TX DMA map failed\n");
3647
3648         /* clear timestamp and dma mappings for failed buffer_info mapping */
3649         buffer_info->dma = 0;
3650         buffer_info->time_stamp = 0;
3651         buffer_info->length = 0;
3652         buffer_info->next_to_watch = 0;
3653         buffer_info->mapped_as_page = false;
3654         count--;
3655
3656         /* clear timestamp and dma mappings for remaining portion of packet */
3657         while (count >= 0) {
3658                 count--;
3659                 i--;
3660                 if (i < 0)
3661                         i += tx_ring->count;
3662                 buffer_info = &tx_ring->buffer_info[i];
3663                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3664         }
3665
3666         return 0;
3667 }
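
/* Editor's note: example of the descriptor accounting above (not part of
 * the original source): an skb with a linear head plus 3 page fragments
 * maps to 4 data descriptors, so igb_tx_map_adv() returns count = 4;
 * together with one optional context descriptor and the 2-descriptor gap,
 * that fits within the nr_frags + 4 headroom reserved by
 * igb_maybe_stop_tx() in igb_xmit_frame_ring_adv() below.
 */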
3668
3669 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3670                                     u32 tx_flags, int count, u32 paylen,
3671                                     u8 hdr_len)
3672 {
3673         union e1000_adv_tx_desc *tx_desc;
3674         struct igb_buffer *buffer_info;
3675         u32 olinfo_status = 0, cmd_type_len;
3676         unsigned int i = tx_ring->next_to_use;
3677
3678         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3679                         E1000_ADVTXD_DCMD_DEXT);
3680
3681         if (tx_flags & IGB_TX_FLAGS_VLAN)
3682                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3683
3684         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3685                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3686
3687         if (tx_flags & IGB_TX_FLAGS_TSO) {
3688                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3689
3690                 /* insert tcp checksum */
3691                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3692
3693                 /* insert ip checksum */
3694                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3695                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3696
3697         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3698                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3699         }
3700
3701         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3702             (tx_flags & (IGB_TX_FLAGS_CSUM |
3703                          IGB_TX_FLAGS_TSO |
3704                          IGB_TX_FLAGS_VLAN)))
3705                 olinfo_status |= tx_ring->reg_idx << 4;
3706
3707         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3708
3709         do {
3710                 buffer_info = &tx_ring->buffer_info[i];
3711                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3712                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3713                 tx_desc->read.cmd_type_len =
3714                         cpu_to_le32(cmd_type_len | buffer_info->length);
3715                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3716                 count--;
3717                 i++;
3718                 if (i == tx_ring->count)
3719                         i = 0;
3720         } while (count > 0);
3721
3722         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3723         /* Force memory writes to complete before letting h/w
3724          * know there are new descriptors to fetch.  (Only
3725          * applicable for weak-ordered memory model archs,
3726          * such as IA-64). */
3727         wmb();
3728
3729         tx_ring->next_to_use = i;
3730         writel(i, tx_ring->tail);
3731         /* we need this if more than one processor can write to our tail
3732          * at a time; it synchronizes IO on IA64/Altix systems */
3733         mmiowb();
3734 }
3735
3736 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3737 {
3738         struct net_device *netdev = tx_ring->netdev;
3739
3740         netif_stop_subqueue(netdev, tx_ring->queue_index);
3741
3742         /* Herbert's original patch had:
3743          *  smp_mb__after_netif_stop_queue();
3744          * but since that doesn't exist yet, just open code it. */
3745         smp_mb();
3746
3747         /* We need to check again in case another CPU has just
3748          * made room available. */
3749         if (igb_desc_unused(tx_ring) < size)
3750                 return -EBUSY;
3751
3752         /* A reprieve! */
3753         netif_wake_subqueue(netdev, tx_ring->queue_index);
3754         tx_ring->tx_stats.restart_queue++;
3755         return 0;
3756 }
3757
3758 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3759 {
3760         if (igb_desc_unused(tx_ring) >= size)
3761                 return 0;
3762         return __igb_maybe_stop_tx(tx_ring, size);
3763 }
3764
3765 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3766                                     struct igb_ring *tx_ring)
3767 {
3768         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3769         int tso = 0, count;
3770         u32 tx_flags = 0;
3771         u16 first;
3772         u8 hdr_len = 0;
3773         union skb_shared_tx *shtx = skb_tx(skb);
3774
3775         /* need: 1 descriptor per page,
3776          *       + 2 desc gap to keep tail from touching head,
3777          *       + 1 desc for skb->data,
3778          *       + 1 desc for context descriptor,
3779          * otherwise try next time */
3780         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3781                 /* this is a hard error */
3782                 return NETDEV_TX_BUSY;
3783         }
3784
3785         if (unlikely(shtx->hardware)) {
3786                 shtx->in_progress = 1;
3787                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3788         }
3789
3790         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3791                 tx_flags |= IGB_TX_FLAGS_VLAN;
3792                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3793         }
3794
3795         if (skb->protocol == htons(ETH_P_IP))
3796                 tx_flags |= IGB_TX_FLAGS_IPV4;
3797
3798         first = tx_ring->next_to_use;
3799         if (skb_is_gso(skb)) {
3800                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3801
3802                 if (tso < 0) {
3803                         dev_kfree_skb_any(skb);
3804                         return NETDEV_TX_OK;
3805                 }
3806         }
3807
3808         if (tso)
3809                 tx_flags |= IGB_TX_FLAGS_TSO;
3810         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3811                  (skb->ip_summed == CHECKSUM_PARTIAL))
3812                 tx_flags |= IGB_TX_FLAGS_CSUM;
3813
3814         /*
3815          * count reflects descriptors mapped; if 0 or less then a mapping
3816          * error has occurred and we need to rewind the descriptor queue
3817          */
3818         count = igb_tx_map_adv(tx_ring, skb, first);
3819         if (!count) {
3820                 dev_kfree_skb_any(skb);
3821                 tx_ring->buffer_info[first].time_stamp = 0;
3822                 tx_ring->next_to_use = first;
3823                 return NETDEV_TX_OK;
3824         }
3825
3826         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3827
3828         /* Make sure there is space in the ring for the next send. */
3829         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3830
3831         return NETDEV_TX_OK;
3832 }
3833
3834 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3835                                       struct net_device *netdev)
3836 {
3837         struct igb_adapter *adapter = netdev_priv(netdev);
3838         struct igb_ring *tx_ring;
3839         int r_idx = 0;
3840
3841         if (test_bit(__IGB_DOWN, &adapter->state)) {
3842                 dev_kfree_skb_any(skb);
3843                 return NETDEV_TX_OK;
3844         }
3845
3846         if (skb->len <= 0) {
3847                 dev_kfree_skb_any(skb);
3848                 return NETDEV_TX_OK;
3849         }
3850
3851         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3852         tx_ring = adapter->multi_tx_table[r_idx];
3853
3854         /* This goes back to the question of how to logically map a tx queue
3855          * to a flow.  Right now, performance is impacted slightly negatively
3856          * if using multiple tx queues.  If the stack breaks away from a
3857          * single qdisc implementation, we can look at this again. */
3858         return igb_xmit_frame_ring_adv(skb, tx_ring);
3859 }
3860
3861 /**
3862  * igb_tx_timeout - Respond to a Tx Hang
3863  * @netdev: network interface device structure
3864  **/
3865 static void igb_tx_timeout(struct net_device *netdev)
3866 {
3867         struct igb_adapter *adapter = netdev_priv(netdev);
3868         struct e1000_hw *hw = &adapter->hw;
3869
3870         /* Do the reset outside of interrupt context */
3871         adapter->tx_timeout_count++;
3872
3873         if (hw->mac.type == e1000_82580)
3874                 hw->dev_spec._82575.global_device_reset = true;
3875
3876         schedule_work(&adapter->reset_task);
3877         wr32(E1000_EICS,
3878              (adapter->eims_enable_mask & ~adapter->eims_other));
3879 }
3880
3881 static void igb_reset_task(struct work_struct *work)
3882 {
3883         struct igb_adapter *adapter;
3884         adapter = container_of(work, struct igb_adapter, reset_task);
3885
3886         igb_reinit_locked(adapter);
3887 }
3888
3889 /**
3890  * igb_get_stats - Get System Network Statistics
3891  * @netdev: network interface device structure
3892  *
3893  * Returns the address of the device statistics structure.
3894  * The statistics are actually updated from the timer callback.
3895  **/
3896 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3897 {
3898         /* only return the current stats */
3899         return &netdev->stats;
3900 }
3901
3902 /**
3903  * igb_change_mtu - Change the Maximum Transfer Unit
3904  * @netdev: network interface device structure
3905  * @new_mtu: new value for maximum frame size
3906  *
3907  * Returns 0 on success, negative on failure
3908  **/
3909 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3910 {
3911         struct igb_adapter *adapter = netdev_priv(netdev);
3912         struct pci_dev *pdev = adapter->pdev;
3913         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3914         u32 rx_buffer_len, i;
3915
3916         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3917                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3918                 return -EINVAL;
3919         }
3920
3921         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3922                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3923                 return -EINVAL;
3924         }
3925
3926         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3927                 msleep(1);
3928
3929         /* igb_down has a dependency on max_frame_size */
3930         adapter->max_frame_size = max_frame;
3931
3932         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3933          * means we reserve 2 more; this pushes us to allocate from the next
3934          * larger slab size.
3935          * i.e. RXBUFFER_2048 --> size-4096 slab
3936          */
3937
3938         if (max_frame <= IGB_RXBUFFER_1024)
3939                 rx_buffer_len = IGB_RXBUFFER_1024;
3940         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3941                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3942         else
3943                 rx_buffer_len = IGB_RXBUFFER_128;
3944
3945         if (netif_running(netdev))
3946                 igb_down(adapter);
3947
3948         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3949                  netdev->mtu, new_mtu);
3950         netdev->mtu = new_mtu;
3951
3952         for (i = 0; i < adapter->num_rx_queues; i++)
3953                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3954
3955         if (netif_running(netdev))
3956                 igb_up(adapter);
3957         else
3958                 igb_reset(adapter);
3959
3960         clear_bit(__IGB_RESETTING, &adapter->state);
3961
3962         return 0;
3963 }
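
/* Editor's note: example of the buffer-size selection above (not part of
 * the original source), assuming the usual igb.h values IGB_RXBUFFER_1024 =
 * 1024, MAXIMUM_ETHERNET_VLAN_SIZE = 1522 and IGB_RXBUFFER_128 = 128: a
 * 1500-byte MTU gives max_frame = 1500 + 14 + 4 = 1518, larger than 1024
 * but small enough for a VLAN-sized buffer, so rx_buffer_len = 1522; a
 * 9000-byte jumbo MTU gives max_frame = 9018 and falls through to the
 * 128-byte header buffer, with the payload carried in the half-page
 * buffers seen in igb_clean_rx_ring() above.
 */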
3964
3965 /**
3966  * igb_update_stats - Update the board statistics counters
3967  * @adapter: board private structure
3968  **/
3969
3970 void igb_update_stats(struct igb_adapter *adapter)
3971 {
3972         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3973         struct e1000_hw *hw = &adapter->hw;
3974         struct pci_dev *pdev = adapter->pdev;
3975         u32 rnbc, reg;
3976         u16 phy_tmp;
3977         int i;
3978         u64 bytes, packets;
3979
3980 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3981
3982         /*
3983          * Prevent stats update while adapter is being reset, or if the pci
3984          * connection is down.
3985          */
3986         if (adapter->link_speed == 0)
3987                 return;
3988         if (pci_channel_offline(pdev))
3989                 return;
3990
3991         bytes = 0;
3992         packets = 0;
3993         for (i = 0; i < adapter->num_rx_queues; i++) {
3994                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3995                 struct igb_ring *ring = adapter->rx_ring[i];
3996                 ring->rx_stats.drops += rqdpc_tmp;
3997                 net_stats->rx_fifo_errors += rqdpc_tmp;
3998                 bytes += ring->rx_stats.bytes;
3999                 packets += ring->rx_stats.packets;
4000         }
4001
4002         net_stats->rx_bytes = bytes;
4003         net_stats->rx_packets = packets;
4004
4005         bytes = 0;
4006         packets = 0;
4007         for (i = 0; i < adapter->num_tx_queues; i++) {
4008                 struct igb_ring *ring = adapter->tx_ring[i];
4009                 bytes += ring->tx_stats.bytes;
4010                 packets += ring->tx_stats.packets;
4011         }
4012         net_stats->tx_bytes = bytes;
4013         net_stats->tx_packets = packets;
4014
4015         /* read stats registers */
4016         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4017         adapter->stats.gprc += rd32(E1000_GPRC);
4018         adapter->stats.gorc += rd32(E1000_GORCL);
4019         rd32(E1000_GORCH); /* clear GORCL */
4020         adapter->stats.bprc += rd32(E1000_BPRC);
4021         adapter->stats.mprc += rd32(E1000_MPRC);
4022         adapter->stats.roc += rd32(E1000_ROC);
4023
4024         adapter->stats.prc64 += rd32(E1000_PRC64);
4025         adapter->stats.prc127 += rd32(E1000_PRC127);
4026         adapter->stats.prc255 += rd32(E1000_PRC255);
4027         adapter->stats.prc511 += rd32(E1000_PRC511);
4028         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4029         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4030         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4031         adapter->stats.sec += rd32(E1000_SEC);
4032
4033         adapter->stats.mpc += rd32(E1000_MPC);
4034         adapter->stats.scc += rd32(E1000_SCC);
4035         adapter->stats.ecol += rd32(E1000_ECOL);
4036         adapter->stats.mcc += rd32(E1000_MCC);
4037         adapter->stats.latecol += rd32(E1000_LATECOL);
4038         adapter->stats.dc += rd32(E1000_DC);
4039         adapter->stats.rlec += rd32(E1000_RLEC);
4040         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4041         adapter->stats.xontxc += rd32(E1000_XONTXC);
4042         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4043         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4044         adapter->stats.fcruc += rd32(E1000_FCRUC);
4045         adapter->stats.gptc += rd32(E1000_GPTC);
4046         adapter->stats.gotc += rd32(E1000_GOTCL);
4047         rd32(E1000_GOTCH); /* clear GOTCL */
4048         rnbc = rd32(E1000_RNBC);
4049         adapter->stats.rnbc += rnbc;
4050         net_stats->rx_fifo_errors += rnbc;
4051         adapter->stats.ruc += rd32(E1000_RUC);
4052         adapter->stats.rfc += rd32(E1000_RFC);
4053         adapter->stats.rjc += rd32(E1000_RJC);
4054         adapter->stats.tor += rd32(E1000_TORH);
4055         adapter->stats.tot += rd32(E1000_TOTH);
4056         adapter->stats.tpr += rd32(E1000_TPR);
4057
4058         adapter->stats.ptc64 += rd32(E1000_PTC64);
4059         adapter->stats.ptc127 += rd32(E1000_PTC127);
4060         adapter->stats.ptc255 += rd32(E1000_PTC255);
4061         adapter->stats.ptc511 += rd32(E1000_PTC511);
4062         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4063         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4064
4065         adapter->stats.mptc += rd32(E1000_MPTC);
4066         adapter->stats.bptc += rd32(E1000_BPTC);
4067
4068         adapter->stats.tpt += rd32(E1000_TPT);
4069         adapter->stats.colc += rd32(E1000_COLC);
4070
4071         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4072         /* read internal phy specific stats */
4073         reg = rd32(E1000_CTRL_EXT);
4074         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4075                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4076                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4077         }
4078
4079         adapter->stats.tsctc += rd32(E1000_TSCTC);
4080         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4081
4082         adapter->stats.iac += rd32(E1000_IAC);
4083         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4084         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4085         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4086         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4087         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4088         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4089         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4090         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4091
4092         /* Fill out the OS statistics structure */
4093         net_stats->multicast = adapter->stats.mprc;
4094         net_stats->collisions = adapter->stats.colc;
4095
4096         /* Rx Errors */
4097
4098         /* RLEC on some newer hardware can be incorrect so build
4099          * our own version based on RUC and ROC */
4100         net_stats->rx_errors = adapter->stats.rxerrc +
4101                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4102                 adapter->stats.ruc + adapter->stats.roc +
4103                 adapter->stats.cexterr;
4104         net_stats->rx_length_errors = adapter->stats.ruc +
4105                                       adapter->stats.roc;
4106         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4107         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4108         net_stats->rx_missed_errors = adapter->stats.mpc;
4109
4110         /* Tx Errors */
4111         net_stats->tx_errors = adapter->stats.ecol +
4112                                adapter->stats.latecol;
4113         net_stats->tx_aborted_errors = adapter->stats.ecol;
4114         net_stats->tx_window_errors = adapter->stats.latecol;
4115         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4116
4117         /* Tx Dropped needs to be maintained elsewhere */
4118
4119         /* Phy Stats */
4120         if (hw->phy.media_type == e1000_media_type_copper) {
4121                 if ((adapter->link_speed == SPEED_1000) &&
4122                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4123                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4124                         adapter->phy_stats.idle_errors += phy_tmp;
4125                 }
4126         }
4127
4128         /* Management Stats */
4129         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4130         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4131         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4132 }
4133
4134 static irqreturn_t igb_msix_other(int irq, void *data)
4135 {
4136         struct igb_adapter *adapter = data;
4137         struct e1000_hw *hw = &adapter->hw;
4138         u32 icr = rd32(E1000_ICR);
4139         /* reading ICR causes bit 31 of EICR to be cleared */
4140
4141         if (icr & E1000_ICR_DRSTA)
4142                 schedule_work(&adapter->reset_task);
4143
4144         if (icr & E1000_ICR_DOUTSYNC) {
4145                 /* HW is reporting DMA is out of sync */
4146                 adapter->stats.doosync++;
4147         }
4148
4149         /* Check for a mailbox event */
4150         if (icr & E1000_ICR_VMMB)
4151                 igb_msg_task(adapter);
4152
4153         if (icr & E1000_ICR_LSC) {
4154                 hw->mac.get_link_status = 1;
4155                 /* guard against interrupt when we're going down */
4156                 if (!test_bit(__IGB_DOWN, &adapter->state))
4157                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4158         }
4159
4160         if (adapter->vfs_allocated_count)
4161                 wr32(E1000_IMS, E1000_IMS_LSC |
4162                                 E1000_IMS_VMMB |
4163                                 E1000_IMS_DOUTSYNC);
4164         else
4165                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4166         wr32(E1000_EIMS, adapter->eims_other);
4167
4168         return IRQ_HANDLED;
4169 }
4170
4171 static void igb_write_itr(struct igb_q_vector *q_vector)
4172 {
4173         struct igb_adapter *adapter = q_vector->adapter;
4174         u32 itr_val = q_vector->itr_val & 0x7FFC;
4175
4176         if (!q_vector->set_itr)
4177                 return;
4178
4179         if (!itr_val)
4180                 itr_val = 0x4;
4181
4182         if (adapter->hw.mac.type == e1000_82575)
4183                 itr_val |= itr_val << 16;
4184         else
4185                 itr_val |= 0x8000000;
4186
4187         writel(itr_val, q_vector->itr_register);
4188         q_vector->set_itr = 0;
4189 }
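
/* Editor's note: example of the register formatting above (not part of the
 * original source): for itr_val = 196 (0xC4), an 82575 writes 0x00C400C4
 * (the interval mirrored into the upper half-word), while 82576 and later
 * parts write 0x080000C4 (the interval with bit 27 set, as done by the
 * code above).
 */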
4190
4191 static irqreturn_t igb_msix_ring(int irq, void *data)
4192 {
4193         struct igb_q_vector *q_vector = data;
4194
4195         /* Write the ITR value calculated from the previous interrupt. */
4196         igb_write_itr(q_vector);
4197
4198         napi_schedule(&q_vector->napi);
4199
4200         return IRQ_HANDLED;
4201 }
4202
4203 #ifdef CONFIG_IGB_DCA
4204 static void igb_update_dca(struct igb_q_vector *q_vector)
4205 {
4206         struct igb_adapter *adapter = q_vector->adapter;
4207         struct e1000_hw *hw = &adapter->hw;
4208         int cpu = get_cpu();
4209
4210         if (q_vector->cpu == cpu)
4211                 goto out_no_update;
4212
4213         if (q_vector->tx_ring) {
4214                 int q = q_vector->tx_ring->reg_idx;
4215                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4216                 if (hw->mac.type == e1000_82575) {
4217                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4218                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4219                 } else {
4220                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4221                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4222                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4223                 }
4224                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4225                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4226         }
4227         if (q_vector->rx_ring) {
4228                 int q = q_vector->rx_ring->reg_idx;
4229                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4230                 if (hw->mac.type == e1000_82575) {
4231                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4232                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4233                 } else {
4234                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4235                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4236                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4237                 }
4238                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4239                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4240                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4241                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4242         }
4243         q_vector->cpu = cpu;
4244 out_no_update:
4245         put_cpu();
4246 }
4247
4248 static void igb_setup_dca(struct igb_adapter *adapter)
4249 {
4250         struct e1000_hw *hw = &adapter->hw;
4251         int i;
4252
4253         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4254                 return;
4255
4256         /* Always use CB2 mode, difference is masked in the CB driver. */
4257         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4258
4259         for (i = 0; i < adapter->num_q_vectors; i++) {
4260                 adapter->q_vector[i]->cpu = -1;
4261                 igb_update_dca(adapter->q_vector[i]);
4262         }
4263 }
4264
4265 static int __igb_notify_dca(struct device *dev, void *data)
4266 {
4267         struct net_device *netdev = dev_get_drvdata(dev);
4268         struct igb_adapter *adapter = netdev_priv(netdev);
4269         struct pci_dev *pdev = adapter->pdev;
4270         struct e1000_hw *hw = &adapter->hw;
4271         unsigned long event = *(unsigned long *)data;
4272
4273         switch (event) {
4274         case DCA_PROVIDER_ADD:
4275                 /* if already enabled, don't do it again */
4276                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4277                         break;
4278                 if (dca_add_requester(dev) == 0) {
4279                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4280                         dev_info(&pdev->dev, "DCA enabled\n");
4281                         igb_setup_dca(adapter);
4282                         break;
4283                 }
4284                 /* Fall Through since DCA is disabled. */
4285         case DCA_PROVIDER_REMOVE:
4286                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4287                         /* without this a class_device is left
4288                          * hanging around in the sysfs model */
4289                         dca_remove_requester(dev);
4290                         dev_info(&pdev->dev, "DCA disabled\n");
4291                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4292                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4293                 }
4294                 break;
4295         }
4296
4297         return 0;
4298 }
4299
4300 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4301                           void *p)
4302 {
4303         int ret_val;
4304
4305         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4306                                          __igb_notify_dca);
4307
4308         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4309 }
4310 #endif /* CONFIG_IGB_DCA */
4311
4312 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4313 {
4314         struct e1000_hw *hw = &adapter->hw;
4315         u32 ping;
4316         int i;
4317
4318         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4319                 ping = E1000_PF_CONTROL_MSG;
4320                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4321                         ping |= E1000_VT_MSGTYPE_CTS;
4322                 igb_write_mbx(hw, &ping, 1, i);
4323         }
4324 }
4325
4326 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4327 {
4328         struct e1000_hw *hw = &adapter->hw;
4329         u32 vmolr = rd32(E1000_VMOLR(vf));
4330         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4331
4332         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4333                             IGB_VF_FLAG_MULTI_PROMISC);
4334         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4335
4336         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4337                 vmolr |= E1000_VMOLR_MPME;
4338                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4339         } else {
4340                 /*
4341                  * if we have hashes and we are clearing a multicast promisc
4342                  * flag we need to write the hashes to the MTA as this step
4343                  * was previously skipped
4344                  */
4345                 if (vf_data->num_vf_mc_hashes > 30) {
4346                         vmolr |= E1000_VMOLR_MPME;
4347                 } else if (vf_data->num_vf_mc_hashes) {
4348                         int j;
4349                         vmolr |= E1000_VMOLR_ROMPE;
4350                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4351                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4352                 }
4353         }
4354
4355         wr32(E1000_VMOLR(vf), vmolr);
4356
4357         /* there are flags left unprocessed, likely not supported */
4358         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4359                 return -EINVAL;
4360
4361         return 0;
4362
4363 }
4364
4365 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4366                                   u32 *msgbuf, u32 vf)
4367 {
4368         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4369         u16 *hash_list = (u16 *)&msgbuf[1];
4370         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4371         int i;
4372
4373         /* salt away the number of multicast addresses assigned
4374          * to this VF for later use to restore when the PF multicast
4375          * list changes
4376          */
4377         vf_data->num_vf_mc_hashes = n;
4378
4379         /* only up to 30 hash values supported */
4380         if (n > 30)
4381                 n = 30;
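             /* note: num_vf_mc_hashes keeps the full request count so that
              * igb_restore_vf_multicasts() can fall back to multicast
              * promiscuous mode when the VF asked for more than 30 entries
              */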
4382
4383         /* store the hashes for later use */
4384         for (i = 0; i < n; i++)
4385                 vf_data->vf_mc_hashes[i] = hash_list[i];
4386
4387         /* Flush and reset the mta with the new values */
4388         igb_set_rx_mode(adapter->netdev);
4389
4390         return 0;
4391 }
4392
4393 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4394 {
4395         struct e1000_hw *hw = &adapter->hw;
4396         struct vf_data_storage *vf_data;
4397         int i, j;
4398
4399         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4400                 u32 vmolr = rd32(E1000_VMOLR(i));
4401                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4402
4403                 vf_data = &adapter->vf_data[i];
4404
4405                 if ((vf_data->num_vf_mc_hashes > 30) ||
4406                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4407                         vmolr |= E1000_VMOLR_MPME;
4408                 } else if (vf_data->num_vf_mc_hashes) {
4409                         vmolr |= E1000_VMOLR_ROMPE;
4410                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4411                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4412                 }
4413                 wr32(E1000_VMOLR(i), vmolr);
4414         }
4415 }
4416
4417 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4418 {
4419         struct e1000_hw *hw = &adapter->hw;
4420         u32 pool_mask, reg, vid;
4421         int i;
4422
4423         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4424
4425         /* Find the vlan filter for this id */
4426         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4427                 reg = rd32(E1000_VLVF(i));
4428
4429                 /* remove the vf from the pool */
4430                 reg &= ~pool_mask;
4431
4432                 /* if pool is empty then remove entry from vfta */
4433                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4434                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4435                         vid = reg & E1000_VLVF_VLANID_MASK;
4436                         reg = 0;
4437                         igb_vfta_set(hw, vid, false);
4438                 }
4439
4440                 wr32(E1000_VLVF(i), reg);
4441         }
4442
4443         adapter->vf_data[vf].vlans_enabled = 0;
4444 }
4445
4446 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4447 {
4448         struct e1000_hw *hw = &adapter->hw;
4449         u32 reg, i;
4450
4451         /* The vlvf table only exists on 82576 hardware and newer */
4452         if (hw->mac.type < e1000_82576)
4453                 return -1;
4454
4455         /* we only need to do this if VMDq is enabled */
4456         if (!adapter->vfs_allocated_count)
4457                 return -1;
4458
4459         /* Find the vlan filter for this id */
4460         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4461                 reg = rd32(E1000_VLVF(i));
4462                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4463                     vid == (reg & E1000_VLVF_VLANID_MASK))
4464                         break;
4465         }
4466
4467         if (add) {
4468                 if (i == E1000_VLVF_ARRAY_SIZE) {
4469                         /* Did not find a matching VLAN ID entry that was
4470                          * enabled.  Search for a free filter entry, i.e.
4471                          * one without the enable bit set
4472                          */
4473                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4474                                 reg = rd32(E1000_VLVF(i));
4475                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4476                                         break;
4477                         }
4478                 }
4479                 if (i < E1000_VLVF_ARRAY_SIZE) {
4480                         /* Found an enabled/available entry */
4481                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4482
4483                         /* if !enabled we need to set this up in vfta */
4484                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4485                                 /* add VID to filter table */
4486                                 igb_vfta_set(hw, vid, true);
4487                                 reg |= E1000_VLVF_VLANID_ENABLE;
4488                         }
4489                         reg &= ~E1000_VLVF_VLANID_MASK;
4490                         reg |= vid;
4491                         wr32(E1000_VLVF(i), reg);
4492
4493                         /* do not modify RLPML for PF devices */
4494                         if (vf >= adapter->vfs_allocated_count)
4495                                 return 0;
4496
4497                         if (!adapter->vf_data[vf].vlans_enabled) {
4498                                 u32 size;
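                                     /* first VLAN enabled for this VF: grow its max
                                      * packet size (VMOLR.RLPML) by 4 bytes to make
                                      * room for the VLAN tag
                                      */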
4499                                 reg = rd32(E1000_VMOLR(vf));
4500                                 size = reg & E1000_VMOLR_RLPML_MASK;
4501                                 size += 4;
4502                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4503                                 reg |= size;
4504                                 wr32(E1000_VMOLR(vf), reg);
4505                         }
4506
4507                         adapter->vf_data[vf].vlans_enabled++;
4508                         return 0;
4509                 }
4510         } else {
4511                 if (i < E1000_VLVF_ARRAY_SIZE) {
4512                         /* remove vf from the pool */
4513                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4514                         /* if pool is empty then remove entry from vfta */
4515                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4516                                 reg = 0;
4517                                 igb_vfta_set(hw, vid, false);
4518                         }
4519                         wr32(E1000_VLVF(i), reg);
4520
4521                         /* do not modify RLPML for PF devices */
4522                         if (vf >= adapter->vfs_allocated_count)
4523                                 return 0;
4524
4525                         adapter->vf_data[vf].vlans_enabled--;
4526                         if (!adapter->vf_data[vf].vlans_enabled) {
4527                                 u32 size;
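                                     /* last VLAN removed: shrink the max packet size
                                      * back by the 4-byte VLAN tag
                                      */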
4528                                 reg = rd32(E1000_VMOLR(vf));
4529                                 size = reg & E1000_VMOLR_RLPML_MASK;
4530                                 size -= 4;
4531                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4532                                 reg |= size;
4533                                 wr32(E1000_VMOLR(vf), reg);
4534                         }
4535                 }
4536         }
4537         return 0;
4538 }
4539
4540 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4541 {
4542         struct e1000_hw *hw = &adapter->hw;
4543
4544         if (vid)
4545                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4546         else
4547                 wr32(E1000_VMVIR(vf), 0);
4548 }
4549
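     /*
      * .ndo_set_vf_vlan entry point; typically reached from user space via
      * something like "ip link set <dev> vf <n> vlan <vid> qos <qos>"
      * (illustrative iproute2 usage, not part of this driver)
      */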
4550 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4551                                int vf, u16 vlan, u8 qos)
4552 {
4553         int err = 0;
4554         struct igb_adapter *adapter = netdev_priv(netdev);
4555
4556         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4557                 return -EINVAL;
4558         if (vlan || qos) {
4559                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4560                 if (err)
4561                         goto out;
4562                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4563                 igb_set_vmolr(adapter, vf, !vlan);
4564                 adapter->vf_data[vf].pf_vlan = vlan;
4565                 adapter->vf_data[vf].pf_qos = qos;
4566                 dev_info(&adapter->pdev->dev,
4567                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4568                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4569                         dev_warn(&adapter->pdev->dev,
4570                                  "The VF VLAN has been set,"
4571                                  " but the PF device is not up.\n");
4572                         dev_warn(&adapter->pdev->dev,
4573                                  "Bring the PF device up before"
4574                                  " attempting to use the VF device.\n");
4575                 }
4576         } else {
4577                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4578                                    false, vf);
4579                 igb_set_vmvir(adapter, vlan, vf);
4580                 igb_set_vmolr(adapter, vf, true);
4581                 adapter->vf_data[vf].pf_vlan = 0;
4582                 adapter->vf_data[vf].pf_qos = 0;
4583         }
4584 out:
4585         return err;
4586 }
4587
4588 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4589 {
4590         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4591         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4592
4593         return igb_vlvf_set(adapter, vid, add, vf);
4594 }
4595
4596 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4597 {
4598         /* clear flags */
4599         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4600         adapter->vf_data[vf].last_nack = jiffies;
4601
4602         /* reset offloads to defaults */
4603         igb_set_vmolr(adapter, vf, true);
4604
4605         /* reset vlans for device */
4606         igb_clear_vf_vfta(adapter, vf);
4607         if (adapter->vf_data[vf].pf_vlan)
4608                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4609                                     adapter->vf_data[vf].pf_vlan,
4610                                     adapter->vf_data[vf].pf_qos);
4611         else
4612                 igb_clear_vf_vfta(adapter, vf);
4613
4614         /* reset multicast table array for vf */
4615         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4616
4617         /* Flush and reset the mta with the new values */
4618         igb_set_rx_mode(adapter->netdev);
4619 }
4620
4621 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4622 {
4623         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4624
4625         /* generate a new mac address as we were hotplug removed/added */
4626         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4627                 random_ether_addr(vf_mac);
4628
4629         /* process remaining reset events */
4630         igb_vf_reset(adapter, vf);
4631 }
4632
4633 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4634 {
4635         struct e1000_hw *hw = &adapter->hw;
4636         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
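             /* VF MAC filters are handed out from the top of the RAR table
              * down, one entry per VF, leaving the low entries to the PF
              */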
4637         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4638         u32 reg, msgbuf[3];
4639         u8 *addr = (u8 *)(&msgbuf[1]);
4640
4641         /* process all the same items cleared in a function level reset */
4642         igb_vf_reset(adapter, vf);
4643
4644         /* set vf mac address */
4645         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4646
4647         /* enable transmit and receive for vf */
4648         reg = rd32(E1000_VFTE);
4649         wr32(E1000_VFTE, reg | (1 << vf));
4650         reg = rd32(E1000_VFRE);
4651         wr32(E1000_VFRE, reg | (1 << vf));
4652
4653         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4654
4655         /* reply to reset with ack and vf mac address */
4656         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4657         memcpy(addr, vf_mac, 6);
4658         igb_write_mbx(hw, msgbuf, 3, vf);
4659 }
4660
4661 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4662 {
4663         unsigned char *addr = (unsigned char *)&msg[1];
4664         int err = -1;
4665
4666         if (is_valid_ether_addr(addr))
4667                 err = igb_set_vf_mac(adapter, vf, addr);
4668
4669         return err;
4670 }
4671
4672 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4673 {
4674         struct e1000_hw *hw = &adapter->hw;
4675         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4676         u32 msg = E1000_VT_MSGTYPE_NACK;
4677
4678         /* if device isn't clear to send it shouldn't be reading either */
4679         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4680             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4681                 igb_write_mbx(hw, &msg, 1, vf);
4682                 vf_data->last_nack = jiffies;
4683         }
4684 }
4685
4686 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4687 {
4688         struct pci_dev *pdev = adapter->pdev;
4689         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4690         struct e1000_hw *hw = &adapter->hw;
4691         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4692         s32 retval;
4693
4694         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4695
4696         if (retval) {
4697                 /* if receive failed revoke VF CTS stats and restart init */
4698                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4699                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4700                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4701                         return;
4702                 goto out;
4703         }
4704
4705         /* this is a message we already processed, do nothing */
4706         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4707                 return;
4708
4709         /*
4710          * until the vf completes a reset it should not be
4711          * allowed to start any configuration.
4712          */
4713
4714         if (msgbuf[0] == E1000_VF_RESET) {
4715                 igb_vf_reset_msg(adapter, vf);
4716                 return;
4717         }
4718
4719         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4720                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4721                         return;
4722                 retval = -1;
4723                 goto out;
4724         }
4725
4726         switch ((msgbuf[0] & 0xFFFF)) {
4727         case E1000_VF_SET_MAC_ADDR:
4728                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4729                 break;
4730         case E1000_VF_SET_PROMISC:
4731                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4732                 break;
4733         case E1000_VF_SET_MULTICAST:
4734                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4735                 break;
4736         case E1000_VF_SET_LPE:
4737                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4738                 break;
4739         case E1000_VF_SET_VLAN:
4740                 if (adapter->vf_data[vf].pf_vlan)
4741                         retval = -1;
4742                 else
4743                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4744                 break;
4745         default:
4746                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4747                 retval = -1;
4748                 break;
4749         }
4750
4751         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4752 out:
4753         /* notify the VF of the results of what it sent us */
4754         if (retval)
4755                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4756         else
4757                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4758
4759         igb_write_mbx(hw, msgbuf, 1, vf);
4760 }
4761
4762 static void igb_msg_task(struct igb_adapter *adapter)
4763 {
4764         struct e1000_hw *hw = &adapter->hw;
4765         u32 vf;
4766
4767         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4768                 /* process any reset requests */
4769                 if (!igb_check_for_rst(hw, vf))
4770                         igb_vf_reset_event(adapter, vf);
4771
4772                 /* process any messages pending */
4773                 if (!igb_check_for_msg(hw, vf))
4774                         igb_rcv_msg_from_vf(adapter, vf);
4775
4776                 /* process any acks */
4777                 if (!igb_check_for_ack(hw, vf))
4778                         igb_rcv_ack_from_vf(adapter, vf);
4779         }
4780 }
4781
4782 /**
4783  *  igb_set_uta - Set unicast filter table address
4784  *  @adapter: board private structure
4785  *
4786  *  The unicast table address is a register array of 32-bit registers.
4787  *  The table is meant to be used in a way similar to how the MTA is used;
4788  *  however, due to certain limitations in the hardware it is necessary to
4789  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4790  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
4791  **/
4792 static void igb_set_uta(struct igb_adapter *adapter)
4793 {
4794         struct e1000_hw *hw = &adapter->hw;
4795         int i;
4796
4797         /* The UTA table only exists on 82576 hardware and newer */
4798         if (hw->mac.type < e1000_82576)
4799                 return;
4800
4801         /* we only need to do this if VMDq is enabled */
4802         if (!adapter->vfs_allocated_count)
4803                 return;
4804
4805         for (i = 0; i < hw->mac.uta_reg_count; i++)
4806                 array_wr32(E1000_UTA, i, ~0);
4807 }
4808
4809 /**
4810  * igb_intr_msi - Interrupt Handler
4811  * @irq: interrupt number
4812  * @data: pointer to a network interface device structure
4813  **/
4814 static irqreturn_t igb_intr_msi(int irq, void *data)
4815 {
4816         struct igb_adapter *adapter = data;
4817         struct igb_q_vector *q_vector = adapter->q_vector[0];
4818         struct e1000_hw *hw = &adapter->hw;
4819         /* read ICR disables interrupts using IAM */
4820         u32 icr = rd32(E1000_ICR);
4821
4822         igb_write_itr(q_vector);
4823
4824         if (icr & E1000_ICR_DRSTA)
4825                 schedule_work(&adapter->reset_task);
4826
4827         if (icr & E1000_ICR_DOUTSYNC) {
4828                 /* HW is reporting DMA is out of sync */
4829                 adapter->stats.doosync++;
4830         }
4831
4832         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4833                 hw->mac.get_link_status = 1;
4834                 if (!test_bit(__IGB_DOWN, &adapter->state))
4835                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4836         }
4837
4838         napi_schedule(&q_vector->napi);
4839
4840         return IRQ_HANDLED;
4841 }
4842
4843 /**
4844  * igb_intr - Legacy Interrupt Handler
4845  * @irq: interrupt number
4846  * @data: pointer to a network interface device structure
4847  **/
4848 static irqreturn_t igb_intr(int irq, void *data)
4849 {
4850         struct igb_adapter *adapter = data;
4851         struct igb_q_vector *q_vector = adapter->q_vector[0];
4852         struct e1000_hw *hw = &adapter->hw;
4853         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4854          * need for the IMC write */
4855         u32 icr = rd32(E1000_ICR);
4856         if (!icr)
4857                 return IRQ_NONE;  /* Not our interrupt */
4858
4859         igb_write_itr(q_vector);
4860
4861         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4862          * not set, then the adapter didn't send an interrupt */
4863         if (!(icr & E1000_ICR_INT_ASSERTED))
4864                 return IRQ_NONE;
4865
4866         if (icr & E1000_ICR_DRSTA)
4867                 schedule_work(&adapter->reset_task);
4868
4869         if (icr & E1000_ICR_DOUTSYNC) {
4870                 /* HW is reporting DMA is out of sync */
4871                 adapter->stats.doosync++;
4872         }
4873
4874         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4875                 hw->mac.get_link_status = 1;
4876                 /* guard against interrupt when we're going down */
4877                 if (!test_bit(__IGB_DOWN, &adapter->state))
4878                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4879         }
4880
4881         napi_schedule(&q_vector->napi);
4882
4883         return IRQ_HANDLED;
4884 }
4885
4886 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4887 {
4888         struct igb_adapter *adapter = q_vector->adapter;
4889         struct e1000_hw *hw = &adapter->hw;
4890
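             /* when interrupt moderation is in a dynamic mode, recompute the
              * rate before re-arming: a single shared ITR in legacy/MSI mode,
              * a per-vector ITR when MSI-X is in use
              */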
4891         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4892             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4893                 if (!adapter->msix_entries)
4894                         igb_set_itr(adapter);
4895                 else
4896                         igb_update_ring_itr(q_vector);
4897         }
4898
4899         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4900                 if (adapter->msix_entries)
4901                         wr32(E1000_EIMS, q_vector->eims_value);
4902                 else
4903                         igb_irq_enable(adapter);
4904         }
4905 }
4906
4907 /**
4908  * igb_poll - NAPI Rx polling callback
4909  * @napi: napi polling structure
4910  * @budget: count of how many packets we should handle
4911  **/
4912 static int igb_poll(struct napi_struct *napi, int budget)
4913 {
4914         struct igb_q_vector *q_vector = container_of(napi,
4915                                                      struct igb_q_vector,
4916                                                      napi);
4917         int tx_clean_complete = 1, work_done = 0;
4918
4919 #ifdef CONFIG_IGB_DCA
4920         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4921                 igb_update_dca(q_vector);
4922 #endif
4923         if (q_vector->tx_ring)
4924                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4925
4926         if (q_vector->rx_ring)
4927                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4928
4929         if (!tx_clean_complete)
4930                 work_done = budget;
4931
4932         /* If not enough Rx work done, exit the polling mode */
4933         if (work_done < budget) {
4934                 napi_complete(napi);
4935                 igb_ring_irq_enable(q_vector);
4936         }
4937
4938         return work_done;
4939 }
4940
4941 /**
4942  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4943  * @adapter: board private structure
4944  * @shhwtstamps: timestamp structure to update
4945  * @regval: unsigned 64bit system time value.
4946  *
4947  * We need to convert the system time value stored in the RX/TXSTMP registers
4948  * into a hwtstamp which can be used by the upper level timestamping functions
4949  */
4950 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4951                                    struct skb_shared_hwtstamps *shhwtstamps,
4952                                    u64 regval)
4953 {
4954         u64 ns;
4955
4956         /*
4957          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4958          * bit 24 to match the clock shift we set up earlier.
4959          */
4960         if (adapter->hw.mac.type == e1000_82580)
4961                 regval <<= IGB_82580_TSYNC_SHIFT;
4962
4963         ns = timecounter_cyc2time(&adapter->clock, regval);
4964         timecompare_update(&adapter->compare, ns);
4965         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4966         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4967         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4968 }
4969
4970 /**
4971  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4972  * @q_vector: pointer to q_vector containing needed info
4973  * @skb: packet that was just sent
4974  *
4975  * If we were asked to do hardware stamping and such a time stamp is
4976  * available, then it must have been for this skb here because we allow
4977  * only one such packet into the queue.
4978  */
4979 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4980 {
4981         struct igb_adapter *adapter = q_vector->adapter;
4982         union skb_shared_tx *shtx = skb_tx(skb);
4983         struct e1000_hw *hw = &adapter->hw;
4984         struct skb_shared_hwtstamps shhwtstamps;
4985         u64 regval;
4986
4987         /* if skb does not support hw timestamp or TX stamp not valid exit */
4988         if (likely(!shtx->hardware) ||
4989             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4990                 return;
4991
4992         regval = rd32(E1000_TXSTMPL);
4993         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4994
4995         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4996         skb_tstamp_tx(skb, &shhwtstamps);
4997 }
4998
4999 /**
5000  * igb_clean_tx_irq - Reclaim resources after transmit completes
5001  * @q_vector: pointer to q_vector containing needed info
5002  * returns true if ring is completely cleaned
5003  **/
5004 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5005 {
5006         struct igb_adapter *adapter = q_vector->adapter;
5007         struct igb_ring *tx_ring = q_vector->tx_ring;
5008         struct net_device *netdev = tx_ring->netdev;
5009         struct e1000_hw *hw = &adapter->hw;
5010         struct igb_buffer *buffer_info;
5011         struct sk_buff *skb;
5012         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5013         unsigned int total_bytes = 0, total_packets = 0;
5014         unsigned int i, eop, count = 0;
5015         bool cleaned = false;
5016
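             /* clean one packet at a time: next_to_watch marks the packet's
              * EOP descriptor, and its DD bit tells us the hardware is done
              * with every descriptor belonging to that packet
              */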
5017         i = tx_ring->next_to_clean;
5018         eop = tx_ring->buffer_info[i].next_to_watch;
5019         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5020
5021         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5022                (count < tx_ring->count)) {
5023                 for (cleaned = false; !cleaned; count++) {
5024                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5025                         buffer_info = &tx_ring->buffer_info[i];
5026                         cleaned = (i == eop);
5027                         skb = buffer_info->skb;
5028
5029                         if (skb) {
5030                                 unsigned int segs, bytecount;
5031                                 /* gso_segs is currently only valid for tcp */
5032                                 segs = buffer_info->gso_segs;
5033                                 /* multiply data chunks by size of headers */
5034                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5035                                             skb->len;
5036                                 total_packets += segs;
5037                                 total_bytes += bytecount;
5038
5039                                 igb_tx_hwtstamp(q_vector, skb);
5040                         }
5041
5042                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5043                         tx_desc->wb.status = 0;
5044
5045                         i++;
5046                         if (i == tx_ring->count)
5047                                 i = 0;
5048                 }
5049                 eop = tx_ring->buffer_info[i].next_to_watch;
5050                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5051         }
5052
5053         tx_ring->next_to_clean = i;
5054
5055         if (unlikely(count &&
5056                      netif_carrier_ok(netdev) &&
5057                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5058                 /* Make sure that anybody stopping the queue after this
5059                  * sees the new next_to_clean.
5060                  */
5061                 smp_mb();
5062                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5063                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5064                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5065                         tx_ring->tx_stats.restart_queue++;
5066                 }
5067         }
5068
5069         if (tx_ring->detect_tx_hung) {
5070                 /* Detect a transmit hang in hardware; this serializes the
5071                  * check with the clearing of time_stamp and movement of i */
5072                 tx_ring->detect_tx_hung = false;
5073                 if (tx_ring->buffer_info[i].time_stamp &&
5074                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5075                                (adapter->tx_timeout_factor * HZ)) &&
5076                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5077
5078                         /* detected Tx unit hang */
5079                         dev_err(&tx_ring->pdev->dev,
5080                                 "Detected Tx Unit Hang\n"
5081                                 "  Tx Queue             <%d>\n"
5082                                 "  TDH                  <%x>\n"
5083                                 "  TDT                  <%x>\n"
5084                                 "  next_to_use          <%x>\n"
5085                                 "  next_to_clean        <%x>\n"
5086                                 "buffer_info[next_to_clean]\n"
5087                                 "  time_stamp           <%lx>\n"
5088                                 "  next_to_watch        <%x>\n"
5089                                 "  jiffies              <%lx>\n"
5090                                 "  desc.status          <%x>\n",
5091                                 tx_ring->queue_index,
5092                                 readl(tx_ring->head),
5093                                 readl(tx_ring->tail),
5094                                 tx_ring->next_to_use,
5095                                 tx_ring->next_to_clean,
5096                                 tx_ring->buffer_info[eop].time_stamp,
5097                                 eop,
5098                                 jiffies,
5099                                 eop_desc->wb.status);
5100                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5101                 }
5102         }
5103         tx_ring->total_bytes += total_bytes;
5104         tx_ring->total_packets += total_packets;
5105         tx_ring->tx_stats.bytes += total_bytes;
5106         tx_ring->tx_stats.packets += total_packets;
5107         return (count < tx_ring->count);
5108 }
5109
5110 /**
5111  * igb_receive_skb - helper function to handle rx indications
5112  * @q_vector: structure containing interrupt and ring information
5113  * @skb: packet to send up
5114  * @vlan_tag: vlan tag for packet
5115  **/
5116 static void igb_receive_skb(struct igb_q_vector *q_vector,
5117                             struct sk_buff *skb,
5118                             u16 vlan_tag)
5119 {
5120         struct igb_adapter *adapter = q_vector->adapter;
5121
5122         if (vlan_tag)
5123                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5124                                  vlan_tag, skb);
5125         else
5126                 napi_gro_receive(&q_vector->napi, skb);
5127 }
5128
5129 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5130                                        u32 status_err, struct sk_buff *skb)
5131 {
5132         skb->ip_summed = CHECKSUM_NONE;
5133
5134         /* skip if the Ignore Checksum bit is set or checksumming is off via ethtool */
5135         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5136              (status_err & E1000_RXD_STAT_IXSM))
5137                 return;
5138
5139         /* TCP/UDP checksum error bit is set */
5140         if (status_err &
5141             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5142                 /*
5143                  * work around an erratum with SCTP packets where the TCPE (aka
5144                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5145                  * packets; let the stack verify the crc32c instead
5146                  */
5147                 if ((skb->len == 60) &&
5148                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5149                         ring->rx_stats.csum_err++;
5150
5151                 /* let the stack verify checksum errors */
5152                 return;
5153         }
5154         /* It must be a TCP or UDP packet with a valid checksum */
5155         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5156                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5157
5158         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5159 }
5160
5161 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5162                                    struct sk_buff *skb)
5163 {
5164         struct igb_adapter *adapter = q_vector->adapter;
5165         struct e1000_hw *hw = &adapter->hw;
5166         u64 regval;
5167
5168         /*
5169          * If this bit is set, then the RX registers contain the time stamp. No
5170          * other packet will be time stamped until we read these registers, so
5171          * read the registers to make them available again. Because only one
5172          * packet can be time stamped at a time, we know that the register
5173          * values must belong to this one here and therefore we don't need to
5174          * compare any of the additional attributes stored for it.
5175          *
5176          * If nothing went wrong, then it should have a skb_shared_tx that we
5177          * can turn into a skb_shared_hwtstamps.
5178          */
5179         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5180                 return;
5181         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5182                 return;
5183
5184         regval = rd32(E1000_RXSTMPL);
5185         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5186
5187         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5188 }
5189 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5190                                union e1000_adv_rx_desc *rx_desc)
5191 {
5192         /* HW will not DMA in data larger than the given buffer, even if it
5193          * parses the (NFS, of course) header to be larger.  In that case, it
5194          * fills the header buffer and spills the rest into the page.
5195          */
5196         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5197                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5198         if (hlen > rx_ring->rx_buffer_len)
5199                 hlen = rx_ring->rx_buffer_len;
5200         return hlen;
5201 }
5202
5203 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5204                                  int *work_done, int budget)
5205 {
5206         struct igb_ring *rx_ring = q_vector->rx_ring;
5207         struct net_device *netdev = rx_ring->netdev;
5208         struct pci_dev *pdev = rx_ring->pdev;
5209         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5210         struct igb_buffer *buffer_info, *next_buffer;
5211         struct sk_buff *skb;
5212         bool cleaned = false;
5213         int cleaned_count = 0;
5214         int current_node = numa_node_id();
5215         unsigned int total_bytes = 0, total_packets = 0;
5216         unsigned int i;
5217         u32 staterr;
5218         u16 length;
5219         u16 vlan_tag;
5220
5221         i = rx_ring->next_to_clean;
5222         buffer_info = &rx_ring->buffer_info[i];
5223         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5224         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5225
5226         while (staterr & E1000_RXD_STAT_DD) {
5227                 if (*work_done >= budget)
5228                         break;
5229                 (*work_done)++;
5230
5231                 skb = buffer_info->skb;
5232                 prefetch(skb->data - NET_IP_ALIGN);
5233                 buffer_info->skb = NULL;
5234
5235                 i++;
5236                 if (i == rx_ring->count)
5237                         i = 0;
5238
5239                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5240                 prefetch(next_rxd);
5241                 next_buffer = &rx_ring->buffer_info[i];
5242
5243                 length = le16_to_cpu(rx_desc->wb.upper.length);
5244                 cleaned = true;
5245                 cleaned_count++;
5246
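                     /* packet split: with large buffers the whole frame was
                      * DMAed into the skb; with small buffers the skb holds
                      * only the parsed header and any payload sits in the
                      * half-page attached below
                      */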
5247                 if (buffer_info->dma) {
5248                         pci_unmap_single(pdev, buffer_info->dma,
5249                                          rx_ring->rx_buffer_len,
5250                                          PCI_DMA_FROMDEVICE);
5251                         buffer_info->dma = 0;
5252                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5253                                 skb_put(skb, length);
5254                                 goto send_up;
5255                         }
5256                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5257                 }
5258
5259                 if (length) {
5260                         pci_unmap_page(pdev, buffer_info->page_dma,
5261                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5262                         buffer_info->page_dma = 0;
5263
5264                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5265                                                 buffer_info->page,
5266                                                 buffer_info->page_offset,
5267                                                 length);
5268
5269                         if ((page_count(buffer_info->page) != 1) ||
5270                             (page_to_nid(buffer_info->page) != current_node))
5271                                 buffer_info->page = NULL;
5272                         else
5273                                 get_page(buffer_info->page);
5274
5275                         skb->len += length;
5276                         skb->data_len += length;
5277                         skb->truesize += length;
5278                 }
5279
5280                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5281                         buffer_info->skb = next_buffer->skb;
5282                         buffer_info->dma = next_buffer->dma;
5283                         next_buffer->skb = skb;
5284                         next_buffer->dma = 0;
5285                         goto next_desc;
5286                 }
5287 send_up:
5288                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5289                         dev_kfree_skb_irq(skb);
5290                         goto next_desc;
5291                 }
5292
5293                 igb_rx_hwtstamp(q_vector, staterr, skb);
5294                 total_bytes += skb->len;
5295                 total_packets++;
5296
5297                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5298
5299                 skb->protocol = eth_type_trans(skb, netdev);
5300                 skb_record_rx_queue(skb, rx_ring->queue_index);
5301
5302                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5303                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5304
5305                 igb_receive_skb(q_vector, skb, vlan_tag);
5306
5307 next_desc:
5308                 rx_desc->wb.upper.status_error = 0;
5309
5310                 /* return some buffers to hardware, one at a time is too slow */
5311                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5312                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5313                         cleaned_count = 0;
5314                 }
5315
5316                 /* use prefetched values */
5317                 rx_desc = next_rxd;
5318                 buffer_info = next_buffer;
5319                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5320         }
5321
5322         rx_ring->next_to_clean = i;
5323         cleaned_count = igb_desc_unused(rx_ring);
5324
5325         if (cleaned_count)
5326                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5327
5328         rx_ring->total_packets += total_packets;
5329         rx_ring->total_bytes += total_bytes;
5330         rx_ring->rx_stats.packets += total_packets;
5331         rx_ring->rx_stats.bytes += total_bytes;
5332         return cleaned;
5333 }
5334
5335 /**
5336  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5337  * @rx_ring: Rx descriptor ring to refill; cleaned_count buffers are added
5338  **/
5339 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5340 {
5341         struct net_device *netdev = rx_ring->netdev;
5342         union e1000_adv_rx_desc *rx_desc;
5343         struct igb_buffer *buffer_info;
5344         struct sk_buff *skb;
5345         unsigned int i;
5346         int bufsz;
5347
5348         i = rx_ring->next_to_use;
5349         buffer_info = &rx_ring->buffer_info[i];
5350
5351         bufsz = rx_ring->rx_buffer_len;
5352
5353         while (cleaned_count--) {
5354                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5355
5356                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5357                         if (!buffer_info->page) {
5358                                 buffer_info->page = netdev_alloc_page(netdev);
5359                                 if (!buffer_info->page) {
5360                                         rx_ring->rx_stats.alloc_failed++;
5361                                         goto no_buffers;
5362                                 }
5363                                 buffer_info->page_offset = 0;
5364                         } else {
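                                     /* the page is used in two halves; flip to
                                      * the other half when reusing it
                                      */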
5365                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5366                         }
5367                         buffer_info->page_dma =
5368                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5369                                              buffer_info->page_offset,
5370                                              PAGE_SIZE / 2,
5371                                              PCI_DMA_FROMDEVICE);
5372                         if (pci_dma_mapping_error(rx_ring->pdev,
5373                                                   buffer_info->page_dma)) {
5374                                 buffer_info->page_dma = 0;
5375                                 rx_ring->rx_stats.alloc_failed++;
5376                                 goto no_buffers;
5377                         }
5378                 }
5379
5380                 skb = buffer_info->skb;
5381                 if (!skb) {
5382                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5383                         if (!skb) {
5384                                 rx_ring->rx_stats.alloc_failed++;
5385                                 goto no_buffers;
5386                         }
5387
5388                         buffer_info->skb = skb;
5389                 }
5390                 if (!buffer_info->dma) {
5391                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5392                                                           skb->data,
5393                                                           bufsz,
5394                                                           PCI_DMA_FROMDEVICE);
5395                         if (pci_dma_mapping_error(rx_ring->pdev,
5396                                                   buffer_info->dma)) {
5397                                 buffer_info->dma = 0;
5398                                 rx_ring->rx_stats.alloc_failed++;
5399                                 goto no_buffers;
5400                         }
5401                 }
5402                 /* Refresh the desc even if buffer_addrs didn't change because
5403                  * each write-back erases this info. */
5404                 if (bufsz < IGB_RXBUFFER_1024) {
5405                         rx_desc->read.pkt_addr =
5406                              cpu_to_le64(buffer_info->page_dma);
5407                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5408                 } else {
5409                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5410                         rx_desc->read.hdr_addr = 0;
5411                 }
5412
5413                 i++;
5414                 if (i == rx_ring->count)
5415                         i = 0;
5416                 buffer_info = &rx_ring->buffer_info[i];
5417         }
5418
5419 no_buffers:
5420         if (rx_ring->next_to_use != i) {
5421                 rx_ring->next_to_use = i;
5422                 if (i == 0)
5423                         i = (rx_ring->count - 1);
5424                 else
5425                         i--;
5426
5427                 /* Force memory writes to complete before letting h/w
5428                  * know there are new descriptors to fetch.  (Only
5429                  * applicable for weak-ordered memory model archs,
5430                  * such as IA-64). */
5431                 wmb();
5432                 writel(i, rx_ring->tail);
5433         }
5434 }
5435
5436 /**
5437  * igb_mii_ioctl - handle MII register ioctls
5438  * @netdev: network interface device structure
5439  * @ifr: ifreq structure carrying the mii_ioctl_data
5440  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5441  **/
5442 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5443 {
5444         struct igb_adapter *adapter = netdev_priv(netdev);
5445         struct mii_ioctl_data *data = if_mii(ifr);
5446
5447         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5448                 return -EOPNOTSUPP;
5449
5450         switch (cmd) {
5451         case SIOCGMIIPHY:
5452                 data->phy_id = adapter->hw.phy.addr;
5453                 break;
5454         case SIOCGMIIREG:
5455                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5456                                      &data->val_out))
5457                         return -EIO;
5458                 break;
5459         case SIOCSMIIREG:
5460         default:
5461                 return -EOPNOTSUPP;
5462         }
5463         return 0;
5464 }
5465
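     /*
      * A minimal sketch of how user space reaches this handler through the
      * SIOCSHWTSTAMP ioctl (illustrative only, not driver code; needs
      * <linux/net_tstamp.h>, <linux/sockios.h>, <net/if.h>, <sys/ioctl.h>,
      * <string.h>):
      *
      *     struct hwtstamp_config cfg = {
      *             .tx_type   = HWTSTAMP_TX_ON,
      *             .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
      *     };
      *     struct ifreq ifr = { 0 };
      *
      *     strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
      *     ifr.ifr_data = (char *)&cfg;
      *     ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);
      */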
5466 /**
5467  * igb_hwtstamp_ioctl - control hardware time stamping
5468  * @netdev: network interface device structure
5469  * @ifr: ifreq structure carrying the hwtstamp_config from user space
5470  * @cmd: ioctl command (SIOCSHWTSTAMP)
5471  *
5472  * Outgoing time stamping can be enabled and disabled. Play nice and
5473  * disable it when requested, although it shouldn't cause any overhead
5474  * when no packet needs it. At most one packet in the queue may be
5475  * marked for time stamping, otherwise it would be impossible to tell
5476  * for sure to which packet the hardware time stamp belongs.
5477  *
5478  * Incoming time stamping has to be configured via the hardware
5479  * filters. Not all combinations are supported, in particular event
5480  * type has to be specified. Matching the kind of event packet is
5481  * not supported, with the exception of "all V2 events regardless of
5482  * level 2 or 4".
5483  *
5484  **/
5485 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5486                               struct ifreq *ifr, int cmd)
5487 {
5488         struct igb_adapter *adapter = netdev_priv(netdev);
5489         struct e1000_hw *hw = &adapter->hw;
5490         struct hwtstamp_config config;
5491         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5492         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5493         u32 tsync_rx_cfg = 0;
5494         bool is_l4 = false;
5495         bool is_l2 = false;
5496         u32 regval;
5497
5498         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5499                 return -EFAULT;
5500
5501         /* reserved for future extensions */
5502         if (config.flags)
5503                 return -EINVAL;
5504
5505         switch (config.tx_type) {
5506         case HWTSTAMP_TX_OFF:
5507                 tsync_tx_ctl = 0;
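                     /* fall through - OFF just leaves the enable bit cleared */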
5508         case HWTSTAMP_TX_ON:
5509                 break;
5510         default:
5511                 return -ERANGE;
5512         }
5513
5514         switch (config.rx_filter) {
5515         case HWTSTAMP_FILTER_NONE:
5516                 tsync_rx_ctl = 0;
5517                 break;
5518         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5519         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5520         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5521         case HWTSTAMP_FILTER_ALL:
5522                 /*
5523                  * register TSYNCRXCFG must be set, therefore it is not
5524                  * possible to time stamp both Sync and Delay_Req messages
5525                  * => fall back to time stamping all packets
5526                  */
5527                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5528                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5529                 break;
5530         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5531                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5532                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5533                 is_l4 = true;
5534                 break;
5535         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5536                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5537                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5538                 is_l4 = true;
5539                 break;
5540         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5541         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5542                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5543                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5544                 is_l2 = true;
5545                 is_l4 = true;
5546                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5547                 break;
5548         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5549         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5550                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5551                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5552                 is_l2 = true;
5553                 is_l4 = true;
5554                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5555                 break;
5556         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5557         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5558         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5559                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5560                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5561                 is_l2 = true;
5562                 break;
5563         default:
5564                 return -ERANGE;
5565         }
5566
5567         if (hw->mac.type == e1000_82575) {
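                     /* the 82575 has no per-packet hardware time stamping support */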
5568                 if (tsync_rx_ctl || tsync_tx_ctl)
5569                         return -EINVAL;
5570                 return 0;
5571         }
5572
5573         /* enable/disable TX */
5574         regval = rd32(E1000_TSYNCTXCTL);
5575         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5576         regval |= tsync_tx_ctl;
5577         wr32(E1000_TSYNCTXCTL, regval);
5578
5579         /* enable/disable RX */
5580         regval = rd32(E1000_TSYNCRXCTL);
5581         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5582         regval |= tsync_rx_ctl;
5583         wr32(E1000_TSYNCRXCTL, regval);
5584
5585         /* define which PTP packets are time stamped */
5586         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5587
5588         /* define ethertype filter for timestamped packets */
5589         if (is_l2)
5590                 wr32(E1000_ETQF(3),
5591                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5592                                  E1000_ETQF_1588 | /* enable timestamping */
5593                                  ETH_P_1588));     /* 1588 eth protocol type */
5594         else
5595                 wr32(E1000_ETQF(3), 0);
5596
5597 #define PTP_PORT 319
5598         /* L4 Queue Filter[3]: filter by destination port and protocol */
5599         if (is_l4) {
5600                 u32 ftqf = (IPPROTO_UDP /* UDP */
5601                         | E1000_FTQF_VF_BP /* VF not compared */
5602                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5603                         | E1000_FTQF_MASK); /* mask all inputs */
5604                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5605
5606                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5607                 wr32(E1000_IMIREXT(3),
5608                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5609                 if (hw->mac.type == e1000_82576) {
5610                         /* enable source port check */
5611                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5612                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5613                 }
5614                 wr32(E1000_FTQF(3), ftqf);
5615         } else {
5616                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5617         }
5618         wrfl();
5619
5620         adapter->hwtstamp_config = config;
5621
5622         /* clear TX/RX time stamp registers, just to be sure */
5623         regval = rd32(E1000_TXSTMPH);
5624         regval = rd32(E1000_RXSTMPH);
5625
5626         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5627                 -EFAULT : 0;
5628 }
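
     /*
      * Illustrative sketch only (not part of the driver): userspace reaches
      * this handler through the SIOCSHWTSTAMP ioctl on an ordinary socket.
      * Names such as "eth0" and sockfd below are placeholders.
      *
      *     struct hwtstamp_config cfg = {
      *             .flags     = 0,
      *             .tx_type   = HWTSTAMP_TX_ON,
      *             .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
      *     };
      *     struct ifreq ifr;
      *     int sockfd = socket(AF_INET, SOCK_DGRAM, 0);
      *
      *     memset(&ifr, 0, sizeof(ifr));
      *     strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
      *     ifr.ifr_data = (void *)&cfg;
      *     if (ioctl(sockfd, SIOCSHWTSTAMP, &ifr) < 0)
      *             perror("SIOCSHWTSTAMP");
      */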
5629
5630 /**
5631  * igb_ioctl - handle device specific ioctls
5632  * @netdev: network interface device structure
5633  * @ifr: interface request structure
5634  * @cmd: ioctl command
5635  **/
5636 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5637 {
5638         switch (cmd) {
5639         case SIOCGMIIPHY:
5640         case SIOCGMIIREG:
5641         case SIOCSMIIREG:
5642                 return igb_mii_ioctl(netdev, ifr, cmd);
5643         case SIOCSHWTSTAMP:
5644                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5645         default:
5646                 return -EOPNOTSUPP;
5647         }
5648 }
5649
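     /* read a 16-bit word from the device's PCI Express capability structure */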
5650 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5651 {
5652         struct igb_adapter *adapter = hw->back;
5653         u16 cap_offset;
5654
5655         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5656         if (!cap_offset)
5657                 return -E1000_ERR_CONFIG;
5658
5659         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5660
5661         return 0;
5662 }
5663
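     /* write a 16-bit word into the device's PCI Express capability structure */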
5664 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5665 {
5666         struct igb_adapter *adapter = hw->back;
5667         u16 cap_offset;
5668
5669         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5670         if (!cap_offset)
5671                 return -E1000_ERR_CONFIG;
5672
5673         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5674
5675         return 0;
5676 }
5677
5678 static void igb_vlan_rx_register(struct net_device *netdev,
5679                                  struct vlan_group *grp)
5680 {
5681         struct igb_adapter *adapter = netdev_priv(netdev);
5682         struct e1000_hw *hw = &adapter->hw;
5683         u32 ctrl, rctl;
5684
5685         igb_irq_disable(adapter);
5686         adapter->vlgrp = grp;
5687
5688         if (grp) {
5689                 /* enable VLAN tag insert/strip */
5690                 ctrl = rd32(E1000_CTRL);
5691                 ctrl |= E1000_CTRL_VME;
5692                 wr32(E1000_CTRL, ctrl);
5693
5694                 /* Disable CFI check */
5695                 rctl = rd32(E1000_RCTL);
5696                 rctl &= ~E1000_RCTL_CFIEN;
5697                 wr32(E1000_RCTL, rctl);
5698         } else {
5699                 /* disable VLAN tag insert/strip */
5700                 ctrl = rd32(E1000_CTRL);
5701                 ctrl &= ~E1000_CTRL_VME;
5702                 wr32(E1000_CTRL, ctrl);
5703         }
5704
5705         igb_rlpml_set(adapter);
5706
5707         if (!test_bit(__IGB_DOWN, &adapter->state))
5708                 igb_irq_enable(adapter);
5709 }
5710
5711 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5712 {
5713         struct igb_adapter *adapter = netdev_priv(netdev);
5714         struct e1000_hw *hw = &adapter->hw;
5715         int pf_id = adapter->vfs_allocated_count;
5716
5717         /* attempt to add filter to vlvf array */
5718         igb_vlvf_set(adapter, vid, true, pf_id);
5719
5720         /* add the filter since PF can receive vlans w/o entry in vlvf */
5721         igb_vfta_set(hw, vid, true);
5722 }
5723
5724 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5725 {
5726         struct igb_adapter *adapter = netdev_priv(netdev);
5727         struct e1000_hw *hw = &adapter->hw;
5728         int pf_id = adapter->vfs_allocated_count;
5729         s32 err;
5730
5731         igb_irq_disable(adapter);
5732         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5733
5734         if (!test_bit(__IGB_DOWN, &adapter->state))
5735                 igb_irq_enable(adapter);
5736
5737         /* remove vlan from VLVF table array */
5738         err = igb_vlvf_set(adapter, vid, false, pf_id);
5739
5740         /* if vid was not present in VLVF just remove it from table */
5741         if (err)
5742                 igb_vfta_set(hw, vid, false);
5743 }
5744
5745 static void igb_restore_vlan(struct igb_adapter *adapter)
5746 {
5747         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5748
5749         if (adapter->vlgrp) {
5750                 u16 vid;
5751                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5752                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5753                                 continue;
5754                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5755                 }
5756         }
5757 }
5758
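     /* Force link speed and duplex; spddplx is the sum of the requested SPEED_*
      * and DUPLEX_* values.  1000 Mbps half duplex is not supported. */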
5759 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5760 {
5761         struct pci_dev *pdev = adapter->pdev;
5762         struct e1000_mac_info *mac = &adapter->hw.mac;
5763
5764         mac->autoneg = 0;
5765
5766         switch (spddplx) {
5767         case SPEED_10 + DUPLEX_HALF:
5768                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5769                 break;
5770         case SPEED_10 + DUPLEX_FULL:
5771                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5772                 break;
5773         case SPEED_100 + DUPLEX_HALF:
5774                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5775                 break;
5776         case SPEED_100 + DUPLEX_FULL:
5777                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5778                 break;
5779         case SPEED_1000 + DUPLEX_FULL:
5780                 mac->autoneg = 1;
5781                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5782                 break;
5783         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5784         default:
5785                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5786                 return -EINVAL;
5787         }
5788         return 0;
5789 }
5790
5791 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5792 {
5793         struct net_device *netdev = pci_get_drvdata(pdev);
5794         struct igb_adapter *adapter = netdev_priv(netdev);
5795         struct e1000_hw *hw = &adapter->hw;
5796         u32 ctrl, rctl, status;
5797         u32 wufc = adapter->wol;
5798 #ifdef CONFIG_PM
5799         int retval = 0;
5800 #endif
5801
5802         netif_device_detach(netdev);
5803
5804         if (netif_running(netdev))
5805                 igb_close(netdev);
5806
5807         igb_clear_interrupt_scheme(adapter);
5808
5809 #ifdef CONFIG_PM
5810         retval = pci_save_state(pdev);
5811         if (retval)
5812                 return retval;
5813 #endif
5814
5815         status = rd32(E1000_STATUS);
5816         if (status & E1000_STATUS_LU)
5817                 wufc &= ~E1000_WUFC_LNKC;
5818
5819         if (wufc) {
5820                 igb_setup_rctl(adapter);
5821                 igb_set_rx_mode(netdev);
5822
5823                 /* turn on all-multi mode if wake on multicast is enabled */
5824                 if (wufc & E1000_WUFC_MC) {
5825                         rctl = rd32(E1000_RCTL);
5826                         rctl |= E1000_RCTL_MPE;
5827                         wr32(E1000_RCTL, rctl);
5828                 }
5829
5830                 ctrl = rd32(E1000_CTRL);
5831                 /* advertise wake from D3Cold */
5832                 #define E1000_CTRL_ADVD3WUC 0x00100000
5833                 /* phy power management enable */
5834                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5835                 ctrl |= E1000_CTRL_ADVD3WUC;
5836                 wr32(E1000_CTRL, ctrl);
5837
5838                 /* Allow time for pending master requests to run */
5839                 igb_disable_pcie_master(hw);
5840
5841                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5842                 wr32(E1000_WUFC, wufc);
5843         } else {
5844                 wr32(E1000_WUC, 0);
5845                 wr32(E1000_WUFC, 0);
5846         }
5847
5848         *enable_wake = wufc || adapter->en_mng_pt;
5849         if (!*enable_wake)
5850                 igb_power_down_link(adapter);
5851         else
5852                 igb_power_up_link(adapter);
5853
5854         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5855          * would have already happened in close and is redundant. */
5856         igb_release_hw_control(adapter);
5857
5858         pci_disable_device(pdev);
5859
5860         return 0;
5861 }
5862
5863 #ifdef CONFIG_PM
5864 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5865 {
5866         int retval;
5867         bool wake;
5868
5869         retval = __igb_shutdown(pdev, &wake);
5870         if (retval)
5871                 return retval;
5872
5873         if (wake) {
5874                 pci_prepare_to_sleep(pdev);
5875         } else {
5876                 pci_wake_from_d3(pdev, false);
5877                 pci_set_power_state(pdev, PCI_D3hot);
5878         }
5879
5880         return 0;
5881 }
5882
5883 static int igb_resume(struct pci_dev *pdev)
5884 {
5885         struct net_device *netdev = pci_get_drvdata(pdev);
5886         struct igb_adapter *adapter = netdev_priv(netdev);
5887         struct e1000_hw *hw = &adapter->hw;
5888         int err;
5889
5890         pci_set_power_state(pdev, PCI_D0);
5891         pci_restore_state(pdev);
5892         pci_save_state(pdev);
5893
5894         err = pci_enable_device_mem(pdev);
5895         if (err) {
5896                 dev_err(&pdev->dev,
5897                         "igb: Cannot enable PCI device from suspend\n");
5898                 return err;
5899         }
5900         pci_set_master(pdev);
5901
5902         pci_enable_wake(pdev, PCI_D3hot, 0);
5903         pci_enable_wake(pdev, PCI_D3cold, 0);
5904
5905         if (igb_init_interrupt_scheme(adapter)) {
5906                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5907                 return -ENOMEM;
5908         }
5909
5910         igb_reset(adapter);
5911
5912         /* let the f/w know that the h/w is now under the control of the
5913          * driver. */
5914         igb_get_hw_control(adapter);
5915
5916         wr32(E1000_WUS, ~0);
5917
5918         if (netif_running(netdev)) {
5919                 err = igb_open(netdev);
5920                 if (err)
5921                         return err;
5922         }
5923
5924         netif_device_attach(netdev);
5925
5926         return 0;
5927 }
5928 #endif
5929
5930 static void igb_shutdown(struct pci_dev *pdev)
5931 {
5932         bool wake;
5933
5934         __igb_shutdown(pdev, &wake);
5935
5936         if (system_state == SYSTEM_POWER_OFF) {
5937                 pci_wake_from_d3(pdev, wake);
5938                 pci_set_power_state(pdev, PCI_D3hot);
5939         }
5940 }
5941
5942 #ifdef CONFIG_NET_POLL_CONTROLLER
5943 /*
5944  * Polling 'interrupt' - used by things like netconsole to send skbs
5945  * without having to re-enable interrupts. It's not called while
5946  * the interrupt routine is executing.
5947  */
5948 static void igb_netpoll(struct net_device *netdev)
5949 {
5950         struct igb_adapter *adapter = netdev_priv(netdev);
5951         struct e1000_hw *hw = &adapter->hw;
5952         int i;
5953
5954         if (!adapter->msix_entries) {
5955                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5956                 igb_irq_disable(adapter);
5957                 napi_schedule(&q_vector->napi);
5958                 return;
5959         }
5960
5961         for (i = 0; i < adapter->num_q_vectors; i++) {
5962                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5963                 wr32(E1000_EIMC, q_vector->eims_value);
5964                 napi_schedule(&q_vector->napi);
5965         }
5966 }
5967 #endif /* CONFIG_NET_POLL_CONTROLLER */
5968
5969 /**
5970  * igb_io_error_detected - called when PCI error is detected
5971  * @pdev: Pointer to PCI device
5972  * @state: The current pci connection state
5973  *
5974  * This function is called after a PCI bus error affecting
5975  * this device has been detected.
5976  */
5977 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5978                                               pci_channel_state_t state)
5979 {
5980         struct net_device *netdev = pci_get_drvdata(pdev);
5981         struct igb_adapter *adapter = netdev_priv(netdev);
5982
5983         netif_device_detach(netdev);
5984
5985         if (state == pci_channel_io_perm_failure)
5986                 return PCI_ERS_RESULT_DISCONNECT;
5987
5988         if (netif_running(netdev))
5989                 igb_down(adapter);
5990         pci_disable_device(pdev);
5991
5992         /* Request a slot reset. */
5993         return PCI_ERS_RESULT_NEED_RESET;
5994 }
5995
5996 /**
5997  * igb_io_slot_reset - called after the pci bus has been reset.
5998  * @pdev: Pointer to PCI device
5999  *
6000  * Restart the card from scratch, as if from a cold-boot. Implementation
6001  * resembles the first-half of the igb_resume routine.
6002  */
6003 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6004 {
6005         struct net_device *netdev = pci_get_drvdata(pdev);
6006         struct igb_adapter *adapter = netdev_priv(netdev);
6007         struct e1000_hw *hw = &adapter->hw;
6008         pci_ers_result_t result;
6009         int err;
6010
6011         if (pci_enable_device_mem(pdev)) {
6012                 dev_err(&pdev->dev,
6013                         "Cannot re-enable PCI device after reset.\n");
6014                 result = PCI_ERS_RESULT_DISCONNECT;
6015         } else {
6016                 pci_set_master(pdev);
6017                 pci_restore_state(pdev);
6018                 pci_save_state(pdev);
6019
6020                 pci_enable_wake(pdev, PCI_D3hot, 0);
6021                 pci_enable_wake(pdev, PCI_D3cold, 0);
6022
6023                 igb_reset(adapter);
6024                 wr32(E1000_WUS, ~0);
6025                 result = PCI_ERS_RESULT_RECOVERED;
6026         }
6027
6028         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6029         if (err) {
6030                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6031                         "failed 0x%x\n", err);
6032                 /* non-fatal, continue */
6033         }
6034
6035         return result;
6036 }
6037
6038 /**
6039  * igb_io_resume - called when traffic can start flowing again.
6040  * @pdev: Pointer to PCI device
6041  *
6042  * This callback is called when the error recovery driver tells us that
6043  * it's OK to resume normal operation. Implementation resembles the
6044  * second-half of the igb_resume routine.
6045  */
6046 static void igb_io_resume(struct pci_dev *pdev)
6047 {
6048         struct net_device *netdev = pci_get_drvdata(pdev);
6049         struct igb_adapter *adapter = netdev_priv(netdev);
6050
6051         if (netif_running(netdev)) {
6052                 if (igb_up(adapter)) {
6053                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6054                         return;
6055                 }
6056         }
6057
6058         netif_device_attach(netdev);
6059
6060         /* let the f/w know that the h/w is now under the control of the
6061          * driver. */
6062         igb_get_hw_control(adapter);
6063 }
6064
6065 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6066                              u8 qsel)
6067 {
6068         u32 rar_low, rar_high;
6069         struct e1000_hw *hw = &adapter->hw;
6070
6071         /* HW expects these in little endian so we reverse the byte order
6072          * from network order (big endian) to little endian
6073          */
6074         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6075                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6076         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6077
6078         /* Indicate to hardware the Address is Valid. */
6079         rar_high |= E1000_RAH_AV;
6080
6081         if (hw->mac.type == e1000_82575)
6082                 rar_high |= E1000_RAH_POOL_1 * qsel;
6083         else
6084                 rar_high |= E1000_RAH_POOL_1 << qsel;
6085
6086         wr32(E1000_RAL(index), rar_low);
6087         wrfl();
6088         wr32(E1000_RAH(index), rar_high);
6089         wrfl();
6090 }
6091
6092 static int igb_set_vf_mac(struct igb_adapter *adapter,
6093                           int vf, unsigned char *mac_addr)
6094 {
6095         struct e1000_hw *hw = &adapter->hw;
6096         /* VF MAC addresses start at the end of the receive addresses and
6097          * move towards the first, so a collision should not be possible */
6098         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6099
6100         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6101
6102         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6103
6104         return 0;
6105 }
6106
6107 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6108 {
6109         struct igb_adapter *adapter = netdev_priv(netdev);
6110         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6111                 return -EINVAL;
6112         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6113         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6114         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6115                                       " change effective.\n");
6116         if (test_bit(__IGB_DOWN, &adapter->state)) {
6117                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6118                          " but the PF device is not up.\n");
6119                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6120                          " attempting to use the VF device.\n");
6121         }
6122         return igb_set_vf_mac(adapter, vf, mac);
6123 }
6124
6125 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6126 {
6127         return -EOPNOTSUPP;
6128 }
6129
6130 static int igb_ndo_get_vf_config(struct net_device *netdev,
6131                                  int vf, struct ifla_vf_info *ivi)
6132 {
6133         struct igb_adapter *adapter = netdev_priv(netdev);
6134         if (vf >= adapter->vfs_allocated_count)
6135                 return -EINVAL;
6136         ivi->vf = vf;
6137         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6138         ivi->tx_rate = 0;
6139         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6140         ivi->qos = adapter->vf_data[vf].pf_qos;
6141         return 0;
6142 }
6143
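     /* igb_vmm_control - configure VMDq/SR-IOV switching behaviour
      *
      * Enables VLAN tag stripping on replicated packets, notifies the hardware
      * that the MAC inserts VLAN tags, and turns pool-to-pool loopback and
      * replication on or off depending on whether any VFs are allocated.
      * Replication is not supported on the 82575.
      */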
6144 static void igb_vmm_control(struct igb_adapter *adapter)
6145 {
6146         struct e1000_hw *hw = &adapter->hw;
6147         u32 reg;
6148
6149         /* replication is not supported for 82575 */
6150         if (hw->mac.type == e1000_82575)
6151                 return;
6152
6153         /* enable replication vlan tag stripping */
6154         reg = rd32(E1000_RPLOLR);
6155         reg |= E1000_RPLOLR_STRVLAN;
6156         wr32(E1000_RPLOLR, reg);
6157
6158         /* notify HW that the MAC is adding vlan tags */
6159         reg = rd32(E1000_DTXCTL);
6160         reg |= E1000_DTXCTL_VLAN_ADDED;
6161         wr32(E1000_DTXCTL, reg);
6162
6163         if (adapter->vfs_allocated_count) {
6164                 igb_vmdq_set_loopback_pf(hw, true);
6165                 igb_vmdq_set_replication_pf(hw, true);
6166         } else {
6167                 igb_vmdq_set_loopback_pf(hw, false);
6168                 igb_vmdq_set_replication_pf(hw, false);
6169         }
6170 }
6171
6172 /* igb_main.c */