1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
87         /* required last entry */
88         {0, }
89 };
90
91 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
92
93 void igb_reset(struct igb_adapter *);
94 static int igb_setup_all_tx_resources(struct igb_adapter *);
95 static int igb_setup_all_rx_resources(struct igb_adapter *);
96 static void igb_free_all_tx_resources(struct igb_adapter *);
97 static void igb_free_all_rx_resources(struct igb_adapter *);
98 static void igb_setup_mrqc(struct igb_adapter *);
99 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
100 static void __devexit igb_remove(struct pci_dev *pdev);
101 static int igb_sw_init(struct igb_adapter *);
102 static int igb_open(struct net_device *);
103 static int igb_close(struct net_device *);
104 static void igb_configure_tx(struct igb_adapter *);
105 static void igb_configure_rx(struct igb_adapter *);
106 static void igb_clean_all_tx_rings(struct igb_adapter *);
107 static void igb_clean_all_rx_rings(struct igb_adapter *);
108 static void igb_clean_tx_ring(struct igb_ring *);
109 static void igb_clean_rx_ring(struct igb_ring *);
110 static void igb_set_rx_mode(struct net_device *);
111 static void igb_update_phy_info(unsigned long);
112 static void igb_watchdog(unsigned long);
113 static void igb_watchdog_task(struct work_struct *);
114 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
115 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
116                                                  struct rtnl_link_stats64 *stats);
117 static int igb_change_mtu(struct net_device *, int);
118 static int igb_set_mac(struct net_device *, void *);
119 static void igb_set_uta(struct igb_adapter *adapter);
120 static irqreturn_t igb_intr(int irq, void *);
121 static irqreturn_t igb_intr_msi(int irq, void *);
122 static irqreturn_t igb_msix_other(int irq, void *);
123 static irqreturn_t igb_msix_ring(int irq, void *);
124 #ifdef CONFIG_IGB_DCA
125 static void igb_update_dca(struct igb_q_vector *);
126 static void igb_setup_dca(struct igb_adapter *);
127 #endif /* CONFIG_IGB_DCA */
128 static bool igb_clean_tx_irq(struct igb_q_vector *);
129 static int igb_poll(struct napi_struct *, int);
130 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
131 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
132 static void igb_tx_timeout(struct net_device *);
133 static void igb_reset_task(struct work_struct *);
134 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
135 static void igb_vlan_rx_add_vid(struct net_device *, u16);
136 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
137 static void igb_restore_vlan(struct igb_adapter *);
138 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
139 static void igb_ping_all_vfs(struct igb_adapter *);
140 static void igb_msg_task(struct igb_adapter *);
141 static void igb_vmm_control(struct igb_adapter *);
142 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
143 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
144 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
145 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
146                                int vf, u16 vlan, u8 qos);
147 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
148 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
149                                  struct ifla_vf_info *ivi);
150
151 #ifdef CONFIG_PM
152 static int igb_suspend(struct pci_dev *, pm_message_t);
153 static int igb_resume(struct pci_dev *);
154 #endif
155 static void igb_shutdown(struct pci_dev *);
156 #ifdef CONFIG_IGB_DCA
157 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
158 static struct notifier_block dca_notifier = {
159         .notifier_call  = igb_notify_dca,
160         .next           = NULL,
161         .priority       = 0
162 };
163 #endif
164 #ifdef CONFIG_NET_POLL_CONTROLLER
165 /* for netdump / net console */
166 static void igb_netpoll(struct net_device *);
167 #endif
168 #ifdef CONFIG_PCI_IOV
169 static unsigned int max_vfs = 0;
170 module_param(max_vfs, uint, 0);
171 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
172                  "per physical function");
173 #endif /* CONFIG_PCI_IOV */
174
175 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
176                      pci_channel_state_t);
177 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
178 static void igb_io_resume(struct pci_dev *);
179
180 static struct pci_error_handlers igb_err_handler = {
181         .error_detected = igb_io_error_detected,
182         .slot_reset = igb_io_slot_reset,
183         .resume = igb_io_resume,
184 };
185
186
187 static struct pci_driver igb_driver = {
188         .name     = igb_driver_name,
189         .id_table = igb_pci_tbl,
190         .probe    = igb_probe,
191         .remove   = __devexit_p(igb_remove),
192 #ifdef CONFIG_PM
193         /* Power Management Hooks */
194         .suspend  = igb_suspend,
195         .resume   = igb_resume,
196 #endif
197         .shutdown = igb_shutdown,
198         .err_handler = &igb_err_handler
199 };
200
201 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
202 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
203 MODULE_LICENSE("GPL");
204 MODULE_VERSION(DRV_VERSION);
205
206 struct igb_reg_info {
207         u32 ofs;
208         char *name;
209 };
210
211 static const struct igb_reg_info igb_reg_info_tbl[] = {
212
213         /* General Registers */
214         {E1000_CTRL, "CTRL"},
215         {E1000_STATUS, "STATUS"},
216         {E1000_CTRL_EXT, "CTRL_EXT"},
217
218         /* Interrupt Registers */
219         {E1000_ICR, "ICR"},
220
221         /* RX Registers */
222         {E1000_RCTL, "RCTL"},
223         {E1000_RDLEN(0), "RDLEN"},
224         {E1000_RDH(0), "RDH"},
225         {E1000_RDT(0), "RDT"},
226         {E1000_RXDCTL(0), "RXDCTL"},
227         {E1000_RDBAL(0), "RDBAL"},
228         {E1000_RDBAH(0), "RDBAH"},
229
230         /* TX Registers */
231         {E1000_TCTL, "TCTL"},
232         {E1000_TDBAL(0), "TDBAL"},
233         {E1000_TDBAH(0), "TDBAH"},
234         {E1000_TDLEN(0), "TDLEN"},
235         {E1000_TDH(0), "TDH"},
236         {E1000_TDT(0), "TDT"},
237         {E1000_TXDCTL(0), "TXDCTL"},
238         {E1000_TDFH, "TDFH"},
239         {E1000_TDFT, "TDFT"},
240         {E1000_TDFHS, "TDFHS"},
241         {E1000_TDFPC, "TDFPC"},
242
243         /* List Terminator */
244         {}
245 };
246
247 /*
248  * igb_regdump - register printout routine
249  */
250 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
251 {
252         int n = 0;
253         char rname[16];
254         u32 regs[8];
255
256         switch (reginfo->ofs) {
257         case E1000_RDLEN(0):
258                 for (n = 0; n < 4; n++)
259                         regs[n] = rd32(E1000_RDLEN(n));
260                 break;
261         case E1000_RDH(0):
262                 for (n = 0; n < 4; n++)
263                         regs[n] = rd32(E1000_RDH(n));
264                 break;
265         case E1000_RDT(0):
266                 for (n = 0; n < 4; n++)
267                         regs[n] = rd32(E1000_RDT(n));
268                 break;
269         case E1000_RXDCTL(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RXDCTL(n));
272                 break;
273         case E1000_RDBAL(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDBAL(n));
276                 break;
277         case E1000_RDBAH(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDBAH(n));
280                 break;
281         case E1000_TDBAL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_TDBAL(n));
284                 break;
285         case E1000_TDBAH(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_TDBAH(n));
288                 break;
289         case E1000_TDLEN(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_TDLEN(n));
292                 break;
293         case E1000_TDH(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDH(n));
296                 break;
297         case E1000_TDT(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDT(n));
300                 break;
301         case E1000_TXDCTL(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TXDCTL(n));
304                 break;
305         default:
306                 printk(KERN_INFO "%-15s %08x\n",
307                         reginfo->name, rd32(reginfo->ofs));
308                 return;
309         }
310
311         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
312         printk(KERN_INFO "%-15s ", rname);
313         for (n = 0; n < 4; n++)
314                 printk(KERN_CONT "%08x ", regs[n]);
315         printk(KERN_CONT "\n");
316 }
317
318 /*
319  * igb_dump - Print registers, tx-rings and rx-rings
320  */
321 static void igb_dump(struct igb_adapter *adapter)
322 {
323         struct net_device *netdev = adapter->netdev;
324         struct e1000_hw *hw = &adapter->hw;
325         struct igb_reg_info *reginfo;
326         int n = 0;
327         struct igb_ring *tx_ring;
328         union e1000_adv_tx_desc *tx_desc;
329         struct my_u0 { u64 a; u64 b; } *u0;
330         struct igb_buffer *buffer_info;
331         struct igb_ring *rx_ring;
332         union e1000_adv_rx_desc *rx_desc;
333         u32 staterr;
334         int i = 0;
335
336         if (!netif_msg_hw(adapter))
337                 return;
338
339         /* Print netdevice Info */
340         if (netdev) {
341                 dev_info(&adapter->pdev->dev, "Net device Info\n");
342                 printk(KERN_INFO "Device Name     state            "
343                         "trans_start      last_rx\n");
344                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
345                         netdev->name,
346                         netdev->state,
347                         netdev->trans_start,
348                         netdev->last_rx);
349         }
350
351         /* Print Registers */
352         dev_info(&adapter->pdev->dev, "Register Dump\n");
353         printk(KERN_INFO " Register Name   Value\n");
354         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
355              reginfo->name; reginfo++) {
356                 igb_regdump(hw, reginfo);
357         }
358
359         /* Print TX Ring Summary */
360         if (!netdev || !netif_running(netdev))
361                 goto exit;
362
363         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
364         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
365                 " leng ntw timestamp\n");
366         for (n = 0; n < adapter->num_tx_queues; n++) {
367                 tx_ring = adapter->tx_ring[n];
368                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
369                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
370                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
371                            (u64)buffer_info->dma,
372                            buffer_info->length,
373                            buffer_info->next_to_watch,
374                            (u64)buffer_info->time_stamp);
375         }
376
377         /* Print TX Rings */
378         if (!netif_msg_tx_done(adapter))
379                 goto rx_ring_summary;
380
381         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
382
383         /* Transmit Descriptor Formats
384          *
385          * Advanced Transmit Descriptor
386          *   +--------------------------------------------------------------+
387          * 0 |         Buffer Address [63:0]                                |
388          *   +--------------------------------------------------------------+
389          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
390          *   +--------------------------------------------------------------+
391          *   63      46 45    40 39 38 36 35 32 31   24             15       0
392          */
393
394         for (n = 0; n < adapter->num_tx_queues; n++) {
395                 tx_ring = adapter->tx_ring[n];
396                 printk(KERN_INFO "------------------------------------\n");
397                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
398                 printk(KERN_INFO "------------------------------------\n");
399                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
400                         "[PlPOCIStDDM Ln] [bi->dma       ] "
401                         "leng  ntw timestamp        bi->skb\n");
402
403                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
404                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
405                         buffer_info = &tx_ring->buffer_info[i];
406                         u0 = (struct my_u0 *)tx_desc;
407                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
408                                 " %04X  %3X %016llX %p", i,
409                                 le64_to_cpu(u0->a),
410                                 le64_to_cpu(u0->b),
411                                 (u64)buffer_info->dma,
412                                 buffer_info->length,
413                                 buffer_info->next_to_watch,
414                                 (u64)buffer_info->time_stamp,
415                                 buffer_info->skb);
416                         if (i == tx_ring->next_to_use &&
417                                 i == tx_ring->next_to_clean)
418                                 printk(KERN_CONT " NTC/U\n");
419                         else if (i == tx_ring->next_to_use)
420                                 printk(KERN_CONT " NTU\n");
421                         else if (i == tx_ring->next_to_clean)
422                                 printk(KERN_CONT " NTC\n");
423                         else
424                                 printk(KERN_CONT "\n");
425
426                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
427                                 print_hex_dump(KERN_INFO, "",
428                                         DUMP_PREFIX_ADDRESS,
429                                         16, 1, phys_to_virt(buffer_info->dma),
430                                         buffer_info->length, true);
431                 }
432         }
433
434         /* Print RX Rings Summary */
435 rx_ring_summary:
436         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
437         printk(KERN_INFO "Queue [NTU] [NTC]\n");
438         for (n = 0; n < adapter->num_rx_queues; n++) {
439                 rx_ring = adapter->rx_ring[n];
440                 printk(KERN_INFO " %5d %5X %5X\n", n,
441                            rx_ring->next_to_use, rx_ring->next_to_clean);
442         }
443
444         /* Print RX Rings */
445         if (!netif_msg_rx_status(adapter))
446                 goto exit;
447
448         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
449
450         /* Advanced Receive Descriptor (Read) Format
451          *    63                                           1        0
452          *    +-----------------------------------------------------+
453          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
454          *    +----------------------------------------------+------+
455          *  8 |       Header Buffer Address [63:1]           |  DD  |
456          *    +-----------------------------------------------------+
457          *
458          *
459          * Advanced Receive Descriptor (Write-Back) Format
460          *
461          *   63       48 47    32 31  30      21 20 17 16   4 3     0
462          *   +------------------------------------------------------+
463          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
464          *   | Checksum   Ident  |   |           |    | Type | Type |
465          *   +------------------------------------------------------+
466          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
467          *   +------------------------------------------------------+
468          *   63       48 47    32 31            20 19               0
469          */
470
471         for (n = 0; n < adapter->num_rx_queues; n++) {
472                 rx_ring = adapter->rx_ring[n];
473                 printk(KERN_INFO "------------------------------------\n");
474                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
475                 printk(KERN_INFO "------------------------------------\n");
476                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
477                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
478                         "<-- Adv Rx Read format\n");
479                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
480                         "[vl er S cks ln] ---------------- [bi->skb] "
481                         "<-- Adv Rx Write-Back format\n");
482
483                 for (i = 0; i < rx_ring->count; i++) {
484                         buffer_info = &rx_ring->buffer_info[i];
485                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
486                         u0 = (struct my_u0 *)rx_desc;
487                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
488                         if (staterr & E1000_RXD_STAT_DD) {
489                                 /* Descriptor Done */
490                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
491                                         "%016llX ---------------- %p", i,
492                                         le64_to_cpu(u0->a),
493                                         le64_to_cpu(u0->b),
494                                         buffer_info->skb);
495                         } else {
496                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
497                                         "%016llX %016llX %p", i,
498                                         le64_to_cpu(u0->a),
499                                         le64_to_cpu(u0->b),
500                                         (u64)buffer_info->dma,
501                                         buffer_info->skb);
502
503                                 if (netif_msg_pktdata(adapter)) {
504                                         print_hex_dump(KERN_INFO, "",
505                                                 DUMP_PREFIX_ADDRESS,
506                                                 16, 1,
507                                                 phys_to_virt(buffer_info->dma),
508                                                 rx_ring->rx_buffer_len, true);
509                                         if (rx_ring->rx_buffer_len
510                                                 < IGB_RXBUFFER_1024)
511                                                 print_hex_dump(KERN_INFO, "",
512                                                   DUMP_PREFIX_ADDRESS,
513                                                   16, 1,
514                                                   phys_to_virt(
515                                                     buffer_info->page_dma +
516                                                     buffer_info->page_offset),
517                                                   PAGE_SIZE/2, true);
518                                 }
519                         }
520
521                         if (i == rx_ring->next_to_use)
522                                 printk(KERN_CONT " NTU\n");
523                         else if (i == rx_ring->next_to_clean)
524                                 printk(KERN_CONT " NTC\n");
525                         else
526                                 printk(KERN_CONT "\n");
527
528                 }
529         }
530
531 exit:
532         return;
533 }
534
535
536 /**
537  * igb_read_clock - read raw cycle counter (to be used by time counter)
538  */
539 static cycle_t igb_read_clock(const struct cyclecounter *tc)
540 {
541         struct igb_adapter *adapter =
542                 container_of(tc, struct igb_adapter, cycles);
543         struct e1000_hw *hw = &adapter->hw;
544         u64 stamp = 0;
545         int shift = 0;
546
547         /*
548          * The timestamp latches on lowest register read. For the 82580
549          * the lowest register is SYSTIMR instead of SYSTIML.  However, since we
550          * never adjust TIMINCA, SYSTIMR always reads as 0 and can be ignored.
551          */
552         if (hw->mac.type == e1000_82580) {
553                 stamp = rd32(E1000_SYSTIMR) >> 8;
554                 shift = IGB_82580_TSYNC_SHIFT;
555         }
556
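        /*
         * For all other MAC types shift stays 0, so SYSTIML and SYSTIMH
         * simply form the low and high 32 bits of the 64-bit cycle count.
         */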
557         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
558         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
559         return stamp;
560 }
561
562 /**
563  * igb_get_hw_dev - return device
564  * used by hardware layer to print debugging information
565  **/
566 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
567 {
568         struct igb_adapter *adapter = hw->back;
569         return adapter->netdev;
570 }
571
572 /**
573  * igb_init_module - Driver Registration Routine
574  *
575  * igb_init_module is the first routine called when the driver is
576  * loaded. All it does is register with the PCI subsystem.
577  **/
578 static int __init igb_init_module(void)
579 {
580         int ret;
581         printk(KERN_INFO "%s - version %s\n",
582                igb_driver_string, igb_driver_version);
583
584         printk(KERN_INFO "%s\n", igb_copyright);
585
586 #ifdef CONFIG_IGB_DCA
587         dca_register_notify(&dca_notifier);
588 #endif
589         ret = pci_register_driver(&igb_driver);
590         return ret;
591 }
592
593 module_init(igb_init_module);
594
595 /**
596  * igb_exit_module - Driver Exit Cleanup Routine
597  *
598  * igb_exit_module is called just before the driver is removed
599  * from memory.
600  **/
601 static void __exit igb_exit_module(void)
602 {
603 #ifdef CONFIG_IGB_DCA
604         dca_unregister_notify(&dca_notifier);
605 #endif
606         pci_unregister_driver(&igb_driver);
607 }
608
609 module_exit(igb_exit_module);
610
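/*
 * Q_IDX_82576() interleaves queue indices to match the 82576 VMDq layout
 * described in igb_cache_ring_register() below: each VF owns a low/high
 * queue pair (VF 0 gets queues 0 and 8, VF 1 gets 1 and 9, ...), so the
 * PF walks the table in the same order, e.g. i = 0, 1, 2, 3 -> 0, 8, 1, 9.
 */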
611 #define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
612 /**
613  * igb_cache_ring_register - Descriptor ring to register mapping
614  * @adapter: board private structure to initialize
615  *
616  * Once we know the feature-set enabled for the device, we'll cache
617  * the register offset the descriptor ring is assigned to.
618  **/
619 static void igb_cache_ring_register(struct igb_adapter *adapter)
620 {
621         int i = 0, j = 0;
622         u32 rbase_offset = adapter->vfs_allocated_count;
623
624         switch (adapter->hw.mac.type) {
625         case e1000_82576:
626                 /* The queues are allocated for virtualization such that VF 0
627                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
628                  * In order to avoid collision we start at the first free queue
629                  * and continue consuming queues in the same sequence
630                  */
631                 if (adapter->vfs_allocated_count) {
632                         for (; i < adapter->rss_queues; i++)
633                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
634                                                                Q_IDX_82576(i);
635                 }
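                /* Fall through - remaining rings use the linear mapping below */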
636         case e1000_82575:
637         case e1000_82580:
638         case e1000_i350:
639         default:
640                 for (; i < adapter->num_rx_queues; i++)
641                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
642                 for (; j < adapter->num_tx_queues; j++)
643                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
644                 break;
645         }
646 }
647
648 static void igb_free_queues(struct igb_adapter *adapter)
649 {
650         int i;
651
652         for (i = 0; i < adapter->num_tx_queues; i++) {
653                 kfree(adapter->tx_ring[i]);
654                 adapter->tx_ring[i] = NULL;
655         }
656         for (i = 0; i < adapter->num_rx_queues; i++) {
657                 kfree(adapter->rx_ring[i]);
658                 adapter->rx_ring[i] = NULL;
659         }
660         adapter->num_rx_queues = 0;
661         adapter->num_tx_queues = 0;
662 }
663
664 /**
665  * igb_alloc_queues - Allocate memory for all rings
666  * @adapter: board private structure to initialize
667  *
668  * We allocate one ring per queue at run-time since we don't know the
669  * number of queues at compile-time.
670  **/
671 static int igb_alloc_queues(struct igb_adapter *adapter)
672 {
673         struct igb_ring *ring;
674         int i;
675
676         for (i = 0; i < adapter->num_tx_queues; i++) {
677                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
678                 if (!ring)
679                         goto err;
680                 ring->count = adapter->tx_ring_count;
681                 ring->queue_index = i;
682                 ring->dev = &adapter->pdev->dev;
683                 ring->netdev = adapter->netdev;
684                 /* For 82575, context index must be unique per ring. */
685                 if (adapter->hw.mac.type == e1000_82575)
686                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
687                 adapter->tx_ring[i] = ring;
688         }
689
690         for (i = 0; i < adapter->num_rx_queues; i++) {
691                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
692                 if (!ring)
693                         goto err;
694                 ring->count = adapter->rx_ring_count;
695                 ring->queue_index = i;
696                 ring->dev = &adapter->pdev->dev;
697                 ring->netdev = adapter->netdev;
698                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
699                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
700                 /* set flag indicating ring supports SCTP checksum offload */
701                 if (adapter->hw.mac.type >= e1000_82576)
702                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
703                 adapter->rx_ring[i] = ring;
704         }
705
706         igb_cache_ring_register(adapter);
707
708         return 0;
709
710 err:
711         igb_free_queues(adapter);
712
713         return -ENOMEM;
714 }
715
716 #define IGB_N0_QUEUE -1 /* q_vector has no ring of this type */
717 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
718 {
719         u32 msixbm = 0;
720         struct igb_adapter *adapter = q_vector->adapter;
721         struct e1000_hw *hw = &adapter->hw;
722         u32 ivar, index;
723         int rx_queue = IGB_N0_QUEUE;
724         int tx_queue = IGB_N0_QUEUE;
725
726         if (q_vector->rx_ring)
727                 rx_queue = q_vector->rx_ring->reg_idx;
728         if (q_vector->tx_ring)
729                 tx_queue = q_vector->tx_ring->reg_idx;
730
731         switch (hw->mac.type) {
732         case e1000_82575:
733                 /* The 82575 assigns vectors using a bitmask, which matches the
734                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
735                    or more queues to a vector, we write the appropriate bits
736                    into the MSIXBM register for that vector. */
737                 if (rx_queue > IGB_N0_QUEUE)
738                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
739                 if (tx_queue > IGB_N0_QUEUE)
740                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
741                 if (!adapter->msix_entries && msix_vector == 0)
742                         msixbm |= E1000_EIMS_OTHER;
743                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
744                 q_vector->eims_value = msixbm;
745                 break;
746         case e1000_82576:
747                 /* 82576 uses a table-based method for assigning vectors.
748                    Each queue has a single entry in the table to which we write
749                    a vector number along with a "valid" bit.  Sadly, the layout
750                    of the table is somewhat counterintuitive. */
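                /*
                 * Layout of IVAR0[queue & 0x7], four 8-bit entries:
                 *   byte 0: RX queue n        byte 1: TX queue n
                 *   byte 2: RX queue n + 8    byte 3: TX queue n + 8
                 */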
751                 if (rx_queue > IGB_N0_QUEUE) {
752                         index = (rx_queue & 0x7);
753                         ivar = array_rd32(E1000_IVAR0, index);
754                         if (rx_queue < 8) {
755                                 /* vector goes into low byte of register */
756                                 ivar = ivar & 0xFFFFFF00;
757                                 ivar |= msix_vector | E1000_IVAR_VALID;
758                         } else {
759                                 /* vector goes into third byte of register */
760                                 ivar = ivar & 0xFF00FFFF;
761                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
762                         }
763                         array_wr32(E1000_IVAR0, index, ivar);
764                 }
765                 if (tx_queue > IGB_N0_QUEUE) {
766                         index = (tx_queue & 0x7);
767                         ivar = array_rd32(E1000_IVAR0, index);
768                         if (tx_queue < 8) {
769                                 /* vector goes into second byte of register */
770                                 ivar = ivar & 0xFFFF00FF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
772                         } else {
773                                 /* vector goes into high byte of register */
774                                 ivar = ivar & 0x00FFFFFF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
776                         }
777                         array_wr32(E1000_IVAR0, index, ivar);
778                 }
779                 q_vector->eims_value = 1 << msix_vector;
780                 break;
781         case e1000_82580:
782         case e1000_i350:
783                 /* 82580 uses the same table-based approach as 82576 but has fewer
784                    entries; as a result we carry over for queues greater than 4. */
785                 if (rx_queue > IGB_N0_QUEUE) {
786                         index = (rx_queue >> 1);
787                         ivar = array_rd32(E1000_IVAR0, index);
788                         if (rx_queue & 0x1) {
789                                 /* vector goes into third byte of register */
790                                 ivar = ivar & 0xFF00FFFF;
791                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
792                         } else {
793                                 /* vector goes into low byte of register */
794                                 ivar = ivar & 0xFFFFFF00;
795                                 ivar |= msix_vector | E1000_IVAR_VALID;
796                         }
797                         array_wr32(E1000_IVAR0, index, ivar);
798                 }
799                 if (tx_queue > IGB_N0_QUEUE) {
800                         index = (tx_queue >> 1);
801                         ivar = array_rd32(E1000_IVAR0, index);
802                         if (tx_queue & 0x1) {
803                                 /* vector goes into high byte of register */
804                                 ivar = ivar & 0x00FFFFFF;
805                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
806                         } else {
807                                 /* vector goes into second byte of register */
808                                 ivar = ivar & 0xFFFF00FF;
809                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
810                         }
811                         array_wr32(E1000_IVAR0, index, ivar);
812                 }
813                 q_vector->eims_value = 1 << msix_vector;
814                 break;
815         default:
816                 BUG();
817                 break;
818         }
819
820         /* add q_vector eims value to global eims_enable_mask */
821         adapter->eims_enable_mask |= q_vector->eims_value;
822
823         /* configure q_vector to set itr on first interrupt */
824         q_vector->set_itr = 1;
825 }
826
827 /**
828  * igb_configure_msix - Configure MSI-X hardware
829  *
830  * igb_configure_msix sets up the hardware to properly
831  * generate MSI-X interrupts.
832  **/
833 static void igb_configure_msix(struct igb_adapter *adapter)
834 {
835         u32 tmp;
836         int i, vector = 0;
837         struct e1000_hw *hw = &adapter->hw;
838
839         adapter->eims_enable_mask = 0;
840
841         /* set vector for other causes, i.e. link changes */
842         switch (hw->mac.type) {
843         case e1000_82575:
844                 tmp = rd32(E1000_CTRL_EXT);
845                 /* enable MSI-X PBA support */
846                 tmp |= E1000_CTRL_EXT_PBA_CLR;
847
848                 /* Auto-Mask interrupts upon ICR read. */
849                 tmp |= E1000_CTRL_EXT_EIAME;
850                 tmp |= E1000_CTRL_EXT_IRCA;
851
852                 wr32(E1000_CTRL_EXT, tmp);
853
854                 /* enable msix_other interrupt */
855                 array_wr32(E1000_MSIXBM(0), vector++,
856                                       E1000_EIMS_OTHER);
857                 adapter->eims_other = E1000_EIMS_OTHER;
858
859                 break;
860
861         case e1000_82576:
862         case e1000_82580:
863         case e1000_i350:
864                 /* Turn on MSI-X capability first, or our settings
865                  * won't stick.  And it will take days to debug. */
866                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
867                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
868                                 E1000_GPIE_NSICR);
869
870                 /* enable msix_other interrupt */
871                 adapter->eims_other = 1 << vector;
872                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
873
874                 wr32(E1000_IVAR_MISC, tmp);
875                 break;
876         default:
877                 /* do nothing, since nothing else supports MSI-X */
878                 break;
879         } /* switch (hw->mac.type) */
880
881         adapter->eims_enable_mask |= adapter->eims_other;
882
883         for (i = 0; i < adapter->num_q_vectors; i++)
884                 igb_assign_vector(adapter->q_vector[i], vector++);
885
886         wrfl();
887 }
888
889 /**
890  * igb_request_msix - Initialize MSI-X interrupts
891  *
892  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
893  * kernel.
894  **/
895 static int igb_request_msix(struct igb_adapter *adapter)
896 {
897         struct net_device *netdev = adapter->netdev;
898         struct e1000_hw *hw = &adapter->hw;
899         int i, err = 0, vector = 0;
900
901         err = request_irq(adapter->msix_entries[vector].vector,
902                           igb_msix_other, 0, netdev->name, adapter);
903         if (err)
904                 goto out;
905         vector++;
906
907         for (i = 0; i < adapter->num_q_vectors; i++) {
908                 struct igb_q_vector *q_vector = adapter->q_vector[i];
909
910                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
911
912                 if (q_vector->rx_ring && q_vector->tx_ring)
913                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
914                                 q_vector->rx_ring->queue_index);
915                 else if (q_vector->tx_ring)
916                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
917                                 q_vector->tx_ring->queue_index);
918                 else if (q_vector->rx_ring)
919                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
920                                 q_vector->rx_ring->queue_index);
921                 else
922                         sprintf(q_vector->name, "%s-unused", netdev->name);
923
924                 err = request_irq(adapter->msix_entries[vector].vector,
925                                   igb_msix_ring, 0, q_vector->name,
926                                   q_vector);
927                 if (err)
928                         goto out;
929                 vector++;
930         }
931
932         igb_configure_msix(adapter);
933         return 0;
934 out:
935         return err;
936 }
937
938 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
939 {
940         if (adapter->msix_entries) {
941                 pci_disable_msix(adapter->pdev);
942                 kfree(adapter->msix_entries);
943                 adapter->msix_entries = NULL;
944         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
945                 pci_disable_msi(adapter->pdev);
946         }
947 }
948
949 /**
950  * igb_free_q_vectors - Free memory allocated for interrupt vectors
951  * @adapter: board private structure to initialize
952  *
953  * This function frees the memory allocated to the q_vectors.  In addition if
954  * NAPI is enabled it will delete any references to the NAPI struct prior
955  * to freeing the q_vector.
956  **/
957 static void igb_free_q_vectors(struct igb_adapter *adapter)
958 {
959         int v_idx;
960
961         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
962                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
963                 adapter->q_vector[v_idx] = NULL;
964                 if (!q_vector)
965                         continue;
966                 netif_napi_del(&q_vector->napi);
967                 kfree(q_vector);
968         }
969         adapter->num_q_vectors = 0;
970 }
971
972 /**
973  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
974  *
975  * This function resets the device so that it has 0 rx queues, tx queues, and
976  * MSI-X interrupts allocated.
977  */
978 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
979 {
980         igb_free_queues(adapter);
981         igb_free_q_vectors(adapter);
982         igb_reset_interrupt_capability(adapter);
983 }
984
985 /**
986  * igb_set_interrupt_capability - set MSI or MSI-X if supported
987  *
988  * Attempt to configure interrupts using the best available
989  * capabilities of the hardware and kernel.
990  **/
991 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
992 {
993         int err;
994         int numvecs, i;
995
996         /* Number of supported queues. */
997         adapter->num_rx_queues = adapter->rss_queues;
998         if (adapter->vfs_allocated_count)
999                 adapter->num_tx_queues = 1;
1000         else
1001                 adapter->num_tx_queues = adapter->rss_queues;
1002
1003         /* start with one vector for every rx queue */
1004         numvecs = adapter->num_rx_queues;
1005
1006         /* if tx handler is separate add 1 for every tx queue */
1007         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1008                 numvecs += adapter->num_tx_queues;
1009
1010         /* store the number of vectors reserved for queues */
1011         adapter->num_q_vectors = numvecs;
1012
1013         /* add 1 vector for link status interrupts */
1014         numvecs++;
1015         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1016                                         GFP_KERNEL);
1017         if (!adapter->msix_entries)
1018                 goto msi_only;
1019
1020         for (i = 0; i < numvecs; i++)
1021                 adapter->msix_entries[i].entry = i;
1022
1023         err = pci_enable_msix(adapter->pdev,
1024                               adapter->msix_entries,
1025                               numvecs);
1026         if (err == 0)
1027                 goto out;
1028
1029         igb_reset_interrupt_capability(adapter);
1030
1031         /* If we can't do MSI-X, try MSI */
1032 msi_only:
1033 #ifdef CONFIG_PCI_IOV
1034         /* disable SR-IOV for non MSI-X configurations */
1035         if (adapter->vf_data) {
1036                 struct e1000_hw *hw = &adapter->hw;
1037                 /* disable iov and allow time for transactions to clear */
1038                 pci_disable_sriov(adapter->pdev);
1039                 msleep(500);
1040
1041                 kfree(adapter->vf_data);
1042                 adapter->vf_data = NULL;
1043                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1044                 msleep(100);
1045                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1046         }
1047 #endif
1048         adapter->vfs_allocated_count = 0;
1049         adapter->rss_queues = 1;
1050         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1051         adapter->num_rx_queues = 1;
1052         adapter->num_tx_queues = 1;
1053         adapter->num_q_vectors = 1;
1054         if (!pci_enable_msi(adapter->pdev))
1055                 adapter->flags |= IGB_FLAG_HAS_MSI;
1056 out:
1057         /* Notify the stack of the (possibly) reduced queue counts. */
1058         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1059         return netif_set_real_num_rx_queues(adapter->netdev,
1060                                             adapter->num_rx_queues);
1061 }
1062
1063 /**
1064  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1065  * @adapter: board private structure to initialize
1066  *
1067  * We allocate one q_vector per queue interrupt.  If allocation fails we
1068  * return -ENOMEM.
1069  **/
1070 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1071 {
1072         struct igb_q_vector *q_vector;
1073         struct e1000_hw *hw = &adapter->hw;
1074         int v_idx;
1075
1076         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1077                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1078                 if (!q_vector)
1079                         goto err_out;
1080                 q_vector->adapter = adapter;
1081                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1082                 q_vector->itr_val = IGB_START_ITR;
1083                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1084                 adapter->q_vector[v_idx] = q_vector;
1085         }
1086         return 0;
1087
1088 err_out:
1089         igb_free_q_vectors(adapter);
1090         return -ENOMEM;
1091 }
1092
1093 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1094                                       int ring_idx, int v_idx)
1095 {
1096         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1097
1098         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1099         q_vector->rx_ring->q_vector = q_vector;
1100         q_vector->itr_val = adapter->rx_itr_setting;
1101         if (q_vector->itr_val && q_vector->itr_val <= 3)
1102                 q_vector->itr_val = IGB_START_ITR;
1103 }
1104
1105 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1106                                       int ring_idx, int v_idx)
1107 {
1108         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109
1110         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1111         q_vector->tx_ring->q_vector = q_vector;
1112         q_vector->itr_val = adapter->tx_itr_setting;
1113         if (q_vector->itr_val && q_vector->itr_val <= 3)
1114                 q_vector->itr_val = IGB_START_ITR;
1115 }
1116
1117 /**
1118  * igb_map_ring_to_vector - maps allocated queues to vectors
1119  *
1120  * This function maps the recently allocated queues to vectors.
1121  **/
1122 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1123 {
1124         int i;
1125         int v_idx = 0;
1126
1127         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1128             (adapter->num_q_vectors < adapter->num_tx_queues))
1129                 return -ENOMEM;
1130
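        /*
         * If there is a vector for every ring, give each ring its own
         * vector; otherwise pair each TX ring with an RX ring on one vector
         * and let any leftover TX rings take their own vectors.
         */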
1131         if (adapter->num_q_vectors >=
1132             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1133                 for (i = 0; i < adapter->num_rx_queues; i++)
1134                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1135                 for (i = 0; i < adapter->num_tx_queues; i++)
1136                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1137         } else {
1138                 for (i = 0; i < adapter->num_rx_queues; i++) {
1139                         if (i < adapter->num_tx_queues)
1140                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1141                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1142                 }
1143                 for (; i < adapter->num_tx_queues; i++)
1144                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1145         }
1146         return 0;
1147 }
1148
1149 /**
1150  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1151  *
1152  * This function initializes the interrupts and allocates all of the queues.
1153  **/
1154 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1155 {
1156         struct pci_dev *pdev = adapter->pdev;
1157         int err;
1158
1159         err = igb_set_interrupt_capability(adapter);
1160         if (err)
1161                 return err;
1162
1163         err = igb_alloc_q_vectors(adapter);
1164         if (err) {
1165                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1166                 goto err_alloc_q_vectors;
1167         }
1168
1169         err = igb_alloc_queues(adapter);
1170         if (err) {
1171                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1172                 goto err_alloc_queues;
1173         }
1174
1175         err = igb_map_ring_to_vector(adapter);
1176         if (err) {
1177                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1178                 goto err_map_queues;
1179         }
1180
1181
1182         return 0;
1183 err_map_queues:
1184         igb_free_queues(adapter);
1185 err_alloc_queues:
1186         igb_free_q_vectors(adapter);
1187 err_alloc_q_vectors:
1188         igb_reset_interrupt_capability(adapter);
1189         return err;
1190 }
1191
1192 /**
1193  * igb_request_irq - initialize interrupts
1194  *
1195  * Attempts to configure interrupts using the best available
1196  * capabilities of the hardware and kernel.
1197  **/
1198 static int igb_request_irq(struct igb_adapter *adapter)
1199 {
1200         struct net_device *netdev = adapter->netdev;
1201         struct pci_dev *pdev = adapter->pdev;
1202         int err = 0;
1203
1204         if (adapter->msix_entries) {
1205                 err = igb_request_msix(adapter);
1206                 if (!err)
1207                         goto request_done;
1208                 /* fall back to MSI */
1209                 igb_clear_interrupt_scheme(adapter);
1210                 if (!pci_enable_msi(adapter->pdev))
1211                         adapter->flags |= IGB_FLAG_HAS_MSI;
1212                 igb_free_all_tx_resources(adapter);
1213                 igb_free_all_rx_resources(adapter);
1214                 adapter->num_tx_queues = 1;
1215                 adapter->num_rx_queues = 1;
1216                 adapter->num_q_vectors = 1;
1217                 err = igb_alloc_q_vectors(adapter);
1218                 if (err) {
1219                         dev_err(&pdev->dev,
1220                                 "Unable to allocate memory for vectors\n");
1221                         goto request_done;
1222                 }
1223                 err = igb_alloc_queues(adapter);
1224                 if (err) {
1225                         dev_err(&pdev->dev,
1226                                 "Unable to allocate memory for queues\n");
1227                         igb_free_q_vectors(adapter);
1228                         goto request_done;
1229                 }
1230                 igb_setup_all_tx_resources(adapter);
1231                 igb_setup_all_rx_resources(adapter);
1232         } else {
1233                 igb_assign_vector(adapter->q_vector[0], 0);
1234         }
1235
1236         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1237                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1238                                   netdev->name, adapter);
1239                 if (!err)
1240                         goto request_done;
1241
1242                 /* fall back to legacy interrupts */
1243                 igb_reset_interrupt_capability(adapter);
1244                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1245         }
1246
1247         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1248                           netdev->name, adapter);
1249
1250         if (err)
1251                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1252                         err);
1253
1254 request_done:
1255         return err;
1256 }
1257
1258 static void igb_free_irq(struct igb_adapter *adapter)
1259 {
1260         if (adapter->msix_entries) {
1261                 int vector = 0, i;
1262
1263                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1264
1265                 for (i = 0; i < adapter->num_q_vectors; i++) {
1266                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1267                         free_irq(adapter->msix_entries[vector++].vector,
1268                                  q_vector);
1269                 }
1270         } else {
1271                 free_irq(adapter->pdev->irq, adapter);
1272         }
1273 }
1274
1275 /**
1276  * igb_irq_disable - Mask off interrupt generation on the NIC
1277  * @adapter: board private structure
1278  **/
1279 static void igb_irq_disable(struct igb_adapter *adapter)
1280 {
1281         struct e1000_hw *hw = &adapter->hw;
1282
1283         /*
1284          * We need to be careful when disabling interrupts.  The VFs are also
1285          * mapped into these registers, and clearing the bits can cause
1286          * issues for the VF drivers, so we only clear what we set.
1287          */
1288         if (adapter->msix_entries) {
1289                 u32 regval = rd32(E1000_EIAM);
1290                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1291                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1292                 regval = rd32(E1000_EIAC);
1293                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1294         }
1295
1296         wr32(E1000_IAM, 0);
1297         wr32(E1000_IMC, ~0);
1298         wrfl();
1299         if (adapter->msix_entries) {
1300                 int i;
1301                 for (i = 0; i < adapter->num_q_vectors; i++)
1302                         synchronize_irq(adapter->msix_entries[i].vector);
1303         } else {
1304                 synchronize_irq(adapter->pdev->irq);
1305         }
1306 }
1307
1308 /**
1309  * igb_irq_enable - Enable default interrupt generation settings
1310  * @adapter: board private structure
1311  **/
1312 static void igb_irq_enable(struct igb_adapter *adapter)
1313 {
1314         struct e1000_hw *hw = &adapter->hw;
1315
1316         if (adapter->msix_entries) {
1317                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1318                 u32 regval = rd32(E1000_EIAC);
1319                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1320                 regval = rd32(E1000_EIAM);
1321                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1322                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1323                 if (adapter->vfs_allocated_count) {
1324                         wr32(E1000_MBVFIMR, 0xFF);
1325                         ims |= E1000_IMS_VMMB;
1326                 }
1327                 if (adapter->hw.mac.type == e1000_82580)
1328                         ims |= E1000_IMS_DRSTA;
1329
1330                 wr32(E1000_IMS, ims);
1331         } else {
1332                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1333                                 E1000_IMS_DRSTA);
1334                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1335                                 E1000_IMS_DRSTA);
1336         }
1337 }
1338
1339 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1340 {
1341         struct e1000_hw *hw = &adapter->hw;
1342         u16 vid = adapter->hw.mng_cookie.vlan_id;
1343         u16 old_vid = adapter->mng_vlan_id;
1344
1345         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1346                 /* add VID to filter table */
1347                 igb_vfta_set(hw, vid, true);
1348                 adapter->mng_vlan_id = vid;
1349         } else {
1350                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1351         }
1352
1353         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1354             (vid != old_vid) &&
1355             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1356                 /* remove VID from filter table */
1357                 igb_vfta_set(hw, old_vid, false);
1358         }
1359 }
1360
1361 /**
1362  * igb_release_hw_control - release control of the h/w to f/w
1363  * @adapter: address of board private structure
1364  *
1365  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1366  * For ASF and Pass Through versions of f/w this means that the
1367  * driver is no longer loaded.
1368  *
1369  **/
1370 static void igb_release_hw_control(struct igb_adapter *adapter)
1371 {
1372         struct e1000_hw *hw = &adapter->hw;
1373         u32 ctrl_ext;
1374
1375         /* Let firmware take over control of h/w */
1376         ctrl_ext = rd32(E1000_CTRL_EXT);
1377         wr32(E1000_CTRL_EXT,
1378                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1379 }
1380
1381 /**
1382  * igb_get_hw_control - get control of the h/w from f/w
1383  * @adapter: address of board private structure
1384  *
1385  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1386  * For ASF and Pass Through versions of f/w this means that
1387  * the driver is loaded.
1388  *
1389  **/
1390 static void igb_get_hw_control(struct igb_adapter *adapter)
1391 {
1392         struct e1000_hw *hw = &adapter->hw;
1393         u32 ctrl_ext;
1394
1395         /* Let firmware know the driver has taken over */
1396         ctrl_ext = rd32(E1000_CTRL_EXT);
1397         wr32(E1000_CTRL_EXT,
1398                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1399 }
1400
1401 /**
1402  * igb_configure - configure the hardware for RX and TX
1403  * @adapter: private board structure
1404  **/
1405 static void igb_configure(struct igb_adapter *adapter)
1406 {
1407         struct net_device *netdev = adapter->netdev;
1408         int i;
1409
1410         igb_get_hw_control(adapter);
1411         igb_set_rx_mode(netdev);
1412
1413         igb_restore_vlan(adapter);
1414
1415         igb_setup_tctl(adapter);
1416         igb_setup_mrqc(adapter);
1417         igb_setup_rctl(adapter);
1418
1419         igb_configure_tx(adapter);
1420         igb_configure_rx(adapter);
1421
1422         igb_rx_fifo_flush_82575(&adapter->hw);
1423
1424         /* call igb_desc_unused which always leaves
1425          * at least 1 descriptor unused to make sure
1426          * next_to_use != next_to_clean */
1427         for (i = 0; i < adapter->num_rx_queues; i++) {
1428                 struct igb_ring *ring = adapter->rx_ring[i];
1429                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1430         }
1431 }
1432
1433 /**
1434  * igb_power_up_link - Power up the phy/serdes link
1435  * @adapter: address of board private structure
1436  **/
1437 void igb_power_up_link(struct igb_adapter *adapter)
1438 {
1439         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1440                 igb_power_up_phy_copper(&adapter->hw);
1441         else
1442                 igb_power_up_serdes_link_82575(&adapter->hw);
1443 }
1444
1445 /**
1446  * igb_power_down_link - Power down the phy/serdes link
1447  * @adapter: address of board private structure
1448  */
1449 static void igb_power_down_link(struct igb_adapter *adapter)
1450 {
1451         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452                 igb_power_down_phy_copper_82575(&adapter->hw);
1453         else
1454                 igb_shutdown_serdes_link_82575(&adapter->hw);
1455 }
1456
1457 /**
1458  * igb_up - Open the interface and prepare it to handle traffic
1459  * @adapter: board private structure
1460  **/
1461 int igb_up(struct igb_adapter *adapter)
1462 {
1463         struct e1000_hw *hw = &adapter->hw;
1464         int i;
1465
1466         /* hardware has been reset, we need to reload some things */
1467         igb_configure(adapter);
1468
1469         clear_bit(__IGB_DOWN, &adapter->state);
1470
1471         for (i = 0; i < adapter->num_q_vectors; i++) {
1472                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1473                 napi_enable(&q_vector->napi);
1474         }
1475         if (adapter->msix_entries)
1476                 igb_configure_msix(adapter);
1477         else
1478                 igb_assign_vector(adapter->q_vector[0], 0);
1479
1480         /* Clear any pending interrupts. */
1481         rd32(E1000_ICR);
1482         igb_irq_enable(adapter);
1483
1484         /* notify VFs that reset has been completed */
1485         if (adapter->vfs_allocated_count) {
1486                 u32 reg_data = rd32(E1000_CTRL_EXT);
1487                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1488                 wr32(E1000_CTRL_EXT, reg_data);
1489         }
1490
1491         netif_tx_start_all_queues(adapter->netdev);
1492
1493         /* start the watchdog. */
1494         hw->mac.get_link_status = 1;
1495         schedule_work(&adapter->watchdog_task);
1496
1497         return 0;
1498 }
1499
1500 void igb_down(struct igb_adapter *adapter)
1501 {
1502         struct net_device *netdev = adapter->netdev;
1503         struct e1000_hw *hw = &adapter->hw;
1504         u32 tctl, rctl;
1505         int i;
1506
1507         /* signal that we're down so the interrupt handler does not
1508          * reschedule our watchdog timer */
1509         set_bit(__IGB_DOWN, &adapter->state);
1510
1511         /* disable receives in the hardware */
1512         rctl = rd32(E1000_RCTL);
1513         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1514         /* flush and sleep below */
1515
1516         netif_tx_stop_all_queues(netdev);
1517
1518         /* disable transmits in the hardware */
1519         tctl = rd32(E1000_TCTL);
1520         tctl &= ~E1000_TCTL_EN;
1521         wr32(E1000_TCTL, tctl);
1522         /* flush both disables and wait for them to finish */
1523         wrfl();
1524         msleep(10);
1525
1526         for (i = 0; i < adapter->num_q_vectors; i++) {
1527                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1528                 napi_disable(&q_vector->napi);
1529         }
1530
1531         igb_irq_disable(adapter);
1532
1533         del_timer_sync(&adapter->watchdog_timer);
1534         del_timer_sync(&adapter->phy_info_timer);
1535
1536         netif_carrier_off(netdev);
1537
1538         /* record the stats before reset*/
1539         spin_lock(&adapter->stats64_lock);
1540         igb_update_stats(adapter, &adapter->stats64);
1541         spin_unlock(&adapter->stats64_lock);
1542
1543         adapter->link_speed = 0;
1544         adapter->link_duplex = 0;
1545
1546         if (!pci_channel_offline(adapter->pdev))
1547                 igb_reset(adapter);
1548         igb_clean_all_tx_rings(adapter);
1549         igb_clean_all_rx_rings(adapter);
1550 #ifdef CONFIG_IGB_DCA
1551
1552         /* since we reset the hardware DCA settings were cleared */
1553         igb_setup_dca(adapter);
1554 #endif
1555 }
1556
1557 void igb_reinit_locked(struct igb_adapter *adapter)
1558 {
1559         WARN_ON(in_interrupt());
1560         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1561                 msleep(1);
1562         igb_down(adapter);
1563         igb_up(adapter);
1564         clear_bit(__IGB_RESETTING, &adapter->state);
1565 }
1566
1567 void igb_reset(struct igb_adapter *adapter)
1568 {
1569         struct pci_dev *pdev = adapter->pdev;
1570         struct e1000_hw *hw = &adapter->hw;
1571         struct e1000_mac_info *mac = &hw->mac;
1572         struct e1000_fc_info *fc = &hw->fc;
1573         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1574         u16 hwm;
1575
1576         /* Repartition PBA for MTUs greater than 9K.
1577          * CTRL.RST is required for the change to take effect.
1578          */
1579         switch (mac->type) {
1580         case e1000_i350:
1581         case e1000_82580:
1582                 pba = rd32(E1000_RXPBS);
1583                 pba = igb_rxpbs_adjust_82580(pba);
1584                 break;
1585         case e1000_82576:
1586                 pba = rd32(E1000_RXPBS);
1587                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1588                 break;
1589         case e1000_82575:
1590         default:
1591                 pba = E1000_PBA_34K;
1592                 break;
1593         }
1594
1595         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1596             (mac->type < e1000_82576)) {
1597                 /* adjust PBA for jumbo frames */
1598                 wr32(E1000_PBA, pba);
1599
1600                 /* To maintain wire speed transmits, the Tx FIFO should be
1601                  * large enough to accommodate two full transmit packets,
1602                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1603                  * the Rx FIFO should be large enough to accommodate at least
1604                  * one full receive packet and is similarly rounded up and
1605                  * expressed in KB. */
1606                 pba = rd32(E1000_PBA);
1607                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1608                 tx_space = pba >> 16;
1609                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1610                 pba &= 0xffff;
1611                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1612                  * packet, but don't include the Ethernet FCS because hardware appends it */
1613                 min_tx_space = (adapter->max_frame_size +
1614                                 sizeof(union e1000_adv_tx_desc) -
1615                                 ETH_FCS_LEN) * 2;
1616                 min_tx_space = ALIGN(min_tx_space, 1024);
1617                 min_tx_space >>= 10;
1618                 /* software strips receive CRC, so leave room for it */
1619                 min_rx_space = adapter->max_frame_size;
1620                 min_rx_space = ALIGN(min_rx_space, 1024);
1621                 min_rx_space >>= 10;
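                      /* e.g., assuming a 9000-byte MTU (max_frame_size = 9018):
                       * min_tx_space = (9018 + 16 - 4) * 2 = 18060, rounded up
                       * to 18432 and shifted down to 18 KB; min_rx_space rounds
                       * 9018 up to 9216 and shifts down to 9 KB */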
1622
1623                 /* If current Tx allocation is less than the min Tx FIFO size,
1624                  * and the min Tx FIFO size is less than the current Rx FIFO
1625                  * allocation, take space away from current Rx allocation */
1626                 if (tx_space < min_tx_space &&
1627                     ((min_tx_space - tx_space) < pba)) {
1628                         pba = pba - (min_tx_space - tx_space);
1629
1630                         /* if short on rx space, rx wins and must trump tx
1631                          * adjustment */
1632                         if (pba < min_rx_space)
1633                                 pba = min_rx_space;
1634                 }
1635                 wr32(E1000_PBA, pba);
1636         }
1637
1638         /* flow control settings */
1639         /* The high water mark must be low enough to fit one full frame
1640          * (or the size used for early receive) above it in the Rx FIFO.
1641          * Set it to the lower of:
1642          * - 90% of the Rx FIFO size, or
1643          * - the full Rx FIFO size minus one full frame */
1644         hwm = min(((pba << 10) * 9 / 10),
1645                         ((pba << 10) - 2 * adapter->max_frame_size));
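              /* e.g., with the 82575 default pba of 34 KB and a 1518-byte max
               * frame: min(34816 * 9 / 10, 34816 - 2 * 1518) = 31334, which the
               * masking below turns into high_water = 31328, low_water = 31312 */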
1646
1647         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1648         fc->low_water = fc->high_water - 16;
1649         fc->pause_time = 0xFFFF;
1650         fc->send_xon = 1;
1651         fc->current_mode = fc->requested_mode;
1652
1653         /* disable receive for all VFs and wait one second */
1654         if (adapter->vfs_allocated_count) {
1655                 int i;
1656                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1657                         adapter->vf_data[i].flags = 0;
1658
1659                 /* ping all the active vfs to let them know we are going down */
1660                 igb_ping_all_vfs(adapter);
1661
1662                 /* disable transmits and receives */
1663                 wr32(E1000_VFRE, 0);
1664                 wr32(E1000_VFTE, 0);
1665         }
1666
1667         /* Allow time for pending master requests to run */
1668         hw->mac.ops.reset_hw(hw);
1669         wr32(E1000_WUC, 0);
1670
1671         if (hw->mac.ops.init_hw(hw))
1672                 dev_err(&pdev->dev, "Hardware Error\n");
1673
1674         if (hw->mac.type == e1000_82580) {
1675                 u32 reg = rd32(E1000_PCIEMISC);
1676                 wr32(E1000_PCIEMISC,
1677                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1678         }
1679         if (!netif_running(adapter->netdev))
1680                 igb_power_down_link(adapter);
1681
1682         igb_update_mng_vlan(adapter);
1683
1684         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1685         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1686
1687         igb_get_phy_info(hw);
1688 }
1689
1690 static const struct net_device_ops igb_netdev_ops = {
1691         .ndo_open               = igb_open,
1692         .ndo_stop               = igb_close,
1693         .ndo_start_xmit         = igb_xmit_frame_adv,
1694         .ndo_get_stats64        = igb_get_stats64,
1695         .ndo_set_rx_mode        = igb_set_rx_mode,
1696         .ndo_set_multicast_list = igb_set_rx_mode,
1697         .ndo_set_mac_address    = igb_set_mac,
1698         .ndo_change_mtu         = igb_change_mtu,
1699         .ndo_do_ioctl           = igb_ioctl,
1700         .ndo_tx_timeout         = igb_tx_timeout,
1701         .ndo_validate_addr      = eth_validate_addr,
1702         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1703         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1704         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1705         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1706         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1707         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1708         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1709 #ifdef CONFIG_NET_POLL_CONTROLLER
1710         .ndo_poll_controller    = igb_netpoll,
1711 #endif
1712 };
1713
1714 /**
1715  * igb_probe - Device Initialization Routine
1716  * @pdev: PCI device information struct
1717  * @ent: entry in igb_pci_tbl
1718  *
1719  * Returns 0 on success, negative on failure
1720  *
1721  * igb_probe initializes an adapter identified by a pci_dev structure.
1722  * The OS initialization, configuring of the adapter private structure,
1723  * and a hardware reset occur.
1724  **/
1725 static int __devinit igb_probe(struct pci_dev *pdev,
1726                                const struct pci_device_id *ent)
1727 {
1728         struct net_device *netdev;
1729         struct igb_adapter *adapter;
1730         struct e1000_hw *hw;
1731         u16 eeprom_data = 0;
1732         static int global_quad_port_a; /* global quad port a indication */
1733         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1734         unsigned long mmio_start, mmio_len;
1735         int err, pci_using_dac;
1736         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1737         u32 part_num;
1738
1739         /* Catch broken hardware that put the wrong VF device ID in
1740          * the PCIe SR-IOV capability.
1741          */
1742         if (pdev->is_virtfn) {
1743                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1744                      pci_name(pdev), pdev->vendor, pdev->device);
1745                 return -EINVAL;
1746         }
1747
1748         err = pci_enable_device_mem(pdev);
1749         if (err)
1750                 return err;
1751
1752         pci_using_dac = 0;
1753         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1754         if (!err) {
1755                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1756                 if (!err)
1757                         pci_using_dac = 1;
1758         } else {
1759                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1760                 if (err) {
1761                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1762                         if (err) {
1763                                 dev_err(&pdev->dev, "No usable DMA "
1764                                         "configuration, aborting\n");
1765                                 goto err_dma;
1766                         }
1767                 }
1768         }
1769
1770         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1771                                            IORESOURCE_MEM),
1772                                            igb_driver_name);
1773         if (err)
1774                 goto err_pci_reg;
1775
1776         pci_enable_pcie_error_reporting(pdev);
1777
1778         pci_set_master(pdev);
1779         pci_save_state(pdev);
1780
1781         err = -ENOMEM;
1782         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1783                                    IGB_ABS_MAX_TX_QUEUES);
1784         if (!netdev)
1785                 goto err_alloc_etherdev;
1786
1787         SET_NETDEV_DEV(netdev, &pdev->dev);
1788
1789         pci_set_drvdata(pdev, netdev);
1790         adapter = netdev_priv(netdev);
1791         adapter->netdev = netdev;
1792         adapter->pdev = pdev;
1793         hw = &adapter->hw;
1794         hw->back = adapter;
1795         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1796
1797         mmio_start = pci_resource_start(pdev, 0);
1798         mmio_len = pci_resource_len(pdev, 0);
1799
1800         err = -EIO;
1801         hw->hw_addr = ioremap(mmio_start, mmio_len);
1802         if (!hw->hw_addr)
1803                 goto err_ioremap;
1804
1805         netdev->netdev_ops = &igb_netdev_ops;
1806         igb_set_ethtool_ops(netdev);
1807         netdev->watchdog_timeo = 5 * HZ;
1808
1809         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1810
1811         netdev->mem_start = mmio_start;
1812         netdev->mem_end = mmio_start + mmio_len;
1813
1814         /* PCI config space info */
1815         hw->vendor_id = pdev->vendor;
1816         hw->device_id = pdev->device;
1817         hw->revision_id = pdev->revision;
1818         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1819         hw->subsystem_device_id = pdev->subsystem_device;
1820
1821         /* Copy the default MAC, PHY and NVM function pointers */
1822         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1823         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1824         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1825         /* Initialize skew-specific constants */
1826         err = ei->get_invariants(hw);
1827         if (err)
1828                 goto err_sw_init;
1829
1830         /* setup the private structure */
1831         err = igb_sw_init(adapter);
1832         if (err)
1833                 goto err_sw_init;
1834
1835         igb_get_bus_info_pcie(hw);
1836
1837         hw->phy.autoneg_wait_to_complete = false;
1838
1839         /* Copper options */
1840         if (hw->phy.media_type == e1000_media_type_copper) {
1841                 hw->phy.mdix = AUTO_ALL_MODES;
1842                 hw->phy.disable_polarity_correction = false;
1843                 hw->phy.ms_type = e1000_ms_hw_default;
1844         }
1845
1846         if (igb_check_reset_block(hw))
1847                 dev_info(&pdev->dev,
1848                         "PHY reset is blocked due to SOL/IDER session.\n");
1849
1850         netdev->features = NETIF_F_SG |
1851                            NETIF_F_IP_CSUM |
1852                            NETIF_F_HW_VLAN_TX |
1853                            NETIF_F_HW_VLAN_RX |
1854                            NETIF_F_HW_VLAN_FILTER;
1855
1856         netdev->features |= NETIF_F_IPV6_CSUM;
1857         netdev->features |= NETIF_F_TSO;
1858         netdev->features |= NETIF_F_TSO6;
1859         netdev->features |= NETIF_F_GRO;
1860
1861         netdev->vlan_features |= NETIF_F_TSO;
1862         netdev->vlan_features |= NETIF_F_TSO6;
1863         netdev->vlan_features |= NETIF_F_IP_CSUM;
1864         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1865         netdev->vlan_features |= NETIF_F_SG;
1866
1867         if (pci_using_dac) {
1868                 netdev->features |= NETIF_F_HIGHDMA;
1869                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1870         }
1871
1872         if (hw->mac.type >= e1000_82576)
1873                 netdev->features |= NETIF_F_SCTP_CSUM;
1874
1875         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1876
1877         /* before reading the NVM, reset the controller to put the device in a
1878          * known good starting state */
1879         hw->mac.ops.reset_hw(hw);
1880
1881         /* make sure the NVM is good */
1882         if (igb_validate_nvm_checksum(hw) < 0) {
1883                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1884                 err = -EIO;
1885                 goto err_eeprom;
1886         }
1887
1888         /* copy the MAC address out of the NVM */
1889         if (hw->mac.ops.read_mac_addr(hw))
1890                 dev_err(&pdev->dev, "NVM Read Error\n");
1891
1892         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1893         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1894
1895         if (!is_valid_ether_addr(netdev->perm_addr)) {
1896                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1897                 err = -EIO;
1898                 goto err_eeprom;
1899         }
1900
1901         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1902                     (unsigned long) adapter);
1903         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1904                     (unsigned long) adapter);
1905
1906         INIT_WORK(&adapter->reset_task, igb_reset_task);
1907         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1908
1909         /* Initialize link properties that are user-changeable */
1910         adapter->fc_autoneg = true;
1911         hw->mac.autoneg = true;
1912         hw->phy.autoneg_advertised = 0x2f;
1913
1914         hw->fc.requested_mode = e1000_fc_default;
1915         hw->fc.current_mode = e1000_fc_default;
1916
1917         igb_validate_mdi_setting(hw);
1918
1919         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1920          * enable the ACPI Magic Packet filter
1921          */
1922
1923         if (hw->bus.func == 0)
1924                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1925         else if (hw->mac.type == e1000_82580)
1926                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1927                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1928                                  &eeprom_data);
1929         else if (hw->bus.func == 1)
1930                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1931
1932         if (eeprom_data & eeprom_apme_mask)
1933                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1934
1935         /* now that we have the eeprom settings, apply the special cases where
1936          * the eeprom may be wrong or the board simply won't support wake on
1937          * lan on a particular port */
1938         switch (pdev->device) {
1939         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1940                 adapter->eeprom_wol = 0;
1941                 break;
1942         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1943         case E1000_DEV_ID_82576_FIBER:
1944         case E1000_DEV_ID_82576_SERDES:
1945                 /* Wake events only supported on port A for dual fiber
1946                  * regardless of eeprom setting */
1947                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1948                         adapter->eeprom_wol = 0;
1949                 break;
1950         case E1000_DEV_ID_82576_QUAD_COPPER:
1951         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1952                 /* if quad port adapter, disable WoL on all but port A */
1953                 if (global_quad_port_a != 0)
1954                         adapter->eeprom_wol = 0;
1955                 else
1956                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1957                 /* Reset for multiple quad port adapters */
1958                 if (++global_quad_port_a == 4)
1959                         global_quad_port_a = 0;
1960                 break;
1961         }
1962
1963         /* initialize the wol settings based on the eeprom settings */
1964         adapter->wol = adapter->eeprom_wol;
1965         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1966
1967         /* reset the hardware with the new settings */
1968         igb_reset(adapter);
1969
1970         /* let the f/w know that the h/w is now under the control of the
1971          * driver. */
1972         igb_get_hw_control(adapter);
1973
1974         strcpy(netdev->name, "eth%d");
1975         err = register_netdev(netdev);
1976         if (err)
1977                 goto err_register;
1978
1979         /* carrier off reporting is important to ethtool even BEFORE open */
1980         netif_carrier_off(netdev);
1981
1982 #ifdef CONFIG_IGB_DCA
1983         if (dca_add_requester(&pdev->dev) == 0) {
1984                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1985                 dev_info(&pdev->dev, "DCA enabled\n");
1986                 igb_setup_dca(adapter);
1987         }
1988
1989 #endif
1990         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1991         /* print bus type/speed/width info */
1992         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1993                  netdev->name,
1994                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1995                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1996                                                             "unknown"),
1997                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1998                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1999                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2000                    "unknown"),
2001                  netdev->dev_addr);
2002
2003         igb_read_part_num(hw, &part_num);
2004         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
2005                 (part_num >> 8), (part_num & 0xff));
2006
2007         dev_info(&pdev->dev,
2008                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2009                 adapter->msix_entries ? "MSI-X" :
2010                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2011                 adapter->num_rx_queues, adapter->num_tx_queues);
2012
2013         return 0;
2014
2015 err_register:
2016         igb_release_hw_control(adapter);
2017 err_eeprom:
2018         if (!igb_check_reset_block(hw))
2019                 igb_reset_phy(hw);
2020
2021         if (hw->flash_address)
2022                 iounmap(hw->flash_address);
2023 err_sw_init:
2024         igb_clear_interrupt_scheme(adapter);
2025         iounmap(hw->hw_addr);
2026 err_ioremap:
2027         free_netdev(netdev);
2028 err_alloc_etherdev:
2029         pci_release_selected_regions(pdev,
2030                                      pci_select_bars(pdev, IORESOURCE_MEM));
2031 err_pci_reg:
2032 err_dma:
2033         pci_disable_device(pdev);
2034         return err;
2035 }
2036
2037 /**
2038  * igb_remove - Device Removal Routine
2039  * @pdev: PCI device information struct
2040  *
2041  * igb_remove is called by the PCI subsystem to alert the driver
2042  * that it should release a PCI device.  This could be caused by a
2043  * Hot-Plug event, or because the driver is going to be removed from
2044  * memory.
2045  **/
2046 static void __devexit igb_remove(struct pci_dev *pdev)
2047 {
2048         struct net_device *netdev = pci_get_drvdata(pdev);
2049         struct igb_adapter *adapter = netdev_priv(netdev);
2050         struct e1000_hw *hw = &adapter->hw;
2051
2052         /* flush_scheduled_work() may reschedule our watchdog task, so
2053          * explicitly disable watchdog tasks from being rescheduled  */
2054         set_bit(__IGB_DOWN, &adapter->state);
2055         del_timer_sync(&adapter->watchdog_timer);
2056         del_timer_sync(&adapter->phy_info_timer);
2057
2058         flush_scheduled_work();
2059
2060 #ifdef CONFIG_IGB_DCA
2061         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2062                 dev_info(&pdev->dev, "DCA disabled\n");
2063                 dca_remove_requester(&pdev->dev);
2064                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2065                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2066         }
2067 #endif
2068
2069         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2070          * would have already happened in close and is redundant. */
2071         igb_release_hw_control(adapter);
2072
2073         unregister_netdev(netdev);
2074
2075         igb_clear_interrupt_scheme(adapter);
2076
2077 #ifdef CONFIG_PCI_IOV
2078         /* reclaim resources allocated to VFs */
2079         if (adapter->vf_data) {
2080                 /* disable iov and allow time for transactions to clear */
2081                 pci_disable_sriov(pdev);
2082                 msleep(500);
2083
2084                 kfree(adapter->vf_data);
2085                 adapter->vf_data = NULL;
2086                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2087                 msleep(100);
2088                 dev_info(&pdev->dev, "IOV Disabled\n");
2089         }
2090 #endif
2091
2092         iounmap(hw->hw_addr);
2093         if (hw->flash_address)
2094                 iounmap(hw->flash_address);
2095         pci_release_selected_regions(pdev,
2096                                      pci_select_bars(pdev, IORESOURCE_MEM));
2097
2098         free_netdev(netdev);
2099
2100         pci_disable_pcie_error_reporting(pdev);
2101
2102         pci_disable_device(pdev);
2103 }
2104
2105 /**
2106  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2107  * @adapter: board private structure to initialize
2108  *
2109  * This function initializes the vf specific data storage and then attempts to
2110  * allocate the VFs.  The reason for this ordering is that it is much
2111  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2112  * the memory for the VFs.
2113  **/
2114 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2115 {
2116 #ifdef CONFIG_PCI_IOV
2117         struct pci_dev *pdev = adapter->pdev;
2118
2119         if (adapter->vfs_allocated_count) {
2120                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2121                                            sizeof(struct vf_data_storage),
2122                                            GFP_KERNEL);
2123                 /* if allocation failed then we do not support SR-IOV */
2124                 if (!adapter->vf_data) {
2125                         adapter->vfs_allocated_count = 0;
2126                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2127                                 "Data Storage\n");
2128                 }
2129         }
2130
2131         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2132                 kfree(adapter->vf_data);
2133                 adapter->vf_data = NULL;
2134 #endif /* CONFIG_PCI_IOV */
2135                 adapter->vfs_allocated_count = 0;
2136 #ifdef CONFIG_PCI_IOV
2137         } else {
2138                 unsigned char mac_addr[ETH_ALEN];
2139                 int i;
2140                 dev_info(&pdev->dev, "%d vfs allocated\n",
2141                          adapter->vfs_allocated_count);
2142                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2143                         random_ether_addr(mac_addr);
2144                         igb_set_vf_mac(adapter, i, mac_addr);
2145                 }
2146         }
2147 #endif /* CONFIG_PCI_IOV */
2148 }
2149
2150
2151 /**
2152  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2153  * @adapter: board private structure to initialize
2154  *
2155  * igb_init_hw_timer initializes the function pointer and values for the hw
2156  * timer found in hardware.
2157  **/
2158 static void igb_init_hw_timer(struct igb_adapter *adapter)
2159 {
2160         struct e1000_hw *hw = &adapter->hw;
2161
2162         switch (hw->mac.type) {
2163         case e1000_i350:
2164         case e1000_82580:
2165                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2166                 adapter->cycles.read = igb_read_clock;
2167                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2168                 adapter->cycles.mult = 1;
2169                 /*
2170                  * The 82580 timesync advances the system timer by 8ns every 8ns,
2171                  * and the value cannot be shifted.  Instead we need to shift
2172                  * the registers to generate a 64bit timer value.  As a result
2173                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2174                  * 24 in order to generate a larger value for synchronization.
2175                  */
2176                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2177                 /* disable system timer temporarily by setting bit 31 */
2178                 wr32(E1000_TSAUXC, 0x80000000);
2179                 wrfl();
2180
2181                 /* Set registers so that rollover occurs soon to test this. */
2182                 wr32(E1000_SYSTIMR, 0x00000000);
2183                 wr32(E1000_SYSTIML, 0x80000000);
2184                 wr32(E1000_SYSTIMH, 0x000000FF);
2185                 wrfl();
2186
2187                 /* enable system timer by clearing bit 31 */
2188                 wr32(E1000_TSAUXC, 0x0);
2189                 wrfl();
2190
2191                 timecounter_init(&adapter->clock,
2192                                  &adapter->cycles,
2193                                  ktime_to_ns(ktime_get_real()));
2194                 /*
2195                  * Synchronize our NIC clock against system wall clock. NIC
2196                  * time stamp reading requires ~3us per sample; each sample
2197                  * was pretty stable even under load, so only 10
2198                  * samples are required for each offset comparison.
2199                  */
2200                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2201                 adapter->compare.source = &adapter->clock;
2202                 adapter->compare.target = ktime_get_real;
2203                 adapter->compare.num_samples = 10;
2204                 timecompare_update(&adapter->compare, 0);
2205                 break;
2206         case e1000_82576:
2207                 /*
2208                  * Initialize hardware timer: we keep it running just in case
2209                  * some program needs it later on.
2210                  */
2211                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2212                 adapter->cycles.read = igb_read_clock;
2213                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2214                 adapter->cycles.mult = 1;
2215                 /**
2216                  * Scale the NIC clock cycle by a large factor so that
2217                  * relatively small clock corrections can be added or
2218                  * subtracted at each clock tick. The drawbacks of a large
2219                  * factor are a) that the clock register overflows more quickly
2220                  * (not such a big deal) and b) that the increment per tick has
2221                  * to fit into 24 bits.  As a result we need to use a shift of
2222                  * 19 so we can fit a value of 16 into the TIMINCA register.
2223                  */
2224                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2225                 wr32(E1000_TIMINCA,
2226                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2227                                 (16 << IGB_82576_TSYNC_SHIFT));
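                      /* note: 16 << IGB_82576_TSYNC_SHIFT = 16 << 19 = 0x800000,
                       * which fits within the 24-bit increment field mentioned
                       * above */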
2228
2229                 /* Set registers so that rollover occurs soon to test this. */
2230                 wr32(E1000_SYSTIML, 0x00000000);
2231                 wr32(E1000_SYSTIMH, 0xFF800000);
2232                 wrfl();
2233
2234                 timecounter_init(&adapter->clock,
2235                                  &adapter->cycles,
2236                                  ktime_to_ns(ktime_get_real()));
2237                 /*
2238                  * Synchronize our NIC clock against system wall clock. NIC
2239                  * time stamp reading requires ~3us per sample; each sample
2240                  * was pretty stable even under load, so only 10
2241                  * samples are required for each offset comparison.
2242                  */
2243                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2244                 adapter->compare.source = &adapter->clock;
2245                 adapter->compare.target = ktime_get_real;
2246                 adapter->compare.num_samples = 10;
2247                 timecompare_update(&adapter->compare, 0);
2248                 break;
2249         case e1000_82575:
2250                 /* 82575 does not support timesync */
2251         default:
2252                 break;
2253         }
2254
2255 }
2256
2257 /**
2258  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2259  * @adapter: board private structure to initialize
2260  *
2261  * igb_sw_init initializes the Adapter private data structure.
2262  * Fields are initialized based on PCI device information and
2263  * OS network device settings (MTU size).
2264  **/
2265 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2266 {
2267         struct e1000_hw *hw = &adapter->hw;
2268         struct net_device *netdev = adapter->netdev;
2269         struct pci_dev *pdev = adapter->pdev;
2270
2271         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2272
2273         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2274         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2275         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2276         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2277
2278         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2279         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
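              /* e.g., the default 1500-byte MTU gives a 1518-byte max frame
               * (1500 + 14 + 4) and a 64-byte minimum frame (60 + 4) */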
2280
2281         spin_lock_init(&adapter->stats64_lock);
2282 #ifdef CONFIG_PCI_IOV
2283         if (hw->mac.type == e1000_82576)
2284                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2285
2286 #endif /* CONFIG_PCI_IOV */
2287         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2288
2289         /*
2290          * if rss_queues > 4, or if more than 6 VFs are going to be allocated
2291          * while rss_queues > 1, combine the queues into queue pairs in order
2292          * to conserve the limited supply of interrupt vectors
2293          */
2294         if ((adapter->rss_queues > 4) ||
2295             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2296                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
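              /* e.g., if rss_queues ends up greater than 4, queue pairs are used
               * and each q_vector then services one Tx and one Rx ring from a
               * single interrupt vector */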
2297
2298         /* This call may decrease the number of queues */
2299         if (igb_init_interrupt_scheme(adapter)) {
2300                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2301                 return -ENOMEM;
2302         }
2303
2304         igb_init_hw_timer(adapter);
2305         igb_probe_vfs(adapter);
2306
2307         /* Explicitly disable IRQ since the NIC can be in any state. */
2308         igb_irq_disable(adapter);
2309
2310         set_bit(__IGB_DOWN, &adapter->state);
2311         return 0;
2312 }
2313
2314 /**
2315  * igb_open - Called when a network interface is made active
2316  * @netdev: network interface device structure
2317  *
2318  * Returns 0 on success, negative value on failure
2319  *
2320  * The open entry point is called when a network interface is made
2321  * active by the system (IFF_UP).  At this point all resources needed
2322  * for transmit and receive operations are allocated, the interrupt
2323  * handler is registered with the OS, the watchdog timer is started,
2324  * and the stack is notified that the interface is ready.
2325  **/
2326 static int igb_open(struct net_device *netdev)
2327 {
2328         struct igb_adapter *adapter = netdev_priv(netdev);
2329         struct e1000_hw *hw = &adapter->hw;
2330         int err;
2331         int i;
2332
2333         /* disallow open during test */
2334         if (test_bit(__IGB_TESTING, &adapter->state))
2335                 return -EBUSY;
2336
2337         netif_carrier_off(netdev);
2338
2339         /* allocate transmit descriptors */
2340         err = igb_setup_all_tx_resources(adapter);
2341         if (err)
2342                 goto err_setup_tx;
2343
2344         /* allocate receive descriptors */
2345         err = igb_setup_all_rx_resources(adapter);
2346         if (err)
2347                 goto err_setup_rx;
2348
2349         igb_power_up_link(adapter);
2350
2351         /* before we allocate an interrupt, we must be ready to handle it.
2352          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2353          * as soon as we call pci_request_irq, so we have to setup our
2354          * clean_rx handler before we do so.  */
2355         igb_configure(adapter);
2356
2357         err = igb_request_irq(adapter);
2358         if (err)
2359                 goto err_req_irq;
2360
2361         /* From here on the code is the same as igb_up() */
2362         clear_bit(__IGB_DOWN, &adapter->state);
2363
2364         for (i = 0; i < adapter->num_q_vectors; i++) {
2365                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2366                 napi_enable(&q_vector->napi);
2367         }
2368
2369         /* Clear any pending interrupts. */
2370         rd32(E1000_ICR);
2371
2372         igb_irq_enable(adapter);
2373
2374         /* notify VFs that reset has been completed */
2375         if (adapter->vfs_allocated_count) {
2376                 u32 reg_data = rd32(E1000_CTRL_EXT);
2377                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2378                 wr32(E1000_CTRL_EXT, reg_data);
2379         }
2380
2381         netif_tx_start_all_queues(netdev);
2382
2383         /* start the watchdog. */
2384         hw->mac.get_link_status = 1;
2385         schedule_work(&adapter->watchdog_task);
2386
2387         return 0;
2388
2389 err_req_irq:
2390         igb_release_hw_control(adapter);
2391         igb_power_down_link(adapter);
2392         igb_free_all_rx_resources(adapter);
2393 err_setup_rx:
2394         igb_free_all_tx_resources(adapter);
2395 err_setup_tx:
2396         igb_reset(adapter);
2397
2398         return err;
2399 }
2400
2401 /**
2402  * igb_close - Disables a network interface
2403  * @netdev: network interface device structure
2404  *
2405  * Returns 0, this is not allowed to fail
2406  *
2407  * The close entry point is called when an interface is de-activated
2408  * by the OS.  The hardware is still under the driver's control, but
2409  * needs to be disabled.  A global MAC reset is issued to stop the
2410  * hardware, and all transmit and receive resources are freed.
2411  **/
2412 static int igb_close(struct net_device *netdev)
2413 {
2414         struct igb_adapter *adapter = netdev_priv(netdev);
2415
2416         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2417         igb_down(adapter);
2418
2419         igb_free_irq(adapter);
2420
2421         igb_free_all_tx_resources(adapter);
2422         igb_free_all_rx_resources(adapter);
2423
2424         return 0;
2425 }
2426
2427 /**
2428  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2429  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2430  *
2431  * Return 0 on success, negative on failure
2432  **/
2433 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2434 {
2435         struct device *dev = tx_ring->dev;
2436         int size;
2437
2438         size = sizeof(struct igb_buffer) * tx_ring->count;
2439         tx_ring->buffer_info = vzalloc(size);
2440         if (!tx_ring->buffer_info)
2441                 goto err;
2442
2443         /* round up to nearest 4K */
2444         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2445         tx_ring->size = ALIGN(tx_ring->size, 4096);
2446
2447         tx_ring->desc = dma_alloc_coherent(dev,
2448                                            tx_ring->size,
2449                                            &tx_ring->dma,
2450                                            GFP_KERNEL);
2451
2452         if (!tx_ring->desc)
2453                 goto err;
2454
2455         tx_ring->next_to_use = 0;
2456         tx_ring->next_to_clean = 0;
2457         return 0;
2458
2459 err:
2460         vfree(tx_ring->buffer_info);
2461         dev_err(dev,
2462                 "Unable to allocate memory for the transmit descriptor ring\n");
2463         return -ENOMEM;
2464 }
2465
2466 /**
2467  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2468  *                                (Descriptors) for all queues
2469  * @adapter: board private structure
2470  *
2471  * Return 0 on success, negative on failure
2472  **/
2473 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2474 {
2475         struct pci_dev *pdev = adapter->pdev;
2476         int i, err = 0;
2477
2478         for (i = 0; i < adapter->num_tx_queues; i++) {
2479                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2480                 if (err) {
2481                         dev_err(&pdev->dev,
2482                                 "Allocation for Tx Queue %u failed\n", i);
2483                         for (i--; i >= 0; i--)
2484                                 igb_free_tx_resources(adapter->tx_ring[i]);
2485                         break;
2486                 }
2487         }
2488
2489         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2490                 int r_idx = i % adapter->num_tx_queues;
2491                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2492         }
2493         return err;
2494 }
2495
2496 /**
2497  * igb_setup_tctl - configure the transmit control registers
2498  * @adapter: Board private structure
2499  **/
2500 void igb_setup_tctl(struct igb_adapter *adapter)
2501 {
2502         struct e1000_hw *hw = &adapter->hw;
2503         u32 tctl;
2504
2505         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2506         wr32(E1000_TXDCTL(0), 0);
2507
2508         /* Program the Transmit Control Register */
2509         tctl = rd32(E1000_TCTL);
2510         tctl &= ~E1000_TCTL_CT;
2511         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2512                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2513
2514         igb_config_collision_dist(hw);
2515
2516         /* Enable transmits */
2517         tctl |= E1000_TCTL_EN;
2518
2519         wr32(E1000_TCTL, tctl);
2520 }
2521
2522 /**
2523  * igb_configure_tx_ring - Configure transmit ring after Reset
2524  * @adapter: board private structure
2525  * @ring: tx ring to configure
2526  *
2527  * Configure a transmit ring after a reset.
2528  **/
2529 void igb_configure_tx_ring(struct igb_adapter *adapter,
2530                            struct igb_ring *ring)
2531 {
2532         struct e1000_hw *hw = &adapter->hw;
2533         u32 txdctl;
2534         u64 tdba = ring->dma;
2535         int reg_idx = ring->reg_idx;
2536
2537         /* disable the queue */
2538         txdctl = rd32(E1000_TXDCTL(reg_idx));
2539         wr32(E1000_TXDCTL(reg_idx),
2540                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2541         wrfl();
2542         mdelay(10);
2543
2544         wr32(E1000_TDLEN(reg_idx),
2545                         ring->count * sizeof(union e1000_adv_tx_desc));
2546         wr32(E1000_TDBAL(reg_idx),
2547                         tdba & 0x00000000ffffffffULL);
2548         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2549
2550         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2551         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2552         writel(0, ring->head);
2553         writel(0, ring->tail);
2554
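              /* the prefetch (PTHRESH), host (HTHRESH) and write-back (WTHRESH)
               * thresholds are packed into successive byte positions of TXDCTL,
               * hence the shifts of 8 and 16 below */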
2555         txdctl |= IGB_TX_PTHRESH;
2556         txdctl |= IGB_TX_HTHRESH << 8;
2557         txdctl |= IGB_TX_WTHRESH << 16;
2558
2559         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2560         wr32(E1000_TXDCTL(reg_idx), txdctl);
2561 }
2562
2563 /**
2564  * igb_configure_tx - Configure transmit Unit after Reset
2565  * @adapter: board private structure
2566  *
2567  * Configure the Tx unit of the MAC after a reset.
2568  **/
2569 static void igb_configure_tx(struct igb_adapter *adapter)
2570 {
2571         int i;
2572
2573         for (i = 0; i < adapter->num_tx_queues; i++)
2574                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2575 }
2576
2577 /**
2578  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2579  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2580  *
2581  * Returns 0 on success, negative on failure
2582  **/
2583 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2584 {
2585         struct device *dev = rx_ring->dev;
2586         int size, desc_len;
2587
2588         size = sizeof(struct igb_buffer) * rx_ring->count;
2589         rx_ring->buffer_info = vzalloc(size);
2590         if (!rx_ring->buffer_info)
2591                 goto err;
2592
2593         desc_len = sizeof(union e1000_adv_rx_desc);
2594
2595         /* Round up to nearest 4K */
2596         rx_ring->size = rx_ring->count * desc_len;
2597         rx_ring->size = ALIGN(rx_ring->size, 4096);
2598
2599         rx_ring->desc = dma_alloc_coherent(dev,
2600                                            rx_ring->size,
2601                                            &rx_ring->dma,
2602                                            GFP_KERNEL);
2603
2604         if (!rx_ring->desc)
2605                 goto err;
2606
2607         rx_ring->next_to_clean = 0;
2608         rx_ring->next_to_use = 0;
2609
2610         return 0;
2611
2612 err:
2613         vfree(rx_ring->buffer_info);
2614         rx_ring->buffer_info = NULL;
2615         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2616                 " ring\n");
2617         return -ENOMEM;
2618 }
2619
2620 /**
2621  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2622  *                                (Descriptors) for all queues
2623  * @adapter: board private structure
2624  *
2625  * Return 0 on success, negative on failure
2626  **/
2627 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2628 {
2629         struct pci_dev *pdev = adapter->pdev;
2630         int i, err = 0;
2631
2632         for (i = 0; i < adapter->num_rx_queues; i++) {
2633                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2634                 if (err) {
2635                         dev_err(&pdev->dev,
2636                                 "Allocation for Rx Queue %u failed\n", i);
2637                         for (i--; i >= 0; i--)
2638                                 igb_free_rx_resources(adapter->rx_ring[i]);
2639                         break;
2640                 }
2641         }
2642
2643         return err;
2644 }
2645
2646 /**
2647  * igb_setup_mrqc - configure the multiple receive queue control registers
2648  * @adapter: Board private structure
2649  **/
2650 static void igb_setup_mrqc(struct igb_adapter *adapter)
2651 {
2652         struct e1000_hw *hw = &adapter->hw;
2653         u32 mrqc, rxcsum;
2654         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2655         union e1000_reta {
2656                 u32 dword;
2657                 u8  bytes[4];
2658         } reta;
2659         static const u8 rsshash[40] = {
2660                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2661                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2662                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2663                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2664
2665         /* Fill out hash function seeds */
2666         for (j = 0; j < 10; j++) {
2667                 u32 rsskey = rsshash[(j * 4)];
2668                 rsskey |= rsshash[(j * 4) + 1] << 8;
2669                 rsskey |= rsshash[(j * 4) + 2] << 16;
2670                 rsskey |= rsshash[(j * 4) + 3] << 24;
2671                 array_wr32(E1000_RSSRK(0), j, rsskey);
2672         }
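              /* e.g., the first RSSRK dword packs rsshash[0..3] little-endian
               * into 0xda565a6d */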
2673
2674         num_rx_queues = adapter->rss_queues;
2675
2676         if (adapter->vfs_allocated_count) {
2677                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2678                 switch (hw->mac.type) {
2679                 case e1000_i350:
2680                 case e1000_82580:
2681                         num_rx_queues = 1;
2682                         shift = 0;
2683                         break;
2684                 case e1000_82576:
2685                         shift = 3;
2686                         num_rx_queues = 2;
2687                         break;
2688                 case e1000_82575:
2689                         shift = 2;
2690                         shift2 = 6;
2691                 default:
2692                         break;
2693                 }
2694         } else {
2695                 if (hw->mac.type == e1000_82575)
2696                         shift = 6;
2697         }
2698
2699         for (j = 0; j < (32 * 4); j++) {
2700                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2701                 if (shift2)
2702                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2703                 if ((j & 3) == 3)
2704                         wr32(E1000_RETA(j >> 2), reta.dword);
2705         }
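              /* e.g., with rss_queues = 4, no VFs and a non-82575 MAC (shift and
               * shift2 both zero) the 128 redirection table entries simply cycle
               * 0, 1, 2, 3, ... spreading flows evenly across the four queues */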
2706
2707         /*
2708          * Disable raw packet checksumming so that RSS hash is placed in
2709          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2710          * offloads as they are enabled by default
2711          */
2712         rxcsum = rd32(E1000_RXCSUM);
2713         rxcsum |= E1000_RXCSUM_PCSD;
2714
2715         if (adapter->hw.mac.type >= e1000_82576)
2716                 /* Enable Receive Checksum Offload for SCTP */
2717                 rxcsum |= E1000_RXCSUM_CRCOFL;
2718
2719         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2720         wr32(E1000_RXCSUM, rxcsum);
2721
2722         /* If VMDq is enabled then we set the appropriate mode for that, else
2723          * we default to RSS so that an RSS hash is calculated per packet even
2724          * if we are only using one queue */
2725         if (adapter->vfs_allocated_count) {
2726                 if (hw->mac.type > e1000_82575) {
2727                         /* Set the default pool for the PF's first queue */
2728                         u32 vtctl = rd32(E1000_VT_CTL);
2729                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2730                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2731                         vtctl |= adapter->vfs_allocated_count <<
2732                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2733                         wr32(E1000_VT_CTL, vtctl);
2734                 }
2735                 if (adapter->rss_queues > 1)
2736                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2737                 else
2738                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2739         } else {
2740                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2741         }
2742         igb_vmm_control(adapter);
2743
2744         /*
2745          * Generate RSS hash based on TCP port numbers and/or
2746          * IPv4/v6 src and dst addresses since UDP cannot be
2747          * hashed reliably due to IP fragmentation
2748          */
2749         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2750                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2751                 E1000_MRQC_RSS_FIELD_IPV6 |
2752                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2753                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2754
2755         wr32(E1000_MRQC, mrqc);
2756 }
2757
2758 /**
2759  * igb_setup_rctl - configure the receive control registers
2760  * @adapter: board private structure
2761  **/
2762 void igb_setup_rctl(struct igb_adapter *adapter)
2763 {
2764         struct e1000_hw *hw = &adapter->hw;
2765         u32 rctl;
2766
2767         rctl = rd32(E1000_RCTL);
2768
2769         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2770         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2771
2772         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2773                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2774
2775         /*
2776          * enable stripping of CRC. It's unlikely this will break BMC
2777          * redirection as it did with e1000. Newer features require
2778          * that the HW strips the CRC.
2779          */
2780         rctl |= E1000_RCTL_SECRC;
2781
2782         /* disable store bad packets and clear size bits. */
2783         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2784
2785         /* enable LPE to prevent packets larger than max_frame_size */
2786         rctl |= E1000_RCTL_LPE;
2787
2788         /* disable queue 0 to prevent tail write w/o re-config */
2789         wr32(E1000_RXDCTL(0), 0);
2790
2791         /* Attention!!!  For SR-IOV PF driver operations you must enable
2792          * queue drop for all VF and PF queues to prevent head of line blocking
2793          * if an un-trusted VF does not provide descriptors to hardware.
2794          */
2795         if (adapter->vfs_allocated_count) {
2796                 /* set all queue drop enable bits */
2797                 wr32(E1000_QDE, ALL_QUEUES);
2798         }
2799
2800         wr32(E1000_RCTL, rctl);
2801 }
2802
2803 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2804                                    int vfn)
2805 {
2806         struct e1000_hw *hw = &adapter->hw;
2807         u32 vmolr;
2808
2809         /* if this is a VF (not the PF) and that VF has VLANs enabled,
2810          * increase the size to make room for the VLAN tag */
2811         if (vfn < adapter->vfs_allocated_count &&
2812             adapter->vf_data[vfn].vlans_enabled)
2813                 size += VLAN_TAG_SIZE;
2814
2815         vmolr = rd32(E1000_VMOLR(vfn));
2816         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2817         vmolr |= size | E1000_VMOLR_LPE;
2818         wr32(E1000_VMOLR(vfn), vmolr);
2819
2820         return 0;
2821 }
2822
2823 /**
2824  * igb_rlpml_set - set maximum receive packet size
2825  * @adapter: board private structure
2826  *
2827  * Configure maximum receivable packet size.
2828  **/
2829 static void igb_rlpml_set(struct igb_adapter *adapter)
2830 {
2831         u32 max_frame_size = adapter->max_frame_size;
2832         struct e1000_hw *hw = &adapter->hw;
2833         u16 pf_id = adapter->vfs_allocated_count;
2834
2835         if (adapter->vlgrp)
2836                 max_frame_size += VLAN_TAG_SIZE;
2837
2838         /* if vfs are enabled we set RLPML to the largest possible request
2839          * size and set the VMOLR RLPML to the size we need */
2840         if (pf_id) {
2841                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2842                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2843         }
2844
2845         wr32(E1000_RLPML, max_frame_size);
2846 }
2847
2848 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2849                                  int vfn, bool aupe)
2850 {
2851         struct e1000_hw *hw = &adapter->hw;
2852         u32 vmolr;
2853
2854         /*
2855          * This register exists only on 82576 and newer, so on older MACs
2856          * we should exit and do nothing
2857          */
2858         if (hw->mac.type < e1000_82576)
2859                 return;
2860
2861         vmolr = rd32(E1000_VMOLR(vfn));
2862         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2863         if (aupe)
2864                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2865         else
2866                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2867
2868         /* clear all bits that might not be set */
2869         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2870
2871         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2872                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2873         /*
2874          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2875          * multicast packets
2876          */
2877         if (vfn <= adapter->vfs_allocated_count)
2878                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2879
2880         wr32(E1000_VMOLR(vfn), vmolr);
2881 }
2882
2883 /**
2884  * igb_configure_rx_ring - Configure a receive ring after Reset
2885  * @adapter: board private structure
2886  * @ring: receive ring to be configured
2887  *
2888  * Configure one Rx descriptor ring of the MAC after a reset.
2889  **/
2890 void igb_configure_rx_ring(struct igb_adapter *adapter,
2891                            struct igb_ring *ring)
2892 {
2893         struct e1000_hw *hw = &adapter->hw;
2894         u64 rdba = ring->dma;
2895         int reg_idx = ring->reg_idx;
2896         u32 srrctl, rxdctl;
2897
2898         /* disable the queue */
2899         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2900         wr32(E1000_RXDCTL(reg_idx),
2901                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2902
2903         /* Set DMA base address registers */
2904         wr32(E1000_RDBAL(reg_idx),
2905              rdba & 0x00000000ffffffffULL);
2906         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2907         wr32(E1000_RDLEN(reg_idx),
2908                        ring->count * sizeof(union e1000_adv_rx_desc));
2909
2910         /* initialize head and tail */
2911         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2912         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2913         writel(0, ring->head);
2914         writel(0, ring->tail);
2915
2916         /* set descriptor configuration */
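             /* buffers smaller than 1 KB use always-split header descriptors with
              * packet data in a half-page buffer (capped at 16 KB); larger buffers
              * use a single advanced one-buffer descriptor sized in 1 KB units */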
2917         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2918                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2919                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2920 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2921                 srrctl |= IGB_RXBUFFER_16384 >>
2922                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2923 #else
2924                 srrctl |= (PAGE_SIZE / 2) >>
2925                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2926 #endif
2927                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2928         } else {
2929                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2930                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2931                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2932         }
2933         if (hw->mac.type == e1000_82580)
2934                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2935         /* Only set Drop Enable if we are supporting multiple queues */
2936         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2937                 srrctl |= E1000_SRRCTL_DROP_EN;
2938
2939         wr32(E1000_SRRCTL(reg_idx), srrctl);
2940
2941         /* set filtering for VMDQ pools */
2942         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2943
2944         /* enable receive descriptor fetching */
2945         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2946         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
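             /* the low RXDCTL bits hold the prefetch (PTHRESH), host (HTHRESH),
              * and write-back (WTHRESH) thresholds at bit offsets 0, 8 and 16 */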
2947         rxdctl &= 0xFFF00000;
2948         rxdctl |= IGB_RX_PTHRESH;
2949         rxdctl |= IGB_RX_HTHRESH << 8;
2950         rxdctl |= IGB_RX_WTHRESH << 16;
2951         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2952 }
2953
2954 /**
2955  * igb_configure_rx - Configure receive Unit after Reset
2956  * @adapter: board private structure
2957  *
2958  * Configure the Rx unit of the MAC after a reset.
2959  **/
2960 static void igb_configure_rx(struct igb_adapter *adapter)
2961 {
2962         int i;
2963
2964         /* set UTA to appropriate mode */
2965         igb_set_uta(adapter);
2966
2967         /* set the correct pool for the PF default MAC address in entry 0 */
2968         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2969                          adapter->vfs_allocated_count);
2970
2971         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2972          * the Base and Length of the Rx Descriptor Ring */
2973         for (i = 0; i < adapter->num_rx_queues; i++)
2974                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2975 }
2976
2977 /**
2978  * igb_free_tx_resources - Free Tx Resources per Queue
2979  * @tx_ring: Tx descriptor ring for a specific queue
2980  *
2981  * Free all transmit software resources
2982  **/
2983 void igb_free_tx_resources(struct igb_ring *tx_ring)
2984 {
2985         igb_clean_tx_ring(tx_ring);
2986
2987         vfree(tx_ring->buffer_info);
2988         tx_ring->buffer_info = NULL;
2989
2990         /* if not set, then don't free */
2991         if (!tx_ring->desc)
2992                 return;
2993
2994         dma_free_coherent(tx_ring->dev, tx_ring->size,
2995                           tx_ring->desc, tx_ring->dma);
2996
2997         tx_ring->desc = NULL;
2998 }
2999
3000 /**
3001  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3002  * @adapter: board private structure
3003  *
3004  * Free all transmit software resources
3005  **/
3006 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3007 {
3008         int i;
3009
3010         for (i = 0; i < adapter->num_tx_queues; i++)
3011                 igb_free_tx_resources(adapter->tx_ring[i]);
3012 }
3013
3014 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3015                                     struct igb_buffer *buffer_info)
3016 {
3017         if (buffer_info->dma) {
3018                 if (buffer_info->mapped_as_page)
3019                         dma_unmap_page(tx_ring->dev,
3020                                         buffer_info->dma,
3021                                         buffer_info->length,
3022                                         DMA_TO_DEVICE);
3023                 else
3024                         dma_unmap_single(tx_ring->dev,
3025                                         buffer_info->dma,
3026                                         buffer_info->length,
3027                                         DMA_TO_DEVICE);
3028                 buffer_info->dma = 0;
3029         }
3030         if (buffer_info->skb) {
3031                 dev_kfree_skb_any(buffer_info->skb);
3032                 buffer_info->skb = NULL;
3033         }
3034         buffer_info->time_stamp = 0;
3035         buffer_info->length = 0;
3036         buffer_info->next_to_watch = 0;
3037         buffer_info->mapped_as_page = false;
3038 }
3039
3040 /**
3041  * igb_clean_tx_ring - Free Tx Buffers
3042  * @tx_ring: ring to be cleaned
3043  **/
3044 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3045 {
3046         struct igb_buffer *buffer_info;
3047         unsigned long size;
3048         unsigned int i;
3049
3050         if (!tx_ring->buffer_info)
3051                 return;
3052         /* Free all the Tx ring sk_buffs */
3053
3054         for (i = 0; i < tx_ring->count; i++) {
3055                 buffer_info = &tx_ring->buffer_info[i];
3056                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3057         }
3058
3059         size = sizeof(struct igb_buffer) * tx_ring->count;
3060         memset(tx_ring->buffer_info, 0, size);
3061
3062         /* Zero out the descriptor ring */
3063         memset(tx_ring->desc, 0, tx_ring->size);
3064
3065         tx_ring->next_to_use = 0;
3066         tx_ring->next_to_clean = 0;
3067 }
3068
3069 /**
3070  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3071  * @adapter: board private structure
3072  **/
3073 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3074 {
3075         int i;
3076
3077         for (i = 0; i < adapter->num_tx_queues; i++)
3078                 igb_clean_tx_ring(adapter->tx_ring[i]);
3079 }
3080
3081 /**
3082  * igb_free_rx_resources - Free Rx Resources
3083  * @rx_ring: ring to clean the resources from
3084  *
3085  * Free all receive software resources
3086  **/
3087 void igb_free_rx_resources(struct igb_ring *rx_ring)
3088 {
3089         igb_clean_rx_ring(rx_ring);
3090
3091         vfree(rx_ring->buffer_info);
3092         rx_ring->buffer_info = NULL;
3093
3094         /* if not set, then don't free */
3095         if (!rx_ring->desc)
3096                 return;
3097
3098         dma_free_coherent(rx_ring->dev, rx_ring->size,
3099                           rx_ring->desc, rx_ring->dma);
3100
3101         rx_ring->desc = NULL;
3102 }
3103
3104 /**
3105  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3106  * @adapter: board private structure
3107  *
3108  * Free all receive software resources
3109  **/
3110 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3111 {
3112         int i;
3113
3114         for (i = 0; i < adapter->num_rx_queues; i++)
3115                 igb_free_rx_resources(adapter->rx_ring[i]);
3116 }
3117
3118 /**
3119  * igb_clean_rx_ring - Free Rx Buffers per Queue
3120  * @rx_ring: ring to free buffers from
3121  **/
3122 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3123 {
3124         struct igb_buffer *buffer_info;
3125         unsigned long size;
3126         unsigned int i;
3127
3128         if (!rx_ring->buffer_info)
3129                 return;
3130
3131         /* Free all the Rx ring sk_buffs */
3132         for (i = 0; i < rx_ring->count; i++) {
3133                 buffer_info = &rx_ring->buffer_info[i];
3134                 if (buffer_info->dma) {
3135                         dma_unmap_single(rx_ring->dev,
3136                                          buffer_info->dma,
3137                                          rx_ring->rx_buffer_len,
3138                                          DMA_FROM_DEVICE);
3139                         buffer_info->dma = 0;
3140                 }
3141
3142                 if (buffer_info->skb) {
3143                         dev_kfree_skb(buffer_info->skb);
3144                         buffer_info->skb = NULL;
3145                 }
3146                 if (buffer_info->page_dma) {
3147                         dma_unmap_page(rx_ring->dev,
3148                                        buffer_info->page_dma,
3149                                        PAGE_SIZE / 2,
3150                                        DMA_FROM_DEVICE);
3151                         buffer_info->page_dma = 0;
3152                 }
3153                 if (buffer_info->page) {
3154                         put_page(buffer_info->page);
3155                         buffer_info->page = NULL;
3156                         buffer_info->page_offset = 0;
3157                 }
3158         }
3159
3160         size = sizeof(struct igb_buffer) * rx_ring->count;
3161         memset(rx_ring->buffer_info, 0, size);
3162
3163         /* Zero out the descriptor ring */
3164         memset(rx_ring->desc, 0, rx_ring->size);
3165
3166         rx_ring->next_to_clean = 0;
3167         rx_ring->next_to_use = 0;
3168 }
3169
3170 /**
3171  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3172  * @adapter: board private structure
3173  **/
3174 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3175 {
3176         int i;
3177
3178         for (i = 0; i < adapter->num_rx_queues; i++)
3179                 igb_clean_rx_ring(adapter->rx_ring[i]);
3180 }
3181
3182 /**
3183  * igb_set_mac - Change the Ethernet Address of the NIC
3184  * @netdev: network interface device structure
3185  * @p: pointer to an address structure
3186  *
3187  * Returns 0 on success, negative on failure
3188  **/
3189 static int igb_set_mac(struct net_device *netdev, void *p)
3190 {
3191         struct igb_adapter *adapter = netdev_priv(netdev);
3192         struct e1000_hw *hw = &adapter->hw;
3193         struct sockaddr *addr = p;
3194
3195         if (!is_valid_ether_addr(addr->sa_data))
3196                 return -EADDRNOTAVAIL;
3197
3198         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3199         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3200
3201         /* set the correct pool for the new PF MAC address in entry 0 */
3202         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3203                          adapter->vfs_allocated_count);
3204
3205         return 0;
3206 }
3207
3208 /**
3209  * igb_write_mc_addr_list - write multicast addresses to MTA
3210  * @netdev: network interface device structure
3211  *
3212  * Writes multicast address list to the MTA hash table.
3213  * Returns: -ENOMEM on failure
3214  *                0 on no addresses written
3215  *                X on writing X addresses to MTA
3216  **/
3217 static int igb_write_mc_addr_list(struct net_device *netdev)
3218 {
3219         struct igb_adapter *adapter = netdev_priv(netdev);
3220         struct e1000_hw *hw = &adapter->hw;
3221         struct netdev_hw_addr *ha;
3222         u8  *mta_list;
3223         int i;
3224
3225         if (netdev_mc_empty(netdev)) {
3226                 /* nothing to program, so clear mc list */
3227                 igb_update_mc_addr_list(hw, NULL, 0);
3228                 igb_restore_vf_multicasts(adapter);
3229                 return 0;
3230         }
3231
3232         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3233         if (!mta_list)
3234                 return -ENOMEM;
3235
3236         /* The shared function expects a packed array of only addresses. */
3237         i = 0;
3238         netdev_for_each_mc_addr(ha, netdev)
3239                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3240
3241         igb_update_mc_addr_list(hw, mta_list, i);
3242         kfree(mta_list);
3243
3244         return netdev_mc_count(netdev);
3245 }
3246
3247 /**
3248  * igb_write_uc_addr_list - write unicast addresses to RAR table
3249  * @netdev: network interface device structure
3250  *
3251  * Writes unicast address list to the RAR table.
3252  * Returns: -ENOMEM on failure/insufficient address space
3253  *                0 on no addresses written
3254  *                X on writing X addresses to the RAR table
3255  **/
3256 static int igb_write_uc_addr_list(struct net_device *netdev)
3257 {
3258         struct igb_adapter *adapter = netdev_priv(netdev);
3259         struct e1000_hw *hw = &adapter->hw;
3260         unsigned int vfn = adapter->vfs_allocated_count;
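             /* one RAR entry is reserved for the PF default MAC and one for each
              * allocated VF; the rest can hold additional unicast addresses */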
3261         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3262         int count = 0;
3263
3264         /* return ENOMEM indicating insufficient memory for addresses */
3265         if (netdev_uc_count(netdev) > rar_entries)
3266                 return -ENOMEM;
3267
3268         if (!netdev_uc_empty(netdev) && rar_entries) {
3269                 struct netdev_hw_addr *ha;
3270
3271                 netdev_for_each_uc_addr(ha, netdev) {
3272                         if (!rar_entries)
3273                                 break;
3274                         igb_rar_set_qsel(adapter, ha->addr,
3275                                          rar_entries--,
3276                                          vfn);
3277                         count++;
3278                 }
3279         }
3280         /* clear the remaining unused RAR entries in reverse order to avoid write combining */
3281         for (; rar_entries > 0 ; rar_entries--) {
3282                 wr32(E1000_RAH(rar_entries), 0);
3283                 wr32(E1000_RAL(rar_entries), 0);
3284         }
3285         wrfl();
3286
3287         return count;
3288 }
3289
3290 /**
3291  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3292  * @netdev: network interface device structure
3293  *
3294  * The set_rx_mode entry point is called whenever the unicast or multicast
3295  * address lists or the network interface flags are updated.  This routine is
3296  * responsible for configuring the hardware for proper unicast, multicast,
3297  * promiscuous mode, and all-multi behavior.
3298  **/
3299 static void igb_set_rx_mode(struct net_device *netdev)
3300 {
3301         struct igb_adapter *adapter = netdev_priv(netdev);
3302         struct e1000_hw *hw = &adapter->hw;
3303         unsigned int vfn = adapter->vfs_allocated_count;
3304         u32 rctl, vmolr = 0;
3305         int count;
3306
3307         /* Check for Promiscuous and All Multicast modes */
3308         rctl = rd32(E1000_RCTL);
3309
3310         /* clear the affected bits */
3311         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3312
3313         if (netdev->flags & IFF_PROMISC) {
3314                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3315                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3316         } else {
3317                 if (netdev->flags & IFF_ALLMULTI) {
3318                         rctl |= E1000_RCTL_MPE;
3319                         vmolr |= E1000_VMOLR_MPME;
3320                 } else {
3321                         /*
3322                          * Write addresses to the MTA, if the attempt fails
3323                          * then we should just turn on promiscuous mode so
3324                          * that we can at least receive multicast traffic
3325                          */
3326                         count = igb_write_mc_addr_list(netdev);
3327                         if (count < 0) {
3328                                 rctl |= E1000_RCTL_MPE;
3329                                 vmolr |= E1000_VMOLR_MPME;
3330                         } else if (count) {
3331                                 vmolr |= E1000_VMOLR_ROMPE;
3332                         }
3333                 }
3334                 /*
3335                  * Write addresses to available RAR registers, if there is not
3336                  * sufficient space to store all the addresses then enable
3337                  * unicast promiscuous mode
3338                  */
3339                 count = igb_write_uc_addr_list(netdev);
3340                 if (count < 0) {
3341                         rctl |= E1000_RCTL_UPE;
3342                         vmolr |= E1000_VMOLR_ROPE;
3343                 }
3344                 rctl |= E1000_RCTL_VFE;
3345         }
3346         wr32(E1000_RCTL, rctl);
3347
3348         /*
3349          * In order to support SR-IOV and eventually VMDq it is necessary to set
3350          * the VMOLR to enable the appropriate modes.  Without this workaround
3351          * we will have issues with VLAN tag stripping not being done for frames
3352          * that are only arriving because we are the default pool
3353          */
3354         if (hw->mac.type < e1000_82576)
3355                 return;
3356
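             /* merge the mode computed above into the existing VMOLR for the PF
              * pool, replacing only the promiscuous/overflow receive controls */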
3357         vmolr |= rd32(E1000_VMOLR(vfn)) &
3358                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3359         wr32(E1000_VMOLR(vfn), vmolr);
3360         igb_restore_vf_multicasts(adapter);
3361 }
3362
3363 /* Need to wait a few seconds after link up to get diagnostic information from
3364  * the phy */
3365 static void igb_update_phy_info(unsigned long data)
3366 {
3367         struct igb_adapter *adapter = (struct igb_adapter *) data;
3368         igb_get_phy_info(&adapter->hw);
3369 }
3370
3371 /**
3372  * igb_has_link - check shared code for link and determine up/down
3373  * @adapter: pointer to driver private info
3374  **/
3375 bool igb_has_link(struct igb_adapter *adapter)
3376 {
3377         struct e1000_hw *hw = &adapter->hw;
3378         bool link_active = false;
3379         s32 ret_val = 0;
3380
3381         /* get_link_status is set on LSC (link status) interrupt or
3382          * rx sequence error interrupt.  get_link_status will stay
3383          * true until the e1000_check_for_link establishes link
3384          * for copper adapters ONLY
3385          */
3386         switch (hw->phy.media_type) {
3387         case e1000_media_type_copper:
3388                 if (hw->mac.get_link_status) {
3389                         ret_val = hw->mac.ops.check_for_link(hw);
3390                         link_active = !hw->mac.get_link_status;
3391                 } else {
3392                         link_active = true;
3393                 }
3394                 break;
3395         case e1000_media_type_internal_serdes:
3396                 ret_val = hw->mac.ops.check_for_link(hw);
3397                 link_active = hw->mac.serdes_has_link;
3398                 break;
3399         default:
3400         case e1000_media_type_unknown:
3401                 break;
3402         }
3403
3404         return link_active;
3405 }
3406
3407 /**
3408  * igb_watchdog - Timer Call-back
3409  * @data: pointer to adapter cast into an unsigned long
3410  **/
3411 static void igb_watchdog(unsigned long data)
3412 {
3413         struct igb_adapter *adapter = (struct igb_adapter *)data;
3414         /* Do the rest outside of interrupt context */
3415         schedule_work(&adapter->watchdog_task);
3416 }
3417
3418 static void igb_watchdog_task(struct work_struct *work)
3419 {
3420         struct igb_adapter *adapter = container_of(work,
3421                                                    struct igb_adapter,
3422                                                    watchdog_task);
3423         struct e1000_hw *hw = &adapter->hw;
3424         struct net_device *netdev = adapter->netdev;
3425         u32 link;
3426         int i;
3427
3428         link = igb_has_link(adapter);
3429         if (link) {
3430                 if (!netif_carrier_ok(netdev)) {
3431                         u32 ctrl;
3432                         hw->mac.ops.get_speed_and_duplex(hw,
3433                                                          &adapter->link_speed,
3434                                                          &adapter->link_duplex);
3435
3436                         ctrl = rd32(E1000_CTRL);
3437                         /* Link status message must follow this format */
3438                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3439                                  "Flow Control: %s\n",
3440                                netdev->name,
3441                                adapter->link_speed,
3442                                adapter->link_duplex == FULL_DUPLEX ?
3443                                  "Full Duplex" : "Half Duplex",
3444                                ((ctrl & E1000_CTRL_TFCE) &&
3445                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3446                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3447                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3448
3449                         /* adjust timeout factor according to speed/duplex */
3450                         adapter->tx_timeout_factor = 1;
3451                         switch (adapter->link_speed) {
3452                         case SPEED_10:
3453                                 adapter->tx_timeout_factor = 14;
3454                                 break;
3455                         case SPEED_100:
3456                                 /* maybe add some timeout factor ? */
3457                                 break;
3458                         }
3459
3460                         netif_carrier_on(netdev);
3461
3462                         igb_ping_all_vfs(adapter);
3463
3464                         /* link state has changed, schedule phy info update */
3465                         if (!test_bit(__IGB_DOWN, &adapter->state))
3466                                 mod_timer(&adapter->phy_info_timer,
3467                                           round_jiffies(jiffies + 2 * HZ));
3468                 }
3469         } else {
3470                 if (netif_carrier_ok(netdev)) {
3471                         adapter->link_speed = 0;
3472                         adapter->link_duplex = 0;
3473                         /* Link status message must follow this format */
3474                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3475                                netdev->name);
3476                         netif_carrier_off(netdev);
3477
3478                         igb_ping_all_vfs(adapter);
3479
3480                         /* link state has changed, schedule phy info update */
3481                         if (!test_bit(__IGB_DOWN, &adapter->state))
3482                                 mod_timer(&adapter->phy_info_timer,
3483                                           round_jiffies(jiffies + 2 * HZ));
3484                 }
3485         }
3486
3487         spin_lock(&adapter->stats64_lock);
3488         igb_update_stats(adapter, &adapter->stats64);
3489         spin_unlock(&adapter->stats64_lock);
3490
3491         for (i = 0; i < adapter->num_tx_queues; i++) {
3492                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3493                 if (!netif_carrier_ok(netdev)) {
3494                         /* We've lost link, so the controller stops DMA,
3495                          * but we've got queued Tx work that's never going
3496                          * to get done, so reset controller to flush Tx.
3497                          * (Do the reset outside of interrupt context). */
3498                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3499                                 adapter->tx_timeout_count++;
3500                                 schedule_work(&adapter->reset_task);
3501                                 /* return immediately since reset is imminent */
3502                                 return;
3503                         }
3504                 }
3505
3506                 /* Force detection of hung controller every watchdog period */
3507                 tx_ring->detect_tx_hung = true;
3508         }
3509
3510         /* Cause software interrupt to ensure rx ring is cleaned */
3511         if (adapter->msix_entries) {
3512                 u32 eics = 0;
3513                 for (i = 0; i < adapter->num_q_vectors; i++) {
3514                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3515                         eics |= q_vector->eims_value;
3516                 }
3517                 wr32(E1000_EICS, eics);
3518         } else {
3519                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3520         }
3521
3522         /* Reset the timer */
3523         if (!test_bit(__IGB_DOWN, &adapter->state))
3524                 mod_timer(&adapter->watchdog_timer,
3525                           round_jiffies(jiffies + 2 * HZ));
3526 }
3527
3528 enum latency_range {
3529         lowest_latency = 0,
3530         low_latency = 1,
3531         bulk_latency = 2,
3532         latency_invalid = 255
3533 };
3534
3535 /**
3536  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3537  *
3538  *      Stores a new ITR value based strictly on packet size.  This
3539  *      algorithm is less sophisticated than that used in igb_update_itr,
3540  *      due to the difficulty of synchronizing statistics across multiple
3541  *      receive rings.  The divisors and thresholds used by this function
3542  *      were determined based on theoretical maximum wire speed and testing
3543  *      data, in order to minimize response time while increasing bulk
3544  *      throughput.
3545  *      This functionality is controlled by the InterruptThrottleRate module
3546  *      parameter (see igb_param.c)
3547  *      NOTE:  This function is called only when operating in a multiqueue
3548  *             receive environment.
3549  * @q_vector: pointer to q_vector
3550  **/
3551 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3552 {
3553         int new_val = q_vector->itr_val;
3554         int avg_wire_size = 0;
3555         struct igb_adapter *adapter = q_vector->adapter;
3556         struct igb_ring *ring;
3557         unsigned int packets;
3558
3559         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3560          * ints/sec - ITR timer value of 120 ticks.
3561          */
3562         if (adapter->link_speed != SPEED_1000) {
3563                 new_val = 976;
3564                 goto set_itr_val;
3565         }
3566
3567         ring = q_vector->rx_ring;
3568         if (ring) {
3569                 packets = ACCESS_ONCE(ring->total_packets);
3570
3571                 if (packets)
3572                         avg_wire_size = ring->total_bytes / packets;
3573         }
3574
3575         ring = q_vector->tx_ring;
3576         if (ring) {
3577                 packets = ACCESS_ONCE(ring->total_packets);
3578
3579                 if (packets)
3580                         avg_wire_size = max_t(u32, avg_wire_size,
3581                                               ring->total_bytes / packets);
3582         }
3583
3584         /* if avg_wire_size isn't set no work was done */
3585         if (!avg_wire_size)
3586                 goto clear_counts;
3587
3588         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3589         avg_wire_size += 24;
3590
3591         /* Don't starve jumbo frames */
3592         avg_wire_size = min(avg_wire_size, 3000);
3593
3594         /* Give a little boost to mid-size frames */
3595         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3596                 new_val = avg_wire_size / 3;
3597         else
3598                 new_val = avg_wire_size / 2;
3599
3600         /* when in itr mode 3 do not exceed 20K ints/sec */
3601         if (adapter->rx_itr_setting == 3 && new_val < 196)
3602                 new_val = 196;
3603
3604 set_itr_val:
3605         if (new_val != q_vector->itr_val) {
3606                 q_vector->itr_val = new_val;
3607                 q_vector->set_itr = 1;
3608         }
3609 clear_counts:
3610         if (q_vector->rx_ring) {
3611                 q_vector->rx_ring->total_bytes = 0;
3612                 q_vector->rx_ring->total_packets = 0;
3613         }
3614         if (q_vector->tx_ring) {
3615                 q_vector->tx_ring->total_bytes = 0;
3616                 q_vector->tx_ring->total_packets = 0;
3617         }
3618 }
3619
3620 /**
3621  * igb_update_itr - update the dynamic ITR value based on statistics
3622  *      Stores a new ITR value based on packets and byte
3623  *      counts during the last interrupt.  The advantage of per interrupt
3624  *      computation is faster updates and more accurate ITR for the current
3625  *      traffic pattern.  Constants in this function were computed
3626  *      based on theoretical maximum wire speed and thresholds were set based
3627  *      on testing data as well as attempting to minimize response time
3628  *      while increasing bulk throughput.
3629  *      this functionality is controlled by the InterruptThrottleRate module
3630  *      parameter (see igb_param.c)
3631  *      NOTE:  These calculations are only valid when operating in a single-
3632  *             queue environment.
3633  * @adapter: pointer to adapter
3634  * @itr_setting: current q_vector->itr_val
3635  * @packets: the number of packets during this measurement interval
3636  * @bytes: the number of bytes during this measurement interval
3637  **/
3638 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3639                                    int packets, int bytes)
3640 {
3641         unsigned int retval = itr_setting;
3642
3643         if (packets == 0)
3644                 goto update_itr_done;
3645
3646         switch (itr_setting) {
3647         case lowest_latency:
3648                 /* handle TSO and jumbo frames */
3649                 if (bytes/packets > 8000)
3650                         retval = bulk_latency;
3651                 else if ((packets < 5) && (bytes > 512))
3652                         retval = low_latency;
3653                 break;
3654         case low_latency:  /* 50 usec aka 20000 ints/s */
3655                 if (bytes > 10000) {
3656                         /* this if handles the TSO accounting */
3657                         if (bytes/packets > 8000) {
3658                                 retval = bulk_latency;
3659                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3660                                 retval = bulk_latency;
3661                         } else if ((packets > 35)) {
3662                                 retval = lowest_latency;
3663                         }
3664                 } else if (bytes/packets > 2000) {
3665                         retval = bulk_latency;
3666                 } else if (packets <= 2 && bytes < 512) {
3667                         retval = lowest_latency;
3668                 }
3669                 break;
3670         case bulk_latency: /* 250 usec aka 4000 ints/s */
3671                 if (bytes > 25000) {
3672                         if (packets > 35)
3673                                 retval = low_latency;
3674                 } else if (bytes < 1500) {
3675                         retval = low_latency;
3676                 }
3677                 break;
3678         }
3679
3680 update_itr_done:
3681         return retval;
3682 }
3683
3684 static void igb_set_itr(struct igb_adapter *adapter)
3685 {
3686         struct igb_q_vector *q_vector = adapter->q_vector[0];
3687         u16 current_itr;
3688         u32 new_itr = q_vector->itr_val;
3689
3690         /* for non-gigabit speeds, just fix the interrupt rate at 4000 ints/sec */
3691         if (adapter->link_speed != SPEED_1000) {
3692                 current_itr = 0;
3693                 new_itr = 4000;
3694                 goto set_itr_now;
3695         }
3696
3697         adapter->rx_itr = igb_update_itr(adapter,
3698                                     adapter->rx_itr,
3699                                     q_vector->rx_ring->total_packets,
3700                                     q_vector->rx_ring->total_bytes);
3701
3702         adapter->tx_itr = igb_update_itr(adapter,
3703                                     adapter->tx_itr,
3704                                     q_vector->tx_ring->total_packets,
3705                                     q_vector->tx_ring->total_bytes);
3706         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3707
3708         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3709         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3710                 current_itr = low_latency;
3711
3712         switch (current_itr) {
3713         /* counts and packets in update_itr are dependent on these numbers */
3714         case lowest_latency:
3715                 new_itr = 56;  /* aka 70,000 ints/sec */
3716                 break;
3717         case low_latency:
3718                 new_itr = 196; /* aka 20,000 ints/sec */
3719                 break;
3720         case bulk_latency:
3721                 new_itr = 980; /* aka 4,000 ints/sec */
3722                 break;
3723         default:
3724                 break;
3725         }
3726
3727 set_itr_now:
3728         q_vector->rx_ring->total_bytes = 0;
3729         q_vector->rx_ring->total_packets = 0;
3730         q_vector->tx_ring->total_bytes = 0;
3731         q_vector->tx_ring->total_packets = 0;
3732
3733         if (new_itr != q_vector->itr_val) {
3734                 /* this attempts to bias the interrupt rate towards Bulk
3735                  * by adding intermediate steps when interrupt rate is
3736                  * increasing */
3737                 new_itr = new_itr > q_vector->itr_val ?
3738                              max((new_itr * q_vector->itr_val) /
3739                                  (new_itr + (q_vector->itr_val >> 2)),
3740                                  new_itr) :
3741                              new_itr;
3742                 /* Don't write the value here; it resets the adapter's
3743                  * internal timer, and causes us to delay far longer than
3744                  * we should between interrupts.  Instead, we write the ITR
3745                  * value at the beginning of the next interrupt so the timing
3746                  * ends up being correct.
3747                  */
3748                 q_vector->itr_val = new_itr;
3749                 q_vector->set_itr = 1;
3750         }
3751 }
3752
3753 #define IGB_TX_FLAGS_CSUM               0x00000001
3754 #define IGB_TX_FLAGS_VLAN               0x00000002
3755 #define IGB_TX_FLAGS_TSO                0x00000004
3756 #define IGB_TX_FLAGS_IPV4               0x00000008
3757 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3758 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3759 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3760
3761 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3762                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3763 {
3764         struct e1000_adv_tx_context_desc *context_desc;
3765         unsigned int i;
3766         int err;
3767         struct igb_buffer *buffer_info;
3768         u32 info = 0, tu_cmd = 0;
3769         u32 mss_l4len_idx;
3770         u8 l4len;
3771
3772         if (skb_header_cloned(skb)) {
3773                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3774                 if (err)
3775                         return err;
3776         }
3777
3778         l4len = tcp_hdrlen(skb);
3779         *hdr_len += l4len;
3780
3781         if (skb->protocol == htons(ETH_P_IP)) {
3782                 struct iphdr *iph = ip_hdr(skb);
3783                 iph->tot_len = 0;
3784                 iph->check = 0;
3785                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3786                                                          iph->daddr, 0,
3787                                                          IPPROTO_TCP,
3788                                                          0);
3789         } else if (skb_is_gso_v6(skb)) {
3790                 ipv6_hdr(skb)->payload_len = 0;
3791                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3792                                                        &ipv6_hdr(skb)->daddr,
3793                                                        0, IPPROTO_TCP, 0);
3794         }
3795
3796         i = tx_ring->next_to_use;
3797
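             /* build a TSO context descriptor describing the header layout, MSS
              * and L4 length so the hardware can segment the payload */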
3798         buffer_info = &tx_ring->buffer_info[i];
3799         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3800         /* VLAN MACLEN IPLEN */
3801         if (tx_flags & IGB_TX_FLAGS_VLAN)
3802                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3803         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3804         *hdr_len += skb_network_offset(skb);
3805         info |= skb_network_header_len(skb);
3806         *hdr_len += skb_network_header_len(skb);
3807         context_desc->vlan_macip_lens = cpu_to_le32(info);
3808
3809         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3810         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3811
3812         if (skb->protocol == htons(ETH_P_IP))
3813                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3814         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3815
3816         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3817
3818         /* MSS L4LEN IDX */
3819         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3820         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3821
3822         /* For 82575, context index must be unique per ring. */
3823         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3824                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3825
3826         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3827         context_desc->seqnum_seed = 0;
3828
3829         buffer_info->time_stamp = jiffies;
3830         buffer_info->next_to_watch = i;
3831         buffer_info->dma = 0;
3832         i++;
3833         if (i == tx_ring->count)
3834                 i = 0;
3835
3836         tx_ring->next_to_use = i;
3837
3838         return true;
3839 }
3840
3841 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3842                                    struct sk_buff *skb, u32 tx_flags)
3843 {
3844         struct e1000_adv_tx_context_desc *context_desc;
3845         struct device *dev = tx_ring->dev;
3846         struct igb_buffer *buffer_info;
3847         u32 info = 0, tu_cmd = 0;
3848         unsigned int i;
3849
3850         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3851             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3852                 i = tx_ring->next_to_use;
3853                 buffer_info = &tx_ring->buffer_info[i];
3854                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3855
3856                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3857                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3858
3859                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3860                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3861                         info |= skb_network_header_len(skb);
3862
3863                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3864
3865                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3866
3867                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3868                         __be16 protocol;
3869
3870                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3871                                 const struct vlan_ethhdr *vhdr =
3872                                           (const struct vlan_ethhdr*)skb->data;
3873
3874                                 protocol = vhdr->h_vlan_encapsulated_proto;
3875                         } else {
3876                                 protocol = skb->protocol;
3877                         }
3878
3879                         switch (protocol) {
3880                         case cpu_to_be16(ETH_P_IP):
3881                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3882                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3883                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3884                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3885                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3886                                 break;
3887                         case cpu_to_be16(ETH_P_IPV6):
3888                                 /* XXX what about other V6 headers?? */
3889                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3890                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3891                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3892                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3893                                 break;
3894                         default:
3895                                 if (unlikely(net_ratelimit()))
3896                                         dev_warn(dev,
3897                                             "partial checksum but proto=%x!\n",
3898                                             skb->protocol);
3899                                 break;
3900                         }
3901                 }
3902
3903                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3904                 context_desc->seqnum_seed = 0;
3905                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3906                         context_desc->mss_l4len_idx =
3907                                 cpu_to_le32(tx_ring->reg_idx << 4);
3908
3909                 buffer_info->time_stamp = jiffies;
3910                 buffer_info->next_to_watch = i;
3911                 buffer_info->dma = 0;
3912
3913                 i++;
3914                 if (i == tx_ring->count)
3915                         i = 0;
3916                 tx_ring->next_to_use = i;
3917
3918                 return true;
3919         }
3920         return false;
3921 }
3922
3923 #define IGB_MAX_TXD_PWR 16
3924 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3925
3926 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3927                                  unsigned int first)
3928 {
3929         struct igb_buffer *buffer_info;
3930         struct device *dev = tx_ring->dev;
3931         unsigned int hlen = skb_headlen(skb);
3932         unsigned int count = 0, i;
3933         unsigned int f;
3934         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3935
3936         i = tx_ring->next_to_use;
3937
3938         buffer_info = &tx_ring->buffer_info[i];
3939         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3940         buffer_info->length = hlen;
3941         /* set time_stamp *before* dma to help avoid a possible race */
3942         buffer_info->time_stamp = jiffies;
3943         buffer_info->next_to_watch = i;
3944         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3945                                           DMA_TO_DEVICE);
3946         if (dma_mapping_error(dev, buffer_info->dma))
3947                 goto dma_error;
3948
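             /* map each paged fragment and record it in the following
              * buffer_info entries, wrapping around the ring as needed */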
3949         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3950                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3951                 unsigned int len = frag->size;
3952
3953                 count++;
3954                 i++;
3955                 if (i == tx_ring->count)
3956                         i = 0;
3957
3958                 buffer_info = &tx_ring->buffer_info[i];
3959                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3960                 buffer_info->length = len;
3961                 buffer_info->time_stamp = jiffies;
3962                 buffer_info->next_to_watch = i;
3963                 buffer_info->mapped_as_page = true;
3964                 buffer_info->dma = dma_map_page(dev,
3965                                                 frag->page,
3966                                                 frag->page_offset,
3967                                                 len,
3968                                                 DMA_TO_DEVICE);
3969                 if (dma_mapping_error(dev, buffer_info->dma))
3970                         goto dma_error;
3971
3972         }
3973
3974         tx_ring->buffer_info[i].skb = skb;
3975         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
3976         /* multiply data chunks by size of headers */
3977         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3978         tx_ring->buffer_info[i].gso_segs = gso_segs;
3979         tx_ring->buffer_info[first].next_to_watch = i;
3980
3981         return ++count;
3982
3983 dma_error:
3984         dev_err(dev, "TX DMA map failed\n");
3985
3986         /* clear timestamp and dma mappings for failed buffer_info mapping */
3987         buffer_info->dma = 0;
3988         buffer_info->time_stamp = 0;
3989         buffer_info->length = 0;
3990         buffer_info->next_to_watch = 0;
3991         buffer_info->mapped_as_page = false;
3992
3993         /* clear timestamp and dma mappings for remaining portion of packet */
3994         while (count--) {
3995                 if (i == 0)
3996                         i = tx_ring->count;
3997                 i--;
3998                 buffer_info = &tx_ring->buffer_info[i];
3999                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4000         }
4001
4002         return 0;
4003 }
4004
4005 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4006                                     u32 tx_flags, int count, u32 paylen,
4007                                     u8 hdr_len)
4008 {
4009         union e1000_adv_tx_desc *tx_desc;
4010         struct igb_buffer *buffer_info;
4011         u32 olinfo_status = 0, cmd_type_len;
4012         unsigned int i = tx_ring->next_to_use;
4013
4014         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4015                         E1000_ADVTXD_DCMD_DEXT);
4016
4017         if (tx_flags & IGB_TX_FLAGS_VLAN)
4018                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4019
4020         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4021                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4022
4023         if (tx_flags & IGB_TX_FLAGS_TSO) {
4024                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4025
4026                 /* insert tcp checksum */
4027                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4028
4029                 /* insert ip checksum */
4030                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4031                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4032
4033         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4034                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4035         }
4036
4037         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4038             (tx_flags & (IGB_TX_FLAGS_CSUM |
4039                          IGB_TX_FLAGS_TSO |
4040                          IGB_TX_FLAGS_VLAN)))
4041                 olinfo_status |= tx_ring->reg_idx << 4;
4042
4043         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4044
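             /* write one advanced data descriptor per mapped buffer; all share
              * the command flags and offload status computed above */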
4045         do {
4046                 buffer_info = &tx_ring->buffer_info[i];
4047                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4048                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4049                 tx_desc->read.cmd_type_len =
4050                         cpu_to_le32(cmd_type_len | buffer_info->length);
4051                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4052                 count--;
4053                 i++;
4054                 if (i == tx_ring->count)
4055                         i = 0;
4056         } while (count > 0);
4057
4058         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4059         /* Force memory writes to complete before letting h/w
4060          * know there are new descriptors to fetch.  (Only
4061          * applicable for weak-ordered memory model archs,
4062          * such as IA-64). */
4063         wmb();
4064
4065         tx_ring->next_to_use = i;
4066         writel(i, tx_ring->tail);
4067         /* we need this if more than one processor can write to our tail
4068          * at a time; it synchronizes IO on IA64/Altix systems */
4069         mmiowb();
4070 }
4071
4072 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4073 {
4074         struct net_device *netdev = tx_ring->netdev;
4075
4076         netif_stop_subqueue(netdev, tx_ring->queue_index);
4077
4078         /* Herbert's original patch had:
4079          *  smp_mb__after_netif_stop_queue();
4080          * but since that doesn't exist yet, just open code it. */
4081         smp_mb();
4082
4083         /* We need to check again in case another CPU has just
4084          * made room available. */
4085         if (igb_desc_unused(tx_ring) < size)
4086                 return -EBUSY;
4087
4088         /* A reprieve! */
4089         netif_wake_subqueue(netdev, tx_ring->queue_index);
4090
4091         u64_stats_update_begin(&tx_ring->tx_syncp2);
4092         tx_ring->tx_stats.restart_queue2++;
4093         u64_stats_update_end(&tx_ring->tx_syncp2);
4094
4095         return 0;
4096 }
4097
4098 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4099 {
4100         if (igb_desc_unused(tx_ring) >= size)
4101                 return 0;
4102         return __igb_maybe_stop_tx(tx_ring, size);
4103 }
4104
4105 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4106                                     struct igb_ring *tx_ring)
4107 {
4108         int tso = 0, count;
4109         u32 tx_flags = 0;
4110         u16 first;
4111         u8 hdr_len = 0;
4112
4113         /* need: 1 descriptor per page,
4114          *       + 2 desc gap to keep tail from touching head,
4115          *       + 1 desc for skb->data,
4116          *       + 1 desc for context descriptor,
4117          * otherwise try next time */
4118         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4119                 /* this is a hard error */
4120                 return NETDEV_TX_BUSY;
4121         }
4122
4123         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4124                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4125                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4126         }
4127
4128         if (vlan_tx_tag_present(skb)) {
4129                 tx_flags |= IGB_TX_FLAGS_VLAN;
4130                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4131         }
4132
4133         if (skb->protocol == htons(ETH_P_IP))
4134                 tx_flags |= IGB_TX_FLAGS_IPV4;
4135
4136         first = tx_ring->next_to_use;
4137         if (skb_is_gso(skb)) {
4138                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4139
4140                 if (tso < 0) {
4141                         dev_kfree_skb_any(skb);
4142                         return NETDEV_TX_OK;
4143                 }
4144         }
4145
4146         if (tso)
4147                 tx_flags |= IGB_TX_FLAGS_TSO;
4148         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4149                  (skb->ip_summed == CHECKSUM_PARTIAL))
4150                 tx_flags |= IGB_TX_FLAGS_CSUM;
4151
4152         /*
4153          * count reflects the descriptors mapped; if 0 or less, a mapping error
4154          * has occurred and we need to rewind the descriptor queue
4155          */
4156         count = igb_tx_map_adv(tx_ring, skb, first);
4157         if (!count) {
4158                 dev_kfree_skb_any(skb);
4159                 tx_ring->buffer_info[first].time_stamp = 0;
4160                 tx_ring->next_to_use = first;
4161                 return NETDEV_TX_OK;
4162         }
4163
4164         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4165
4166         /* Make sure there is space in the ring for the next send. */
4167         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4168
4169         return NETDEV_TX_OK;
4170 }
4171
4172 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4173                                       struct net_device *netdev)
4174 {
4175         struct igb_adapter *adapter = netdev_priv(netdev);
4176         struct igb_ring *tx_ring;
4177         int r_idx = 0;
4178
4179         if (test_bit(__IGB_DOWN, &adapter->state)) {
4180                 dev_kfree_skb_any(skb);
4181                 return NETDEV_TX_OK;
4182         }
4183
4184         if (skb->len <= 0) {
4185                 dev_kfree_skb_any(skb);
4186                 return NETDEV_TX_OK;
4187         }
4188
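        /* pick the tx ring for this skb from the queue mapping chosen by the
         * stack; assuming IGB_ABS_MAX_TX_QUEUES is a power of two, the AND
         * acts as a cheap modulo */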
4189         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4190         tx_ring = adapter->multi_tx_table[r_idx];
4191
4192         /* This goes back to the question of how to logically map a tx queue
4193          * to a flow.  Right now, performance is slightly worse when using
4194          * multiple tx queues.  If the stack breaks away from a
4195          * single qdisc implementation, we can look at this again. */
4196         return igb_xmit_frame_ring_adv(skb, tx_ring);
4197 }
4198
4199 /**
4200  * igb_tx_timeout - Respond to a Tx Hang
4201  * @netdev: network interface device structure
4202  **/
4203 static void igb_tx_timeout(struct net_device *netdev)
4204 {
4205         struct igb_adapter *adapter = netdev_priv(netdev);
4206         struct e1000_hw *hw = &adapter->hw;
4207
4208         /* Do the reset outside of interrupt context */
4209         adapter->tx_timeout_count++;
4210
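        /* on 82580, flag that a full (global) device reset is wanted when the
         * scheduled reset task runs */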
4211         if (hw->mac.type == e1000_82580)
4212                 hw->dev_spec._82575.global_device_reset = true;
4213
4214         schedule_work(&adapter->reset_task);
4215         wr32(E1000_EICS,
4216              (adapter->eims_enable_mask & ~adapter->eims_other));
4217 }
4218
4219 static void igb_reset_task(struct work_struct *work)
4220 {
4221         struct igb_adapter *adapter;
4222         adapter = container_of(work, struct igb_adapter, reset_task);
4223
4224         igb_dump(adapter);
4225         netdev_err(adapter->netdev, "Reset adapter\n");
4226         igb_reinit_locked(adapter);
4227 }
4228
4229 /**
4230  * igb_get_stats64 - Get System Network Statistics
4231  * @netdev: network interface device structure
4232  * @stats: rtnl_link_stats64 pointer
4233  *
4234  **/
4235 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4236                                                  struct rtnl_link_stats64 *stats)
4237 {
4238         struct igb_adapter *adapter = netdev_priv(netdev);
4239
4240         spin_lock(&adapter->stats64_lock);
4241         igb_update_stats(adapter, &adapter->stats64);
4242         memcpy(stats, &adapter->stats64, sizeof(*stats));
4243         spin_unlock(&adapter->stats64_lock);
4244
4245         return stats;
4246 }
4247
4248 /**
4249  * igb_change_mtu - Change the Maximum Transfer Unit
4250  * @netdev: network interface device structure
4251  * @new_mtu: new value for maximum frame size
4252  *
4253  * Returns 0 on success, negative on failure
4254  **/
4255 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4256 {
4257         struct igb_adapter *adapter = netdev_priv(netdev);
4258         struct pci_dev *pdev = adapter->pdev;
4259         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4260         u32 rx_buffer_len, i;
4261
4262         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4263                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4264                 return -EINVAL;
4265         }
4266
4267         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4268                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4269                 return -EINVAL;
4270         }
4271
4272         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4273                 msleep(1);
4274
4275         /* igb_down has a dependency on max_frame_size */
4276         adapter->max_frame_size = max_frame;
4277
4278         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4279          * means we reserve 2 more; this pushes us to allocate from the next
4280          * larger slab size.
4281          * i.e. RXBUFFER_2048 --> size-4096 slab
4282          */
4283
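        /* 82580 can prepend a hardware timestamp (IGB_TS_HDR_LEN bytes) to
         * received packets, so leave room for it when sizing rx buffers */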
4284         if (adapter->hw.mac.type == e1000_82580)
4285                 max_frame += IGB_TS_HDR_LEN;
4286
4287         if (max_frame <= IGB_RXBUFFER_1024)
4288                 rx_buffer_len = IGB_RXBUFFER_1024;
4289         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4290                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4291         else
4292                 rx_buffer_len = IGB_RXBUFFER_128;
4293
4294         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4295              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4296                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4297
4298         if ((adapter->hw.mac.type == e1000_82580) &&
4299             (rx_buffer_len == IGB_RXBUFFER_128))
4300                 rx_buffer_len += IGB_RXBUFFER_64;
4301
4302         if (netif_running(netdev))
4303                 igb_down(adapter);
4304
4305         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4306                  netdev->mtu, new_mtu);
4307         netdev->mtu = new_mtu;
4308
4309         for (i = 0; i < adapter->num_rx_queues; i++)
4310                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4311
4312         if (netif_running(netdev))
4313                 igb_up(adapter);
4314         else
4315                 igb_reset(adapter);
4316
4317         clear_bit(__IGB_RESETTING, &adapter->state);
4318
4319         return 0;
4320 }
4321
4322 /**
4323  * igb_update_stats - Update the board statistics counters
4324  * @adapter: board private structure
4325  **/
4326
4327 void igb_update_stats(struct igb_adapter *adapter,
4328                       struct rtnl_link_stats64 *net_stats)
4329 {
4330         struct e1000_hw *hw = &adapter->hw;
4331         struct pci_dev *pdev = adapter->pdev;
4332         u32 reg, mpc;
4333         u16 phy_tmp;
4334         int i;
4335         u64 bytes, packets;
4336         unsigned int start;
4337         u64 _bytes, _packets;
4338
4339 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4340
4341         /*
4342          * Prevent stats update while adapter is being reset, or if the pci
4343          * connection is down.
4344          */
4345         if (adapter->link_speed == 0)
4346                 return;
4347         if (pci_channel_offline(pdev))
4348                 return;
4349
4350         bytes = 0;
4351         packets = 0;
4352         for (i = 0; i < adapter->num_rx_queues; i++) {
4353                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4354                 struct igb_ring *ring = adapter->rx_ring[i];
4355
4356                 ring->rx_stats.drops += rqdpc_tmp;
4357                 net_stats->rx_fifo_errors += rqdpc_tmp;
4358
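                /* read the 64-bit ring counters under the u64_stats seqcount
                 * so the values stay consistent on 32-bit hosts */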
4359                 do {
4360                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4361                         _bytes = ring->rx_stats.bytes;
4362                         _packets = ring->rx_stats.packets;
4363                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4364                 bytes += _bytes;
4365                 packets += _packets;
4366         }
4367
4368         net_stats->rx_bytes = bytes;
4369         net_stats->rx_packets = packets;
4370
4371         bytes = 0;
4372         packets = 0;
4373         for (i = 0; i < adapter->num_tx_queues; i++) {
4374                 struct igb_ring *ring = adapter->tx_ring[i];
4375                 do {
4376                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4377                         _bytes = ring->tx_stats.bytes;
4378                         _packets = ring->tx_stats.packets;
4379                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4380                 bytes += _bytes;
4381                 packets += _packets;
4382         }
4383         net_stats->tx_bytes = bytes;
4384         net_stats->tx_packets = packets;
4385
4386         /* read stats registers */
4387         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4388         adapter->stats.gprc += rd32(E1000_GPRC);
4389         adapter->stats.gorc += rd32(E1000_GORCL);
4390         rd32(E1000_GORCH); /* clear GORCL */
4391         adapter->stats.bprc += rd32(E1000_BPRC);
4392         adapter->stats.mprc += rd32(E1000_MPRC);
4393         adapter->stats.roc += rd32(E1000_ROC);
4394
4395         adapter->stats.prc64 += rd32(E1000_PRC64);
4396         adapter->stats.prc127 += rd32(E1000_PRC127);
4397         adapter->stats.prc255 += rd32(E1000_PRC255);
4398         adapter->stats.prc511 += rd32(E1000_PRC511);
4399         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4400         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4401         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4402         adapter->stats.sec += rd32(E1000_SEC);
4403
4404         mpc = rd32(E1000_MPC);
4405         adapter->stats.mpc += mpc;
4406         net_stats->rx_fifo_errors += mpc;
4407         adapter->stats.scc += rd32(E1000_SCC);
4408         adapter->stats.ecol += rd32(E1000_ECOL);
4409         adapter->stats.mcc += rd32(E1000_MCC);
4410         adapter->stats.latecol += rd32(E1000_LATECOL);
4411         adapter->stats.dc += rd32(E1000_DC);
4412         adapter->stats.rlec += rd32(E1000_RLEC);
4413         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4414         adapter->stats.xontxc += rd32(E1000_XONTXC);
4415         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4416         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4417         adapter->stats.fcruc += rd32(E1000_FCRUC);
4418         adapter->stats.gptc += rd32(E1000_GPTC);
4419         adapter->stats.gotc += rd32(E1000_GOTCL);
4420         rd32(E1000_GOTCH); /* clear GOTCL */
4421         adapter->stats.rnbc += rd32(E1000_RNBC);
4422         adapter->stats.ruc += rd32(E1000_RUC);
4423         adapter->stats.rfc += rd32(E1000_RFC);
4424         adapter->stats.rjc += rd32(E1000_RJC);
4425         adapter->stats.tor += rd32(E1000_TORH);
4426         adapter->stats.tot += rd32(E1000_TOTH);
4427         adapter->stats.tpr += rd32(E1000_TPR);
4428
4429         adapter->stats.ptc64 += rd32(E1000_PTC64);
4430         adapter->stats.ptc127 += rd32(E1000_PTC127);
4431         adapter->stats.ptc255 += rd32(E1000_PTC255);
4432         adapter->stats.ptc511 += rd32(E1000_PTC511);
4433         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4434         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4435
4436         adapter->stats.mptc += rd32(E1000_MPTC);
4437         adapter->stats.bptc += rd32(E1000_BPTC);
4438
4439         adapter->stats.tpt += rd32(E1000_TPT);
4440         adapter->stats.colc += rd32(E1000_COLC);
4441
4442         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4443         /* read internal phy specific stats */
4444         reg = rd32(E1000_CTRL_EXT);
4445         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4446                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4447                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4448         }
4449
4450         adapter->stats.tsctc += rd32(E1000_TSCTC);
4451         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4452
4453         adapter->stats.iac += rd32(E1000_IAC);
4454         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4455         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4456         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4457         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4458         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4459         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4460         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4461         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4462
4463         /* Fill out the OS statistics structure */
4464         net_stats->multicast = adapter->stats.mprc;
4465         net_stats->collisions = adapter->stats.colc;
4466
4467         /* Rx Errors */
4468
4469         /* RLEC on some newer hardware can be incorrect so build
4470          * our own version based on RUC and ROC */
4471         net_stats->rx_errors = adapter->stats.rxerrc +
4472                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4473                 adapter->stats.ruc + adapter->stats.roc +
4474                 adapter->stats.cexterr;
4475         net_stats->rx_length_errors = adapter->stats.ruc +
4476                                       adapter->stats.roc;
4477         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4478         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4479         net_stats->rx_missed_errors = adapter->stats.mpc;
4480
4481         /* Tx Errors */
4482         net_stats->tx_errors = adapter->stats.ecol +
4483                                adapter->stats.latecol;
4484         net_stats->tx_aborted_errors = adapter->stats.ecol;
4485         net_stats->tx_window_errors = adapter->stats.latecol;
4486         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4487
4488         /* Tx Dropped needs to be maintained elsewhere */
4489
4490         /* Phy Stats */
4491         if (hw->phy.media_type == e1000_media_type_copper) {
4492                 if ((adapter->link_speed == SPEED_1000) &&
4493                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4494                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4495                         adapter->phy_stats.idle_errors += phy_tmp;
4496                 }
4497         }
4498
4499         /* Management Stats */
4500         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4501         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4502         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4503 }
4504
4505 static irqreturn_t igb_msix_other(int irq, void *data)
4506 {
4507         struct igb_adapter *adapter = data;
4508         struct e1000_hw *hw = &adapter->hw;
4509         u32 icr = rd32(E1000_ICR);
4510         /* reading ICR causes bit 31 of EICR to be cleared */
4511
4512         if (icr & E1000_ICR_DRSTA)
4513                 schedule_work(&adapter->reset_task);
4514
4515         if (icr & E1000_ICR_DOUTSYNC) {
4516                 /* HW is reporting DMA is out of sync */
4517                 adapter->stats.doosync++;
4518         }
4519
4520         /* Check for a mailbox event */
4521         if (icr & E1000_ICR_VMMB)
4522                 igb_msg_task(adapter);
4523
4524         if (icr & E1000_ICR_LSC) {
4525                 hw->mac.get_link_status = 1;
4526                 /* guard against interrupt when we're going down */
4527                 if (!test_bit(__IGB_DOWN, &adapter->state))
4528                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4529         }
4530
4531         if (adapter->vfs_allocated_count)
4532                 wr32(E1000_IMS, E1000_IMS_LSC |
4533                                 E1000_IMS_VMMB |
4534                                 E1000_IMS_DOUTSYNC);
4535         else
4536                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4537         wr32(E1000_EIMS, adapter->eims_other);
4538
4539         return IRQ_HANDLED;
4540 }
4541
4542 static void igb_write_itr(struct igb_q_vector *q_vector)
4543 {
4544         struct igb_adapter *adapter = q_vector->adapter;
4545         u32 itr_val = q_vector->itr_val & 0x7FFC;
4546
4547         if (!q_vector->set_itr)
4548                 return;
4549
4550         if (!itr_val)
4551                 itr_val = 0x4;
4552
4553         if (adapter->hw.mac.type == e1000_82575)
4554                 itr_val |= itr_val << 16;
4555         else
4556                 itr_val |= 0x8000000;
4557
4558         writel(itr_val, q_vector->itr_register);
4559         q_vector->set_itr = 0;
4560 }
4561
4562 static irqreturn_t igb_msix_ring(int irq, void *data)
4563 {
4564         struct igb_q_vector *q_vector = data;
4565
4566         /* Write the ITR value calculated from the previous interrupt. */
4567         igb_write_itr(q_vector);
4568
4569         napi_schedule(&q_vector->napi);
4570
4571         return IRQ_HANDLED;
4572 }
4573
4574 #ifdef CONFIG_IGB_DCA
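/*
 * Direct Cache Access: tag descriptor write-backs with the CPU currently
 * servicing this q_vector so the chipset can place them in that CPU's cache.
 * Reprogrammed whenever the vector is handled on a different CPU.
 */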
4575 static void igb_update_dca(struct igb_q_vector *q_vector)
4576 {
4577         struct igb_adapter *adapter = q_vector->adapter;
4578         struct e1000_hw *hw = &adapter->hw;
4579         int cpu = get_cpu();
4580
4581         if (q_vector->cpu == cpu)
4582                 goto out_no_update;
4583
4584         if (q_vector->tx_ring) {
4585                 int q = q_vector->tx_ring->reg_idx;
4586                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4587                 if (hw->mac.type == e1000_82575) {
4588                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4589                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4590                 } else {
4591                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4592                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4593                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4594                 }
4595                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4596                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4597         }
4598         if (q_vector->rx_ring) {
4599                 int q = q_vector->rx_ring->reg_idx;
4600                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4601                 if (hw->mac.type == e1000_82575) {
4602                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4603                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4604                 } else {
4605                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4606                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4607                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4608                 }
4609                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4610                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4611                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4612                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4613         }
4614         q_vector->cpu = cpu;
4615 out_no_update:
4616         put_cpu();
4617 }
4618
4619 static void igb_setup_dca(struct igb_adapter *adapter)
4620 {
4621         struct e1000_hw *hw = &adapter->hw;
4622         int i;
4623
4624         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4625                 return;
4626
4627         /* Always use CB2 mode, difference is masked in the CB driver. */
4628         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4629
4630         for (i = 0; i < adapter->num_q_vectors; i++) {
4631                 adapter->q_vector[i]->cpu = -1;
4632                 igb_update_dca(adapter->q_vector[i]);
4633         }
4634 }
4635
4636 static int __igb_notify_dca(struct device *dev, void *data)
4637 {
4638         struct net_device *netdev = dev_get_drvdata(dev);
4639         struct igb_adapter *adapter = netdev_priv(netdev);
4640         struct pci_dev *pdev = adapter->pdev;
4641         struct e1000_hw *hw = &adapter->hw;
4642         unsigned long event = *(unsigned long *)data;
4643
4644         switch (event) {
4645         case DCA_PROVIDER_ADD:
4646                 /* if already enabled, don't do it again */
4647                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4648                         break;
4649                 if (dca_add_requester(dev) == 0) {
4650                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4651                         dev_info(&pdev->dev, "DCA enabled\n");
4652                         igb_setup_dca(adapter);
4653                         break;
4654                 }
4655                 /* Fall Through since DCA is disabled. */
4656         case DCA_PROVIDER_REMOVE:
4657                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4658                         /* without this a class_device is left
4659                          * hanging around in the sysfs model */
4660                         dca_remove_requester(dev);
4661                         dev_info(&pdev->dev, "DCA disabled\n");
4662                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4663                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4664                 }
4665                 break;
4666         }
4667
4668         return 0;
4669 }
4670
4671 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4672                           void *p)
4673 {
4674         int ret_val;
4675
4676         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4677                                          __igb_notify_dca);
4678
4679         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4680 }
4681 #endif /* CONFIG_IGB_DCA */
4682
4683 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4684 {
4685         struct e1000_hw *hw = &adapter->hw;
4686         u32 ping;
4687         int i;
4688
4689         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4690                 ping = E1000_PF_CONTROL_MSG;
4691                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4692                         ping |= E1000_VT_MSGTYPE_CTS;
4693                 igb_write_mbx(hw, &ping, 1, i);
4694         }
4695 }
4696
4697 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4698 {
4699         struct e1000_hw *hw = &adapter->hw;
4700         u32 vmolr = rd32(E1000_VMOLR(vf));
4701         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4702
4703         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4704                             IGB_VF_FLAG_MULTI_PROMISC);
4705         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4706
4707         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4708                 vmolr |= E1000_VMOLR_MPME;
4709                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4710                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4711         } else {
4712                 /*
4713                  * if we have hashes and we are clearing a multicast promisc
4714                  * flag we need to write the hashes to the MTA as this step
4715                  * was previously skipped
4716                  */
4717                 if (vf_data->num_vf_mc_hashes > 30) {
4718                         vmolr |= E1000_VMOLR_MPME;
4719                 } else if (vf_data->num_vf_mc_hashes) {
4720                         int j;
4721                         vmolr |= E1000_VMOLR_ROMPE;
4722                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4723                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4724                 }
4725         }
4726
4727         wr32(E1000_VMOLR(vf), vmolr);
4728
4729         /* there are flags left unprocessed, likely not supported */
4730         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4731                 return -EINVAL;
4732
4733         return 0;
4734
4735 }
4736
4737 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4738                                   u32 *msgbuf, u32 vf)
4739 {
4740         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4741         u16 *hash_list = (u16 *)&msgbuf[1];
4742         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4743         int i;
4744
4745         /* salt away the number of multicast addresses assigned
4746          * to this VF for later use to restore when the PF multicast
4747          * list changes
4748          */
4749         vf_data->num_vf_mc_hashes = n;
4750
4751         /* only up to 30 hash values supported */
4752         if (n > 30)
4753                 n = 30;
4754
4755         /* store the hashes for later use */
4756         for (i = 0; i < n; i++)
4757                 vf_data->vf_mc_hashes[i] = hash_list[i];
4758
4759         /* Flush and reset the mta with the new values */
4760         igb_set_rx_mode(adapter->netdev);
4761
4762         return 0;
4763 }
4764
4765 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4766 {
4767         struct e1000_hw *hw = &adapter->hw;
4768         struct vf_data_storage *vf_data;
4769         int i, j;
4770
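        /* a VF can store at most 30 multicast hashes; past that we fall back
         * to multicast promiscuous (MPME) for that VF */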
4771         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4772                 u32 vmolr = rd32(E1000_VMOLR(i));
4773                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4774
4775                 vf_data = &adapter->vf_data[i];
4776
4777                 if ((vf_data->num_vf_mc_hashes > 30) ||
4778                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4779                         vmolr |= E1000_VMOLR_MPME;
4780                 } else if (vf_data->num_vf_mc_hashes) {
4781                         vmolr |= E1000_VMOLR_ROMPE;
4782                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4783                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4784                 }
4785                 wr32(E1000_VMOLR(i), vmolr);
4786         }
4787 }
4788
4789 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4790 {
4791         struct e1000_hw *hw = &adapter->hw;
4792         u32 pool_mask, reg, vid;
4793         int i;
4794
4795         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4796
4797         /* Find the vlan filter for this id */
4798         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4799                 reg = rd32(E1000_VLVF(i));
4800
4801                 /* remove the vf from the pool */
4802                 reg &= ~pool_mask;
4803
4804                 /* if pool is empty then remove entry from vfta */
4805                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4806                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4807                         vid = reg & E1000_VLVF_VLANID_MASK;
4808                         igb_vfta_set(hw, vid, false);
4809                         reg = 0;
4810                 }
4811
4812                 wr32(E1000_VLVF(i), reg);
4813         }
4814
4815         adapter->vf_data[vf].vlans_enabled = 0;
4816 }
4817
4818 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4819 {
4820         struct e1000_hw *hw = &adapter->hw;
4821         u32 reg, i;
4822
4823         /* The vlvf table only exists on 82576 hardware and newer */
4824         if (hw->mac.type < e1000_82576)
4825                 return -1;
4826
4827         /* we only need to do this if VMDq is enabled */
4828         if (!adapter->vfs_allocated_count)
4829                 return -1;
4830
4831         /* Find the vlan filter for this id */
4832         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4833                 reg = rd32(E1000_VLVF(i));
4834                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4835                     vid == (reg & E1000_VLVF_VLANID_MASK))
4836                         break;
4837         }
4838
4839         if (add) {
4840                 if (i == E1000_VLVF_ARRAY_SIZE) {
4841                         /* Did not find a matching VLAN ID entry that was
4842                          * enabled.  Search for a free filter entry, i.e.
4843                          * one without the enable bit set
4844                          */
4845                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4846                                 reg = rd32(E1000_VLVF(i));
4847                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4848                                         break;
4849                         }
4850                 }
4851                 if (i < E1000_VLVF_ARRAY_SIZE) {
4852                         /* Found an enabled/available entry */
4853                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4854
4855                         /* if !enabled we need to set this up in vfta */
4856                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4857                                 /* add VID to filter table */
4858                                 igb_vfta_set(hw, vid, true);
4859                                 reg |= E1000_VLVF_VLANID_ENABLE;
4860                         }
4861                         reg &= ~E1000_VLVF_VLANID_MASK;
4862                         reg |= vid;
4863                         wr32(E1000_VLVF(i), reg);
4864
4865                         /* do not modify RLPML for PF devices */
4866                         if (vf >= adapter->vfs_allocated_count)
4867                                 return 0;
4868
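                        /* first VLAN for this VF: grow its rx packet length
                         * limit (RLPML) by 4 bytes to cover the VLAN tag */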
4869                         if (!adapter->vf_data[vf].vlans_enabled) {
4870                                 u32 size;
4871                                 reg = rd32(E1000_VMOLR(vf));
4872                                 size = reg & E1000_VMOLR_RLPML_MASK;
4873                                 size += 4;
4874                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4875                                 reg |= size;
4876                                 wr32(E1000_VMOLR(vf), reg);
4877                         }
4878
4879                         adapter->vf_data[vf].vlans_enabled++;
4880                         return 0;
4881                 }
4882         } else {
4883                 if (i < E1000_VLVF_ARRAY_SIZE) {
4884                         /* remove vf from the pool */
4885                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4886                         /* if pool is empty then remove entry from vfta */
4887                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4888                                 reg = 0;
4889                                 igb_vfta_set(hw, vid, false);
4890                         }
4891                         wr32(E1000_VLVF(i), reg);
4892
4893                         /* do not modify RLPML for PF devices */
4894                         if (vf >= adapter->vfs_allocated_count)
4895                                 return 0;
4896
4897                         adapter->vf_data[vf].vlans_enabled--;
4898                         if (!adapter->vf_data[vf].vlans_enabled) {
4899                                 u32 size;
4900                                 reg = rd32(E1000_VMOLR(vf));
4901                                 size = reg & E1000_VMOLR_RLPML_MASK;
4902                                 size -= 4;
4903                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4904                                 reg |= size;
4905                                 wr32(E1000_VMOLR(vf), reg);
4906                         }
4907                 }
4908         }
4909         return 0;
4910 }
4911
4912 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4913 {
4914         struct e1000_hw *hw = &adapter->hw;
4915
4916         if (vid)
4917                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4918         else
4919                 wr32(E1000_VMVIR(vf), 0);
4920 }
4921
4922 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4923                                int vf, u16 vlan, u8 qos)
4924 {
4925         int err = 0;
4926         struct igb_adapter *adapter = netdev_priv(netdev);
4927
4928         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4929                 return -EINVAL;
4930         if (vlan || qos) {
4931                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4932                 if (err)
4933                         goto out;
4934                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4935                 igb_set_vmolr(adapter, vf, !vlan);
4936                 adapter->vf_data[vf].pf_vlan = vlan;
4937                 adapter->vf_data[vf].pf_qos = qos;
4938                 dev_info(&adapter->pdev->dev,
4939                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4940                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4941                         dev_warn(&adapter->pdev->dev,
4942                                  "The VF VLAN has been set,"
4943                                  " but the PF device is not up.\n");
4944                         dev_warn(&adapter->pdev->dev,
4945                                  "Bring the PF device up before"
4946                                  " attempting to use the VF device.\n");
4947                 }
4948         } else {
4949                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4950                                    false, vf);
4951                 igb_set_vmvir(adapter, vlan, vf);
4952                 igb_set_vmolr(adapter, vf, true);
4953                 adapter->vf_data[vf].pf_vlan = 0;
4954                 adapter->vf_data[vf].pf_qos = 0;
4955         }
4956 out:
4957         return err;
4958 }
4959
4960 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4961 {
4962         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4963         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4964
4965         return igb_vlvf_set(adapter, vid, add, vf);
4966 }
4967
4968 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4969 {
4970         /* clear flags */
4971         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4972         adapter->vf_data[vf].last_nack = jiffies;
4973
4974         /* reset offloads to defaults */
4975         igb_set_vmolr(adapter, vf, true);
4976
4977         /* reset vlans for device */
4978         igb_clear_vf_vfta(adapter, vf);
4979         if (adapter->vf_data[vf].pf_vlan)
4980                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4981                                     adapter->vf_data[vf].pf_vlan,
4982                                     adapter->vf_data[vf].pf_qos);
4983         else
4984                 igb_clear_vf_vfta(adapter, vf);
4985
4986         /* reset multicast table array for vf */
4987         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4988
4989         /* Flush and reset the mta with the new values */
4990         igb_set_rx_mode(adapter->netdev);
4991 }
4992
4993 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4994 {
4995         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4996
4997         /* generate a new mac address as we were hotplug removed/added */
4998         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4999                 random_ether_addr(vf_mac);
5000
5001         /* process remaining reset events */
5002         igb_vf_reset(adapter, vf);
5003 }
5004
5005 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5006 {
5007         struct e1000_hw *hw = &adapter->hw;
5008         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5009         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5010         u32 reg, msgbuf[3];
5011         u8 *addr = (u8 *)(&msgbuf[1]);
5012
5013         /* process all the same items cleared in a function level reset */
5014         igb_vf_reset(adapter, vf);
5015
5016         /* set vf mac address */
5017         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5018
5019         /* enable transmit and receive for vf */
5020         reg = rd32(E1000_VFTE);
5021         wr32(E1000_VFTE, reg | (1 << vf));
5022         reg = rd32(E1000_VFRE);
5023         wr32(E1000_VFRE, reg | (1 << vf));
5024
5025         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
5026
5027         /* reply to reset with ack and vf mac address */
5028         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5029         memcpy(addr, vf_mac, 6);
5030         igb_write_mbx(hw, msgbuf, 3, vf);
5031 }
5032
5033 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5034 {
5035         /*
5036          * The VF MAC Address is stored in a packed array of bytes
5037          * starting at the second 32 bit word of the msg array
5038          */
5039         unsigned char *addr = (char *)&msg[1];
5040         int err = -1;
5041
5042         if (is_valid_ether_addr(addr))
5043                 err = igb_set_vf_mac(adapter, vf, addr);
5044
5045         return err;
5046 }
5047
5048 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5049 {
5050         struct e1000_hw *hw = &adapter->hw;
5051         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5052         u32 msg = E1000_VT_MSGTYPE_NACK;
5053
5054         /* if device isn't clear to send it shouldn't be reading either */
5055         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5056             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5057                 igb_write_mbx(hw, &msg, 1, vf);
5058                 vf_data->last_nack = jiffies;
5059         }
5060 }
5061
5062 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5063 {
5064         struct pci_dev *pdev = adapter->pdev;
5065         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5066         struct e1000_hw *hw = &adapter->hw;
5067         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5068         s32 retval;
5069
5070         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5071
5072         if (retval) {
5073                 /* if receive failed revoke VF CTS stats and restart init */
5074                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5075                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5076                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5077                         return;
5078                 goto out;
5079         }
5080
5081         /* this is a message we already processed, do nothing */
5082         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5083                 return;
5084
5085         /*
5086          * until the vf completes a reset it should not be
5087          * allowed to start any configuration.
5088          */
5089
5090         if (msgbuf[0] == E1000_VF_RESET) {
5091                 igb_vf_reset_msg(adapter, vf);
5092                 return;
5093         }
5094
5095         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5096                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5097                         return;
5098                 retval = -1;
5099                 goto out;
5100         }
5101
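        /* the low 16 bits of the first mailbox word carry the VF request type */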
5102         switch ((msgbuf[0] & 0xFFFF)) {
5103         case E1000_VF_SET_MAC_ADDR:
5104                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5105                 break;
5106         case E1000_VF_SET_PROMISC:
5107                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5108                 break;
5109         case E1000_VF_SET_MULTICAST:
5110                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5111                 break;
5112         case E1000_VF_SET_LPE:
5113                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5114                 break;
5115         case E1000_VF_SET_VLAN:
5116                 if (adapter->vf_data[vf].pf_vlan)
5117                         retval = -1;
5118                 else
5119                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5120                 break;
5121         default:
5122                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5123                 retval = -1;
5124                 break;
5125         }
5126
5127         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5128 out:
5129         /* notify the VF of the results of what it sent us */
5130         if (retval)
5131                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5132         else
5133                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5134
5135         igb_write_mbx(hw, msgbuf, 1, vf);
5136 }
5137
5138 static void igb_msg_task(struct igb_adapter *adapter)
5139 {
5140         struct e1000_hw *hw = &adapter->hw;
5141         u32 vf;
5142
5143         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5144                 /* process any reset requests */
5145                 if (!igb_check_for_rst(hw, vf))
5146                         igb_vf_reset_event(adapter, vf);
5147
5148                 /* process any messages pending */
5149                 if (!igb_check_for_msg(hw, vf))
5150                         igb_rcv_msg_from_vf(adapter, vf);
5151
5152                 /* process any acks */
5153                 if (!igb_check_for_ack(hw, vf))
5154                         igb_rcv_ack_from_vf(adapter, vf);
5155         }
5156 }
5157
5158 /**
5159  *  igb_set_uta - Set unicast filter table address
5160  *  @adapter: board private structure
5161  *
5162  *  The unicast table address is a register array of 32-bit registers.
5163  *  The table is meant to be used in a way similar to how the MTA is used
5164  *  however due to certain limitations in the hardware it is necessary to
5165  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5166  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5167  **/
5168 static void igb_set_uta(struct igb_adapter *adapter)
5169 {
5170         struct e1000_hw *hw = &adapter->hw;
5171         int i;
5172
5173         /* The UTA table only exists on 82576 hardware and newer */
5174         if (hw->mac.type < e1000_82576)
5175                 return;
5176
5177         /* we only need to do this if VMDq is enabled */
5178         if (!adapter->vfs_allocated_count)
5179                 return;
5180
5181         for (i = 0; i < hw->mac.uta_reg_count; i++)
5182                 array_wr32(E1000_UTA, i, ~0);
5183 }
5184
5185 /**
5186  * igb_intr_msi - Interrupt Handler
5187  * @irq: interrupt number
5188  * @data: pointer to a network interface device structure
5189  **/
5190 static irqreturn_t igb_intr_msi(int irq, void *data)
5191 {
5192         struct igb_adapter *adapter = data;
5193         struct igb_q_vector *q_vector = adapter->q_vector[0];
5194         struct e1000_hw *hw = &adapter->hw;
5195         /* read ICR disables interrupts using IAM */
5196         u32 icr = rd32(E1000_ICR);
5197
5198         igb_write_itr(q_vector);
5199
5200         if (icr & E1000_ICR_DRSTA)
5201                 schedule_work(&adapter->reset_task);
5202
5203         if (icr & E1000_ICR_DOUTSYNC) {
5204                 /* HW is reporting DMA is out of sync */
5205                 adapter->stats.doosync++;
5206         }
5207
5208         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5209                 hw->mac.get_link_status = 1;
5210                 if (!test_bit(__IGB_DOWN, &adapter->state))
5211                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5212         }
5213
5214         napi_schedule(&q_vector->napi);
5215
5216         return IRQ_HANDLED;
5217 }
5218
5219 /**
5220  * igb_intr - Legacy Interrupt Handler
5221  * @irq: interrupt number
5222  * @data: pointer to a network interface device structure
5223  **/
5224 static irqreturn_t igb_intr(int irq, void *data)
5225 {
5226         struct igb_adapter *adapter = data;
5227         struct igb_q_vector *q_vector = adapter->q_vector[0];
5228         struct e1000_hw *hw = &adapter->hw;
5229         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5230          * need for the IMC write */
5231         u32 icr = rd32(E1000_ICR);
5232         if (!icr)
5233                 return IRQ_NONE;  /* Not our interrupt */
5234
5235         igb_write_itr(q_vector);
5236
5237         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5238          * not set, then the adapter didn't send an interrupt */
5239         if (!(icr & E1000_ICR_INT_ASSERTED))
5240                 return IRQ_NONE;
5241
5242         if (icr & E1000_ICR_DRSTA)
5243                 schedule_work(&adapter->reset_task);
5244
5245         if (icr & E1000_ICR_DOUTSYNC) {
5246                 /* HW is reporting DMA is out of sync */
5247                 adapter->stats.doosync++;
5248         }
5249
5250         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5251                 hw->mac.get_link_status = 1;
5252                 /* guard against interrupt when we're going down */
5253                 if (!test_bit(__IGB_DOWN, &adapter->state))
5254                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5255         }
5256
5257         napi_schedule(&q_vector->napi);
5258
5259         return IRQ_HANDLED;
5260 }
5261
5262 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5263 {
5264         struct igb_adapter *adapter = q_vector->adapter;
5265         struct e1000_hw *hw = &adapter->hw;
5266
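        /* re-tune the interrupt throttle rate: per-vector with MSI-X,
         * adapter-wide in MSI/legacy mode */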
5267         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5268             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5269                 if (!adapter->msix_entries)
5270                         igb_set_itr(adapter);
5271                 else
5272                         igb_update_ring_itr(q_vector);
5273         }
5274
5275         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5276                 if (adapter->msix_entries)
5277                         wr32(E1000_EIMS, q_vector->eims_value);
5278                 else
5279                         igb_irq_enable(adapter);
5280         }
5281 }
5282
5283 /**
5284  * igb_poll - NAPI Rx polling callback
5285  * @napi: napi polling structure
5286  * @budget: count of how many packets we should handle
5287  **/
5288 static int igb_poll(struct napi_struct *napi, int budget)
5289 {
5290         struct igb_q_vector *q_vector = container_of(napi,
5291                                                      struct igb_q_vector,
5292                                                      napi);
5293         int tx_clean_complete = 1, work_done = 0;
5294
5295 #ifdef CONFIG_IGB_DCA
5296         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5297                 igb_update_dca(q_vector);
5298 #endif
5299         if (q_vector->tx_ring)
5300                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5301
5302         if (q_vector->rx_ring)
5303                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5304
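        /* if tx cleanup did not finish, consume the full budget so NAPI
         * keeps polling this vector */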
5305         if (!tx_clean_complete)
5306                 work_done = budget;
5307
5308         /* If not enough Rx work done, exit the polling mode */
5309         if (work_done < budget) {
5310                 napi_complete(napi);
5311                 igb_ring_irq_enable(q_vector);
5312         }
5313
5314         return work_done;
5315 }
5316
5317 /**
5318  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5319  * @adapter: board private structure
5320  * @shhwtstamps: timestamp structure to update
5321  * @regval: unsigned 64bit system time value.
5322  *
5323  * We need to convert the system time value stored in the RX/TXSTMP registers
5324  * into a hwtstamp which can be used by the upper level timestamping functions
5325  */
5326 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5327                                    struct skb_shared_hwtstamps *shhwtstamps,
5328                                    u64 regval)
5329 {
5330         u64 ns;
5331
5332         /*
5333          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5334          * 24 to match clock shift we setup earlier.
5335          */
5336         if (adapter->hw.mac.type == e1000_82580)
5337                 regval <<= IGB_82580_TSYNC_SHIFT;
5338
5339         ns = timecounter_cyc2time(&adapter->clock, regval);
5340         timecompare_update(&adapter->compare, ns);
5341         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5342         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5343         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5344 }
5345
5346 /**
5347  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5348  * @q_vector: pointer to q_vector containing needed info
5349  * @buffer_info: pointer to igb_buffer structure
5350  *
5351  * If we were asked to do hardware stamping and such a time stamp is
5352  * available, then it must have been for this skb here because we only
5353  * allow only one such packet into the queue.
5354  */
5355 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5356 {
5357         struct igb_adapter *adapter = q_vector->adapter;
5358         struct e1000_hw *hw = &adapter->hw;
5359         struct skb_shared_hwtstamps shhwtstamps;
5360         u64 regval;
5361
5362         /* if skb does not support hw timestamp or TX stamp not valid exit */
5363         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5364             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5365                 return;
5366
5367         regval = rd32(E1000_TXSTMPL);
5368         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5369
5370         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5371         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5372 }
5373
5374 /**
5375  * igb_clean_tx_irq - Reclaim resources after transmit completes
5376  * @q_vector: pointer to q_vector containing needed info
5377  * returns true if ring is completely cleaned
5378  **/
5379 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5380 {
5381         struct igb_adapter *adapter = q_vector->adapter;
5382         struct igb_ring *tx_ring = q_vector->tx_ring;
5383         struct net_device *netdev = tx_ring->netdev;
5384         struct e1000_hw *hw = &adapter->hw;
5385         struct igb_buffer *buffer_info;
5386         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5387         unsigned int total_bytes = 0, total_packets = 0;
5388         unsigned int i, eop, count = 0;
5389         bool cleaned = false;
5390
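        /* each transmitted skb records the index of its end-of-packet
         * descriptor in next_to_watch; once that descriptor reports DD we can
         * unmap and free every buffer up to and including it */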
5391         i = tx_ring->next_to_clean;
5392         eop = tx_ring->buffer_info[i].next_to_watch;
5393         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5394
5395         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5396                (count < tx_ring->count)) {
5397                 rmb();  /* read buffer_info after eop_desc status */
5398                 for (cleaned = false; !cleaned; count++) {
5399                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5400                         buffer_info = &tx_ring->buffer_info[i];
5401                         cleaned = (i == eop);
5402
5403                         if (buffer_info->skb) {
5404                                 total_bytes += buffer_info->bytecount;
5405                                 /* gso_segs is currently only valid for tcp */
5406                                 total_packets += buffer_info->gso_segs;
5407                                 igb_tx_hwtstamp(q_vector, buffer_info);
5408                         }
5409
5410                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5411                         tx_desc->wb.status = 0;
5412
5413                         i++;
5414                         if (i == tx_ring->count)
5415                                 i = 0;
5416                 }
5417                 eop = tx_ring->buffer_info[i].next_to_watch;
5418                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5419         }
5420
5421         tx_ring->next_to_clean = i;
5422
5423         if (unlikely(count &&
5424                      netif_carrier_ok(netdev) &&
5425                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5426                 /* Make sure that anybody stopping the queue after this
5427                  * sees the new next_to_clean.
5428                  */
5429                 smp_mb();
5430                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5431                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5432                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5433
5434                         u64_stats_update_begin(&tx_ring->tx_syncp);
5435                         tx_ring->tx_stats.restart_queue++;
5436                         u64_stats_update_end(&tx_ring->tx_syncp);
5437                 }
5438         }
5439
5440         if (tx_ring->detect_tx_hung) {
5441                 /* Detect a transmit hang in hardware; this serializes the
5442                  * check with the clearing of time_stamp and movement of i */
5443                 tx_ring->detect_tx_hung = false;
5444                 if (tx_ring->buffer_info[i].time_stamp &&
5445                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5446                                (adapter->tx_timeout_factor * HZ)) &&
5447                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5448
5449                         /* detected Tx unit hang */
5450                         dev_err(tx_ring->dev,
5451                                 "Detected Tx Unit Hang\n"
5452                                 "  Tx Queue             <%d>\n"
5453                                 "  TDH                  <%x>\n"
5454                                 "  TDT                  <%x>\n"
5455                                 "  next_to_use          <%x>\n"
5456                                 "  next_to_clean        <%x>\n"
5457                                 "buffer_info[next_to_clean]\n"
5458                                 "  time_stamp           <%lx>\n"
5459                                 "  next_to_watch        <%x>\n"
5460                                 "  jiffies              <%lx>\n"
5461                                 "  desc.status          <%x>\n",
5462                                 tx_ring->queue_index,
5463                                 readl(tx_ring->head),
5464                                 readl(tx_ring->tail),
5465                                 tx_ring->next_to_use,
5466                                 tx_ring->next_to_clean,
5467                                 tx_ring->buffer_info[eop].time_stamp,
5468                                 eop,
5469                                 jiffies,
5470                                 eop_desc->wb.status);
5471                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5472                 }
5473         }
5474         tx_ring->total_bytes += total_bytes;
5475         tx_ring->total_packets += total_packets;
5476         u64_stats_update_begin(&tx_ring->tx_syncp);
5477         tx_ring->tx_stats.bytes += total_bytes;
5478         tx_ring->tx_stats.packets += total_packets;
5479         u64_stats_update_end(&tx_ring->tx_syncp);
5480         return count < tx_ring->count;
5481 }
5482
5483 /**
5484  * igb_receive_skb - helper function to handle rx indications
5485  * @q_vector: structure containing interrupt and ring information
5486  * @skb: packet to send up
5487  * @vlan_tag: vlan tag for packet
5488  **/
5489 static void igb_receive_skb(struct igb_q_vector *q_vector,
5490                             struct sk_buff *skb,
5491                             u16 vlan_tag)
5492 {
5493         struct igb_adapter *adapter = q_vector->adapter;
5494
5495         if (vlan_tag && adapter->vlgrp)
5496                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5497                                  vlan_tag, skb);
5498         else
5499                 napi_gro_receive(&q_vector->napi, skb);
5500 }
5501
5502 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5503                                        u32 status_err, struct sk_buff *skb)
5504 {
5505         skb_checksum_none_assert(skb);
5506
5507         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5508         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5509              (status_err & E1000_RXD_STAT_IXSM))
5510                 return;
5511
5512         /* TCP/UDP checksum error bit is set */
5513         if (status_err &
5514             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5515                 /*
5516                  * work around an erratum with SCTP packets where the TCPE
5517                  * (aka L4E) bit is set incorrectly on 64-byte packets
5518                  * (60 bytes without CRC); let the stack verify the crc32c
5519                  */
5520                 if ((skb->len == 60) &&
5521                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5522                         u64_stats_update_begin(&ring->rx_syncp);
5523                         ring->rx_stats.csum_err++;
5524                         u64_stats_update_end(&ring->rx_syncp);
5525                 }
5526                 /* let the stack verify checksum errors */
5527                 return;
5528         }
5529         /* It must be a TCP or UDP packet with a valid checksum */
5530         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5531                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5532
5533         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5534 }
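
     /*
      * Illustrative note (not part of the driver): the IGB_RING_FLAG_RX_CSUM
      * flag tested above tracks the ethtool receive-checksum setting, which
      * userspace can toggle with, for example:
      *
      *      ethtool -K eth0 rx off
      *
      * The interface name is an assumption for the example.
      */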
5535
5536 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5537                                    struct sk_buff *skb)
5538 {
5539         struct igb_adapter *adapter = q_vector->adapter;
5540         struct e1000_hw *hw = &adapter->hw;
5541         u64 regval;
5542
5543         /*
5544          * If this bit is set, then the RX registers contain the time stamp. No
5545          * other packet will be time stamped until we read these registers, so
5546          * read the registers to make them available again. Because only one
5547          * packet can be time stamped at a time, we know that the register
5548          * values must belong to this packet and therefore we don't need to
5549          * compare any of the additional attributes stored for it.
5550          *
5551          * If nothing went wrong, the time stamp can be turned into a
5552          * skb_shared_hwtstamps for this packet.
5553          */
5554         if (staterr & E1000_RXDADV_STAT_TSIP) {
5555                 u32 *stamp = (u32 *)skb->data;
5556                 regval = le32_to_cpu(*(stamp + 2));
5557                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5558                 skb_pull(skb, IGB_TS_HDR_LEN);
5559         } else {
5560                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5561                         return;
5562
5563                 regval = rd32(E1000_RXSTMPL);
5564                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5565         }
5566
5567         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5568 }
5569 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5570                                union e1000_adv_rx_desc *rx_desc)
5571 {
5572         /* HW will not DMA in data larger than the given buffer, even if it
5573          * parses the (NFS, of course) header to be larger.  In that case, it
5574          * fills the header buffer and spills the rest into the page.
5575          */
5576         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5577                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5578         if (hlen > rx_ring->rx_buffer_len)
5579                 hlen = rx_ring->rx_buffer_len;
5580         return hlen;
5581 }
5582
5583 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5584                                  int *work_done, int budget)
5585 {
5586         struct igb_ring *rx_ring = q_vector->rx_ring;
5587         struct net_device *netdev = rx_ring->netdev;
5588         struct device *dev = rx_ring->dev;
5589         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5590         struct igb_buffer *buffer_info, *next_buffer;
5591         struct sk_buff *skb;
5592         bool cleaned = false;
5593         int cleaned_count = 0;
5594         int current_node = numa_node_id();
5595         unsigned int total_bytes = 0, total_packets = 0;
5596         unsigned int i;
5597         u32 staterr;
5598         u16 length;
5599         u16 vlan_tag;
5600
5601         i = rx_ring->next_to_clean;
5602         buffer_info = &rx_ring->buffer_info[i];
5603         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5604         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5605
5606         while (staterr & E1000_RXD_STAT_DD) {
5607                 if (*work_done >= budget)
5608                         break;
5609                 (*work_done)++;
5610                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5611
5612                 skb = buffer_info->skb;
5613                 prefetch(skb->data - NET_IP_ALIGN);
5614                 buffer_info->skb = NULL;
5615
5616                 i++;
5617                 if (i == rx_ring->count)
5618                         i = 0;
5619
5620                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5621                 prefetch(next_rxd);
5622                 next_buffer = &rx_ring->buffer_info[i];
5623
5624                 length = le16_to_cpu(rx_desc->wb.upper.length);
5625                 cleaned = true;
5626                 cleaned_count++;
5627
5628                 if (buffer_info->dma) {
5629                         dma_unmap_single(dev, buffer_info->dma,
5630                                          rx_ring->rx_buffer_len,
5631                                          DMA_FROM_DEVICE);
5632                         buffer_info->dma = 0;
5633                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5634                                 skb_put(skb, length);
5635                                 goto send_up;
5636                         }
5637                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5638                 }
5639
5640                 if (length) {
5641                         dma_unmap_page(dev, buffer_info->page_dma,
5642                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5643                         buffer_info->page_dma = 0;
5644
5645                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5646                                                 buffer_info->page,
5647                                                 buffer_info->page_offset,
5648                                                 length);
5649
5650                         if ((page_count(buffer_info->page) != 1) ||
5651                             (page_to_nid(buffer_info->page) != current_node))
5652                                 buffer_info->page = NULL;
5653                         else
5654                                 get_page(buffer_info->page);
5655
5656                         skb->len += length;
5657                         skb->data_len += length;
5658                         skb->truesize += length;
5659                 }
5660
5661                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5662                         buffer_info->skb = next_buffer->skb;
5663                         buffer_info->dma = next_buffer->dma;
5664                         next_buffer->skb = skb;
5665                         next_buffer->dma = 0;
5666                         goto next_desc;
5667                 }
5668 send_up:
5669                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5670                         dev_kfree_skb_irq(skb);
5671                         goto next_desc;
5672                 }
5673
5674                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5675                         igb_rx_hwtstamp(q_vector, staterr, skb);
5676                 total_bytes += skb->len;
5677                 total_packets++;
5678
5679                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5680
5681                 skb->protocol = eth_type_trans(skb, netdev);
5682                 skb_record_rx_queue(skb, rx_ring->queue_index);
5683
5684                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5685                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5686
5687                 igb_receive_skb(q_vector, skb, vlan_tag);
5688
5689 next_desc:
5690                 rx_desc->wb.upper.status_error = 0;
5691
5692                 /* return some buffers to hardware; one at a time is too slow */
5693                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5694                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5695                         cleaned_count = 0;
5696                 }
5697
5698                 /* use prefetched values */
5699                 rx_desc = next_rxd;
5700                 buffer_info = next_buffer;
5701                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5702         }
5703
5704         rx_ring->next_to_clean = i;
5705         cleaned_count = igb_desc_unused(rx_ring);
5706
5707         if (cleaned_count)
5708                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5709
5710         rx_ring->total_packets += total_packets;
5711         rx_ring->total_bytes += total_bytes;
5712         u64_stats_update_begin(&rx_ring->rx_syncp);
5713         rx_ring->rx_stats.packets += total_packets;
5714         rx_ring->rx_stats.bytes += total_bytes;
5715         u64_stats_update_end(&rx_ring->rx_syncp);
5716         return cleaned;
5717 }
5718
5719 /**
5720  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5721  * @rx_ring: rx descriptor ring to place new receive buffers on
5722  **/
5723 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5724 {
5725         struct net_device *netdev = rx_ring->netdev;
5726         union e1000_adv_rx_desc *rx_desc;
5727         struct igb_buffer *buffer_info;
5728         struct sk_buff *skb;
5729         unsigned int i;
5730         int bufsz;
5731
5732         i = rx_ring->next_to_use;
5733         buffer_info = &rx_ring->buffer_info[i];
5734
5735         bufsz = rx_ring->rx_buffer_len;
5736
5737         while (cleaned_count--) {
5738                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5739
5740                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5741                         if (!buffer_info->page) {
5742                                 buffer_info->page = netdev_alloc_page(netdev);
5743                                 if (unlikely(!buffer_info->page)) {
5744                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5745                                         rx_ring->rx_stats.alloc_failed++;
5746                                         u64_stats_update_end(&rx_ring->rx_syncp);
5747                                         goto no_buffers;
5748                                 }
5749                                 buffer_info->page_offset = 0;
5750                         } else {
5751                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5752                         }
5753                         buffer_info->page_dma =
5754                                 dma_map_page(rx_ring->dev, buffer_info->page,
5755                                              buffer_info->page_offset,
5756                                              PAGE_SIZE / 2,
5757                                              DMA_FROM_DEVICE);
5758                         if (dma_mapping_error(rx_ring->dev,
5759                                               buffer_info->page_dma)) {
5760                                 buffer_info->page_dma = 0;
5761                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5762                                 rx_ring->rx_stats.alloc_failed++;
5763                                 u64_stats_update_end(&rx_ring->rx_syncp);
5764                                 goto no_buffers;
5765                         }
5766                 }
5767
5768                 skb = buffer_info->skb;
5769                 if (!skb) {
5770                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5771                         if (unlikely(!skb)) {
5772                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5773                                 rx_ring->rx_stats.alloc_failed++;
5774                                 u64_stats_update_end(&rx_ring->rx_syncp);
5775                                 goto no_buffers;
5776                         }
5777
5778                         buffer_info->skb = skb;
5779                 }
5780                 if (!buffer_info->dma) {
5781                         buffer_info->dma = dma_map_single(rx_ring->dev,
5782                                                           skb->data,
5783                                                           bufsz,
5784                                                           DMA_FROM_DEVICE);
5785                         if (dma_mapping_error(rx_ring->dev,
5786                                               buffer_info->dma)) {
5787                                 buffer_info->dma = 0;
5788                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5789                                 rx_ring->rx_stats.alloc_failed++;
5790                                 u64_stats_update_end(&rx_ring->rx_syncp);
5791                                 goto no_buffers;
5792                         }
5793                 }
5794                 /* Refresh the desc even if buffer_addrs didn't change because
5795                  * each write-back erases this info. */
5796                 if (bufsz < IGB_RXBUFFER_1024) {
5797                         rx_desc->read.pkt_addr =
5798                              cpu_to_le64(buffer_info->page_dma);
5799                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5800                 } else {
5801                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5802                         rx_desc->read.hdr_addr = 0;
5803                 }
5804
5805                 i++;
5806                 if (i == rx_ring->count)
5807                         i = 0;
5808                 buffer_info = &rx_ring->buffer_info[i];
5809         }
5810
5811 no_buffers:
5812         if (rx_ring->next_to_use != i) {
5813                 rx_ring->next_to_use = i;
5814                 if (i == 0)
5815                         i = (rx_ring->count - 1);
5816                 else
5817                         i--;
5818
5819                 /* Force memory writes to complete before letting h/w
5820                  * know there are new descriptors to fetch.  (Only
5821                  * applicable for weak-ordered memory model archs,
5822                  * such as IA-64). */
5823                 wmb();
5824                 writel(i, rx_ring->tail);
5825         }
5826 }
5827
5828 /**
5829  * igb_mii_ioctl - handle MII ioctls for PHY access
5830  * @netdev: network interface device structure
5831  * @ifr: interface request containing the MII register data
5832  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG; SIOCSMIIREG is not supported)
5833  **/
5834 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5835 {
5836         struct igb_adapter *adapter = netdev_priv(netdev);
5837         struct mii_ioctl_data *data = if_mii(ifr);
5838
5839         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5840                 return -EOPNOTSUPP;
5841
5842         switch (cmd) {
5843         case SIOCGMIIPHY:
5844                 data->phy_id = adapter->hw.phy.addr;
5845                 break;
5846         case SIOCGMIIREG:
5847                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5848                                      &data->val_out))
5849                         return -EIO;
5850                 break;
5851         case SIOCSMIIREG:
5852         default:
5853                 return -EOPNOTSUPP;
5854         }
5855         return 0;
5856 }
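
     /*
      * Illustrative userspace sketch (not part of the driver): reading a PHY
      * register through the MII ioctls handled above.  The interface name and
      * the register read are assumptions for the example; error handling is
      * omitted.
      *
      *      #include <string.h>
      *      #include <sys/ioctl.h>
      *      #include <sys/socket.h>
      *      #include <net/if.h>
      *      #include <linux/mii.h>
      *      #include <linux/sockios.h>
      *
      *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
      *      struct ifreq ifr;
      *      // the MII data overlays ifr_ifru, mirroring the kernel's if_mii()
      *      struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
      *
      *      memset(&ifr, 0, sizeof(ifr));
      *      strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
      *      ioctl(fd, SIOCGMIIPHY, &ifr);   // fills mii->phy_id
      *      mii->reg_num = MII_BMSR;        // basic mode status register
      *      ioctl(fd, SIOCGMIIREG, &ifr);   // link status ends up in mii->val_out
      */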
5857
5858 /**
5859  * igb_hwtstamp_ioctl - control hardware time stamping
5860  * @netdev: network interface device structure
5861  * @ifr: interface request containing the hwtstamp_config to apply
5862  * @cmd: ioctl command (SIOCSHWTSTAMP)
5863  *
5864  * Outgoing time stamping can be enabled and disabled. Play nice and
5865  * disable it when requested, although it shouldn't cause any overhead
5866  * when no packet needs it. At most one packet in the queue may be
5867  * marked for time stamping, otherwise it would be impossible to tell
5868  * for sure to which packet the hardware time stamp belongs.
5869  *
5870  * Incoming time stamping has to be configured via the hardware
5871  * filters. Not all combinations are supported, in particular event
5872  * type has to be specified. Matching the kind of event packet is
5873  * not supported, with the exception of "all V2 events regardless of
5874  * layer 2 or 4".
5875  *
5876  **/
5877 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5878                               struct ifreq *ifr, int cmd)
5879 {
5880         struct igb_adapter *adapter = netdev_priv(netdev);
5881         struct e1000_hw *hw = &adapter->hw;
5882         struct hwtstamp_config config;
5883         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5884         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5885         u32 tsync_rx_cfg = 0;
5886         bool is_l4 = false;
5887         bool is_l2 = false;
5888         u32 regval;
5889
5890         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5891                 return -EFAULT;
5892
5893         /* reserved for future extensions */
5894         if (config.flags)
5895                 return -EINVAL;
5896
5897         switch (config.tx_type) {
5898         case HWTSTAMP_TX_OFF:
5899                 tsync_tx_ctl = 0;
                     /* fall through */
5900         case HWTSTAMP_TX_ON:
5901                 break;
5902         default:
5903                 return -ERANGE;
5904         }
5905
5906         switch (config.rx_filter) {
5907         case HWTSTAMP_FILTER_NONE:
5908                 tsync_rx_ctl = 0;
5909                 break;
5910         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5911         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5912         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5913         case HWTSTAMP_FILTER_ALL:
5914                 /*
5915                  * the TSYNCRXCFG register can only select a single message
5916                  * type, so it is not possible to time stamp both Sync and
5917                  * Delay_Req messages; fall back to time stamping all packets
5918                  */
5919                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5920                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5921                 break;
5922         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5923                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5924                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5925                 is_l4 = true;
5926                 break;
5927         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5928                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5929                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5930                 is_l4 = true;
5931                 break;
5932         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5933         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5934                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5935                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5936                 is_l2 = true;
5937                 is_l4 = true;
5938                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5939                 break;
5940         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5941         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5942                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5943                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5944                 is_l2 = true;
5945                 is_l4 = true;
5946                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5947                 break;
5948         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5949         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5950         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5951                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5952                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5953                 is_l2 = true;
5954                 break;
5955         default:
5956                 return -ERANGE;
5957         }
5958
5959         if (hw->mac.type == e1000_82575) {
5960                 if (tsync_rx_ctl | tsync_tx_ctl)
5961                         return -EINVAL;
5962                 return 0;
5963         }
5964
5965         /*
5966          * Per-packet timestamping only works if all packets are
5967          * timestamped, so enable timestamping in all packets as
5968          * long as one rx filter was configured.
5969          */
5970         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5971                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5972                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5973         }
5974
5975         /* enable/disable TX */
5976         regval = rd32(E1000_TSYNCTXCTL);
5977         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5978         regval |= tsync_tx_ctl;
5979         wr32(E1000_TSYNCTXCTL, regval);
5980
5981         /* enable/disable RX */
5982         regval = rd32(E1000_TSYNCRXCTL);
5983         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5984         regval |= tsync_rx_ctl;
5985         wr32(E1000_TSYNCRXCTL, regval);
5986
5987         /* define which PTP packets are time stamped */
5988         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5989
5990         /* define ethertype filter for timestamped packets */
5991         if (is_l2)
5992                 wr32(E1000_ETQF(3),
5993                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5994                                  E1000_ETQF_1588 | /* enable timestamping */
5995                                  ETH_P_1588));     /* 1588 eth protocol type */
5996         else
5997                 wr32(E1000_ETQF(3), 0);
5998
5999 #define PTP_PORT 319
6000         /* L4 Queue Filter[3]: filter by destination port and protocol */
6001         if (is_l4) {
6002                 u32 ftqf = (IPPROTO_UDP /* UDP */
6003                         | E1000_FTQF_VF_BP /* VF not compared */
6004                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6005                         | E1000_FTQF_MASK); /* mask all inputs */
6006                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6007
6008                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6009                 wr32(E1000_IMIREXT(3),
6010                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6011                 if (hw->mac.type == e1000_82576) {
6012                         /* enable source port check */
6013                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6014                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6015                 }
6016                 wr32(E1000_FTQF(3), ftqf);
6017         } else {
6018                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6019         }
6020         wrfl();
6021
6022         adapter->hwtstamp_config = config;
6023
6024         /* clear TX/RX time stamp registers, just to be sure */
6025         regval = rd32(E1000_TXSTMPH);
6026         regval = rd32(E1000_RXSTMPH);
6027
6028         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6029                 -EFAULT : 0;
6030 }
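
     /*
      * Illustrative userspace sketch (not part of the driver): requesting
      * hardware time stamping through the SIOCSHWTSTAMP ioctl serviced above.
      * The interface name is an assumption for the example; error handling is
      * omitted and CAP_NET_ADMIN is required.
      *
      *      #include <string.h>
      *      #include <sys/ioctl.h>
      *      #include <sys/socket.h>
      *      #include <net/if.h>
      *      #include <linux/net_tstamp.h>
      *      #include <linux/sockios.h>
      *
      *      struct hwtstamp_config cfg = {
      *              .tx_type   = HWTSTAMP_TX_ON,
      *              .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
      *      };
      *      struct ifreq ifr;
      *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
      *
      *      memset(&ifr, 0, sizeof(ifr));
      *      strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
      *      ifr.ifr_data = (char *)&cfg;
      *      ioctl(fd, SIOCSHWTSTAMP, &ifr);
      *
      * On return cfg.rx_filter reports the filter actually programmed; as seen
      * above, the driver may widen the request (e.g. to HWTSTAMP_FILTER_ALL).
      */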
6031
6032 /**
6033  * igb_ioctl - dispatch device-specific ioctls
6034  * @netdev: network interface device structure
6035  * @ifr: interface request
6036  * @cmd: ioctl command
6037  **/
6038 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6039 {
6040         switch (cmd) {
6041         case SIOCGMIIPHY:
6042         case SIOCGMIIREG:
6043         case SIOCSMIIREG:
6044                 return igb_mii_ioctl(netdev, ifr, cmd);
6045         case SIOCSHWTSTAMP:
6046                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6047         default:
6048                 return -EOPNOTSUPP;
6049         }
6050 }
6051
6052 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6053 {
6054         struct igb_adapter *adapter = hw->back;
6055         u16 cap_offset;
6056
6057         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6058         if (!cap_offset)
6059                 return -E1000_ERR_CONFIG;
6060
6061         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6062
6063         return 0;
6064 }
6065
6066 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6067 {
6068         struct igb_adapter *adapter = hw->back;
6069         u16 cap_offset;
6070
6071         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6072         if (!cap_offset)
6073                 return -E1000_ERR_CONFIG;
6074
6075         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6076
6077         return 0;
6078 }
6079
6080 static void igb_vlan_rx_register(struct net_device *netdev,
6081                                  struct vlan_group *grp)
6082 {
6083         struct igb_adapter *adapter = netdev_priv(netdev);
6084         struct e1000_hw *hw = &adapter->hw;
6085         u32 ctrl, rctl;
6086
6087         igb_irq_disable(adapter);
6088         adapter->vlgrp = grp;
6089
6090         if (grp) {
6091                 /* enable VLAN tag insert/strip */
6092                 ctrl = rd32(E1000_CTRL);
6093                 ctrl |= E1000_CTRL_VME;
6094                 wr32(E1000_CTRL, ctrl);
6095
6096                 /* Disable CFI check */
6097                 rctl = rd32(E1000_RCTL);
6098                 rctl &= ~E1000_RCTL_CFIEN;
6099                 wr32(E1000_RCTL, rctl);
6100         } else {
6101                 /* disable VLAN tag insert/strip */
6102                 ctrl = rd32(E1000_CTRL);
6103                 ctrl &= ~E1000_CTRL_VME;
6104                 wr32(E1000_CTRL, ctrl);
6105         }
6106
6107         igb_rlpml_set(adapter);
6108
6109         if (!test_bit(__IGB_DOWN, &adapter->state))
6110                 igb_irq_enable(adapter);
6111 }
6112
6113 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6114 {
6115         struct igb_adapter *adapter = netdev_priv(netdev);
6116         struct e1000_hw *hw = &adapter->hw;
6117         int pf_id = adapter->vfs_allocated_count;
6118
6119         /* attempt to add filter to vlvf array */
6120         igb_vlvf_set(adapter, vid, true, pf_id);
6121
6122         /* add the filter since PF can receive vlans w/o entry in vlvf */
6123         igb_vfta_set(hw, vid, true);
6124 }
6125
6126 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6127 {
6128         struct igb_adapter *adapter = netdev_priv(netdev);
6129         struct e1000_hw *hw = &adapter->hw;
6130         int pf_id = adapter->vfs_allocated_count;
6131         s32 err;
6132
6133         igb_irq_disable(adapter);
6134         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6135
6136         if (!test_bit(__IGB_DOWN, &adapter->state))
6137                 igb_irq_enable(adapter);
6138
6139         /* remove vlan from VLVF table array */
6140         err = igb_vlvf_set(adapter, vid, false, pf_id);
6141
6142         /* if vid was not present in VLVF just remove it from table */
6143         if (err)
6144                 igb_vfta_set(hw, vid, false);
6145 }
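
     /*
      * Illustrative usage (not part of the driver): the VLAN register/add/kill
      * hooks above are invoked when userspace creates or removes a VLAN device
      * on top of the interface, for example:
      *
      *      ip link add link eth0 name eth0.100 type vlan id 100
      *      ip link del eth0.100
      *
      * The interface name and VLAN id are assumptions for the example.
      */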
6146
6147 static void igb_restore_vlan(struct igb_adapter *adapter)
6148 {
6149         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6150
6151         if (adapter->vlgrp) {
6152                 u16 vid;
6153                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6154                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6155                                 continue;
6156                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6157                 }
6158         }
6159 }
6160
6161 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6162 {
6163         struct pci_dev *pdev = adapter->pdev;
6164         struct e1000_mac_info *mac = &adapter->hw.mac;
6165
6166         mac->autoneg = 0;
6167
6168         /* Fiber NICs only allow 1000 Mbps full duplex */
6169         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6170                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6171                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6172                 return -EINVAL;
6173         }
6174
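             /*
              * spddplx encodes the requested ethtool speed and duplex as their
              * sum, e.g. SPEED_100 + DUPLEX_FULL == 100 + 1 == 101; it is
              * typically passed in from the ethtool set_settings path with
              * autonegotiation disabled.
              */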
6175         switch (spddplx) {
6176         case SPEED_10 + DUPLEX_HALF:
6177                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6178                 break;
6179         case SPEED_10 + DUPLEX_FULL:
6180                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6181                 break;
6182         case SPEED_100 + DUPLEX_HALF:
6183                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6184                 break;
6185         case SPEED_100 + DUPLEX_FULL:
6186                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6187                 break;
6188         case SPEED_1000 + DUPLEX_FULL:
6189                 mac->autoneg = 1;
6190                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6191                 break;
6192         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6193         default:
6194                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6195                 return -EINVAL;
6196         }
6197         return 0;
6198 }
6199
6200 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6201 {
6202         struct net_device *netdev = pci_get_drvdata(pdev);
6203         struct igb_adapter *adapter = netdev_priv(netdev);
6204         struct e1000_hw *hw = &adapter->hw;
6205         u32 ctrl, rctl, status;
6206         u32 wufc = adapter->wol;
6207 #ifdef CONFIG_PM
6208         int retval = 0;
6209 #endif
6210
6211         netif_device_detach(netdev);
6212
6213         if (netif_running(netdev))
6214                 igb_close(netdev);
6215
6216         igb_clear_interrupt_scheme(adapter);
6217
6218 #ifdef CONFIG_PM
6219         retval = pci_save_state(pdev);
6220         if (retval)
6221                 return retval;
6222 #endif
6223
6224         status = rd32(E1000_STATUS);
6225         if (status & E1000_STATUS_LU)
6226                 wufc &= ~E1000_WUFC_LNKC;
6227
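             /*
              * wufc is seeded from adapter->wol, i.e. the E1000_WUFC_* wake-up
              * filter bits that userspace typically configures via ethtool, for
              * example "ethtool -s eth0 wol g" for magic-packet wake (interface
              * name assumed for the example).
              */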
6228         if (wufc) {
6229                 igb_setup_rctl(adapter);
6230                 igb_set_rx_mode(netdev);
6231
6232                 /* turn on all-multi mode if wake on multicast is enabled */
6233                 if (wufc & E1000_WUFC_MC) {
6234                         rctl = rd32(E1000_RCTL);
6235                         rctl |= E1000_RCTL_MPE;
6236                         wr32(E1000_RCTL, rctl);
6237                 }
6238
6239                 ctrl = rd32(E1000_CTRL);
6240                 /* advertise wake from D3Cold */
6241                 #define E1000_CTRL_ADVD3WUC 0x00100000
6242                 /* phy power management enable */
6243                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6244                 ctrl |= E1000_CTRL_ADVD3WUC;
6245                 wr32(E1000_CTRL, ctrl);
6246
6247                 /* Allow time for pending master requests to run */
6248                 igb_disable_pcie_master(hw);
6249
6250                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6251                 wr32(E1000_WUFC, wufc);
6252         } else {
6253                 wr32(E1000_WUC, 0);
6254                 wr32(E1000_WUFC, 0);
6255         }
6256
6257         *enable_wake = wufc || adapter->en_mng_pt;
6258         if (!*enable_wake)
6259                 igb_power_down_link(adapter);
6260         else
6261                 igb_power_up_link(adapter);
6262
6263         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6264          * would have already happened in close and is redundant. */
6265         igb_release_hw_control(adapter);
6266
6267         pci_disable_device(pdev);
6268
6269         return 0;
6270 }
6271
6272 #ifdef CONFIG_PM
6273 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6274 {
6275         int retval;
6276         bool wake;
6277
6278         retval = __igb_shutdown(pdev, &wake);
6279         if (retval)
6280                 return retval;
6281
6282         if (wake) {
6283                 pci_prepare_to_sleep(pdev);
6284         } else {
6285                 pci_wake_from_d3(pdev, false);
6286                 pci_set_power_state(pdev, PCI_D3hot);
6287         }
6288
6289         return 0;
6290 }
6291
6292 static int igb_resume(struct pci_dev *pdev)
6293 {
6294         struct net_device *netdev = pci_get_drvdata(pdev);
6295         struct igb_adapter *adapter = netdev_priv(netdev);
6296         struct e1000_hw *hw = &adapter->hw;
6297         u32 err;
6298
6299         pci_set_power_state(pdev, PCI_D0);
6300         pci_restore_state(pdev);
6301         pci_save_state(pdev);
6302
6303         err = pci_enable_device_mem(pdev);
6304         if (err) {
6305                 dev_err(&pdev->dev,
6306                         "igb: Cannot enable PCI device from suspend\n");
6307                 return err;
6308         }
6309         pci_set_master(pdev);
6310
6311         pci_enable_wake(pdev, PCI_D3hot, 0);
6312         pci_enable_wake(pdev, PCI_D3cold, 0);
6313
6314         if (igb_init_interrupt_scheme(adapter)) {
6315                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6316                 return -ENOMEM;
6317         }
6318
6319         igb_reset(adapter);
6320
6321         /* let the f/w know that the h/w is now under the control of the
6322          * driver. */
6323         igb_get_hw_control(adapter);
6324
6325         wr32(E1000_WUS, ~0);
6326
6327         if (netif_running(netdev)) {
6328                 err = igb_open(netdev);
6329                 if (err)
6330                         return err;
6331         }
6332
6333         netif_device_attach(netdev);
6334
6335         return 0;
6336 }
6337 #endif
6338
6339 static void igb_shutdown(struct pci_dev *pdev)
6340 {
6341         bool wake;
6342
6343         __igb_shutdown(pdev, &wake);
6344
6345         if (system_state == SYSTEM_POWER_OFF) {
6346                 pci_wake_from_d3(pdev, wake);
6347                 pci_set_power_state(pdev, PCI_D3hot);
6348         }
6349 }
6350
6351 #ifdef CONFIG_NET_POLL_CONTROLLER
6352 /*
6353  * Polling 'interrupt' - used by things like netconsole to send skbs
6354  * without having to re-enable interrupts. It's not called while
6355  * the interrupt routine is executing.
6356  */
6357 static void igb_netpoll(struct net_device *netdev)
6358 {
6359         struct igb_adapter *adapter = netdev_priv(netdev);
6360         struct e1000_hw *hw = &adapter->hw;
6361         int i;
6362
6363         if (!adapter->msix_entries) {
6364                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6365                 igb_irq_disable(adapter);
6366                 napi_schedule(&q_vector->napi);
6367                 return;
6368         }
6369
6370         for (i = 0; i < adapter->num_q_vectors; i++) {
6371                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6372                 wr32(E1000_EIMC, q_vector->eims_value);
6373                 napi_schedule(&q_vector->napi);
6374         }
6375 }
6376 #endif /* CONFIG_NET_POLL_CONTROLLER */
6377
6378 /**
6379  * igb_io_error_detected - called when PCI error is detected
6380  * @pdev: Pointer to PCI device
6381  * @state: The current pci connection state
6382  *
6383  * This function is called after a PCI bus error affecting
6384  * this device has been detected.
6385  */
6386 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6387                                               pci_channel_state_t state)
6388 {
6389         struct net_device *netdev = pci_get_drvdata(pdev);
6390         struct igb_adapter *adapter = netdev_priv(netdev);
6391
6392         netif_device_detach(netdev);
6393
6394         if (state == pci_channel_io_perm_failure)
6395                 return PCI_ERS_RESULT_DISCONNECT;
6396
6397         if (netif_running(netdev))
6398                 igb_down(adapter);
6399         pci_disable_device(pdev);
6400
6401         /* Request a slot reset. */
6402         return PCI_ERS_RESULT_NEED_RESET;
6403 }
6404
6405 /**
6406  * igb_io_slot_reset - called after the pci bus has been reset.
6407  * @pdev: Pointer to PCI device
6408  *
6409  * Restart the card from scratch, as if from a cold boot. The
6410  * implementation resembles the first half of the igb_resume routine.
6411  */
6412 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6413 {
6414         struct net_device *netdev = pci_get_drvdata(pdev);
6415         struct igb_adapter *adapter = netdev_priv(netdev);
6416         struct e1000_hw *hw = &adapter->hw;
6417         pci_ers_result_t result;
6418         int err;
6419
6420         if (pci_enable_device_mem(pdev)) {
6421                 dev_err(&pdev->dev,
6422                         "Cannot re-enable PCI device after reset.\n");
6423                 result = PCI_ERS_RESULT_DISCONNECT;
6424         } else {
6425                 pci_set_master(pdev);
6426                 pci_restore_state(pdev);
6427                 pci_save_state(pdev);
6428
6429                 pci_enable_wake(pdev, PCI_D3hot, 0);
6430                 pci_enable_wake(pdev, PCI_D3cold, 0);
6431
6432                 igb_reset(adapter);
6433                 wr32(E1000_WUS, ~0);
6434                 result = PCI_ERS_RESULT_RECOVERED;
6435         }
6436
6437         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6438         if (err) {
6439                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6440                         "failed 0x%x\n", err);
6441                 /* non-fatal, continue */
6442         }
6443
6444         return result;
6445 }
6446
6447 /**
6448  * igb_io_resume - called when traffic can start flowing again.
6449  * @pdev: Pointer to PCI device
6450  *
6451  * This callback is called when the error recovery driver tells us that
6452  * it's OK to resume normal operation. The implementation resembles the
6453  * second half of the igb_resume routine.
6454  */
6455 static void igb_io_resume(struct pci_dev *pdev)
6456 {
6457         struct net_device *netdev = pci_get_drvdata(pdev);
6458         struct igb_adapter *adapter = netdev_priv(netdev);
6459
6460         if (netif_running(netdev)) {
6461                 if (igb_up(adapter)) {
6462                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6463                         return;
6464                 }
6465         }
6466
6467         netif_device_attach(netdev);
6468
6469         /* let the f/w know that the h/w is now under the control of the
6470          * driver. */
6471         igb_get_hw_control(adapter);
6472 }
6473
6474 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6475                              u8 qsel)
6476 {
6477         u32 rar_low, rar_high;
6478         struct e1000_hw *hw = &adapter->hw;
6479
6480         /* HW expects these in little endian so we reverse the byte order
6481          * from network order (big endian) to little endian
6482          */
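             /*
              * Worked example (illustrative): for the MAC address
              * 00:1b:21:aa:bb:cc the expressions below yield
              * rar_low = 0xaa211b00 and rar_high = 0x0000ccbb, before the
              * address-valid and pool bits are OR'ed into rar_high.
              */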
6483         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6484                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6485         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6486
6487         /* Indicate to hardware the Address is Valid. */
6488         rar_high |= E1000_RAH_AV;
6489
6490         if (hw->mac.type == e1000_82575)
6491                 rar_high |= E1000_RAH_POOL_1 * qsel;
6492         else
6493                 rar_high |= E1000_RAH_POOL_1 << qsel;
6494
6495         wr32(E1000_RAL(index), rar_low);
6496         wrfl();
6497         wr32(E1000_RAH(index), rar_high);
6498         wrfl();
6499 }
6500
6501 static int igb_set_vf_mac(struct igb_adapter *adapter,
6502                           int vf, unsigned char *mac_addr)
6503 {
6504         struct e1000_hw *hw = &adapter->hw;
6505         /* VF MAC addresses start at the end of the receive addresses and
6506          * move towards the first, so a collision should not be possible */
6507         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6508
6509         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6510
6511         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6512
6513         return 0;
6514 }
6515
6516 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6517 {
6518         struct igb_adapter *adapter = netdev_priv(netdev);
6519         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6520                 return -EINVAL;
6521         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6522         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6523         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6524                                       " change effective.\n");
6525         if (test_bit(__IGB_DOWN, &adapter->state)) {
6526                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6527                          " but the PF device is not up.\n");
6528                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6529                          " attempting to use the VF device.\n");
6530         }
6531         return igb_set_vf_mac(adapter, vf, mac);
6532 }
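
     /*
      * Illustrative usage (not part of the driver): the ndo_set_vf_mac hook
      * above is reached from the rtnetlink VF configuration path, for example:
      *
      *      ip link set eth0 vf 0 mac 00:11:22:33:44:55
      *
      * The interface name, VF index and address are assumptions for the example.
      */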
6533
6534 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6535 {
6536         return -EOPNOTSUPP;
6537 }
6538
6539 static int igb_ndo_get_vf_config(struct net_device *netdev,
6540                                  int vf, struct ifla_vf_info *ivi)
6541 {
6542         struct igb_adapter *adapter = netdev_priv(netdev);
6543         if (vf >= adapter->vfs_allocated_count)
6544                 return -EINVAL;
6545         ivi->vf = vf;
6546         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6547         ivi->tx_rate = 0;
6548         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6549         ivi->qos = adapter->vf_data[vf].pf_qos;
6550         return 0;
6551 }
6552
6553 static void igb_vmm_control(struct igb_adapter *adapter)
6554 {
6555         struct e1000_hw *hw = &adapter->hw;
6556         u32 reg;
6557
6558         switch (hw->mac.type) {
6559         case e1000_82575:
6560         default:
6561                 /* replication is not supported for 82575 */
6562                 return;
6563         case e1000_82576:
6564                 /* notify HW that the MAC is adding vlan tags */
6565                 reg = rd32(E1000_DTXCTL);
6566                 reg |= E1000_DTXCTL_VLAN_ADDED;
6567                 wr32(E1000_DTXCTL, reg);
                     /* fall through */
6568         case e1000_82580:
6569                 /* enable replication vlan tag stripping */
6570                 reg = rd32(E1000_RPLOLR);
6571                 reg |= E1000_RPLOLR_STRVLAN;
6572                 wr32(E1000_RPLOLR, reg);
                     /* fall through */
6573         case e1000_i350:
6574                 /* none of the above registers are supported by i350 */
6575                 break;
6576         }
6577
6578         if (adapter->vfs_allocated_count) {
6579                 igb_vmdq_set_loopback_pf(hw, true);
6580                 igb_vmdq_set_replication_pf(hw, true);
6581         } else {
6582                 igb_vmdq_set_loopback_pf(hw, false);
6583                 igb_vmdq_set_replication_pf(hw, false);
6584         }
6585 }
6586
6587 /* igb_main.c */