/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
27
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61
62 #define MAJ 3
63 #define MIN 4
64 #define BUILD 7
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
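/* With MAJ = 3, MIN = 4 and BUILD = 7, __stringify() expands the
 * definition above to the version string "3.4.7-k". */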
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
                                                 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *);
#endif
static int igb_resume(struct device *);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif
static const struct dev_pm_ops igb_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
        SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
                        igb_runtime_idle)
};
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        .driver.pm = &igb_pm_ops,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
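/* netif_msg_init() treats an out-of-range value such as the default -1 as a
 * request for the driver's default mask, so leaving debug unset selects
 * DEFAULT_MSG_ENABLE (assuming msg_enable is initialized via
 * netif_msg_init() during probe, as is conventional for Intel drivers). */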

struct igb_reg_info {
        u32 ofs;
        char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

        /* General Registers */
        {E1000_CTRL, "CTRL"},
        {E1000_STATUS, "STATUS"},
        {E1000_CTRL_EXT, "CTRL_EXT"},

        /* Interrupt Registers */
        {E1000_ICR, "ICR"},

        /* RX Registers */
        {E1000_RCTL, "RCTL"},
        {E1000_RDLEN(0), "RDLEN"},
        {E1000_RDH(0), "RDH"},
        {E1000_RDT(0), "RDT"},
        {E1000_RXDCTL(0), "RXDCTL"},
        {E1000_RDBAL(0), "RDBAL"},
        {E1000_RDBAH(0), "RDBAH"},

        /* TX Registers */
        {E1000_TCTL, "TCTL"},
        {E1000_TDBAL(0), "TDBAL"},
        {E1000_TDBAH(0), "TDBAH"},
        {E1000_TDLEN(0), "TDLEN"},
        {E1000_TDH(0), "TDH"},
        {E1000_TDT(0), "TDT"},
        {E1000_TXDCTL(0), "TXDCTL"},
        {E1000_TDFH, "TDFH"},
        {E1000_TDFT, "TDFT"},
        {E1000_TDFHS, "TDFHS"},
        {E1000_TDFPC, "TDFPC"},

        /* List Terminator */
        {}
};

/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
        int n = 0;
        char rname[16];
        u32 regs[8];

        switch (reginfo->ofs) {
        case E1000_RDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDLEN(n));
                break;
        case E1000_RDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDH(n));
                break;
        case E1000_RDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDT(n));
                break;
        case E1000_RXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RXDCTL(n));
                break;
        case E1000_RDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAL(n));
                break;
        case E1000_RDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAH(n));
                break;
        case E1000_TDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAL(n));
                break;
        case E1000_TDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAH(n));
                break;
        case E1000_TDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDLEN(n));
                break;
        case E1000_TDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDH(n));
                break;
        case E1000_TDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDT(n));
                break;
        case E1000_TXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TXDCTL(n));
                break;
        default:
                pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
                return;
        }

        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
        pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
                regs[2], regs[3]);
}

/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        struct igb_reg_info *reginfo;
        struct igb_ring *tx_ring;
        union e1000_adv_tx_desc *tx_desc;
        struct my_u0 { u64 a; u64 b; } *u0;
        struct igb_ring *rx_ring;
        union e1000_adv_rx_desc *rx_desc;
        u32 staterr;
        u16 i, n;

        if (!netif_msg_hw(adapter))
                return;

        /* Print netdevice Info */
        if (netdev) {
                dev_info(&adapter->pdev->dev, "Net device Info\n");
                pr_info("Device Name     state            trans_start      "
                        "last_rx\n");
                pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
                        netdev->state, netdev->trans_start, netdev->last_rx);
        }

        /* Print Registers */
        dev_info(&adapter->pdev->dev, "Register Dump\n");
        pr_info(" Register Name   Value\n");
        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
             reginfo->name; reginfo++) {
                igb_regdump(hw, reginfo);
        }

        /* Print TX Ring Summary */
        if (!netdev || !netif_running(netdev))
                goto exit;

        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
        pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
        for (n = 0; n < adapter->num_tx_queues; n++) {
                struct igb_tx_buffer *buffer_info;
                tx_ring = adapter->tx_ring[n];
                buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
                pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
                        n, tx_ring->next_to_use, tx_ring->next_to_clean,
                        (u64)buffer_info->dma,
                        buffer_info->length,
                        buffer_info->next_to_watch,
                        (u64)buffer_info->time_stamp);
        }

        /* Print TX Rings */
        if (!netif_msg_tx_done(adapter))
                goto rx_ring_summary;

        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

        /* Transmit Descriptor Formats
         *
         * Advanced Transmit Descriptor
         *   +--------------------------------------------------------------+
         * 0 |         Buffer Address [63:0]                                |
         *   +--------------------------------------------------------------+
         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
         *   +--------------------------------------------------------------+
         *   63      46 45    40 39 38 36 35 32 31   24             15       0
         */

        for (n = 0; n < adapter->num_tx_queues; n++) {
                tx_ring = adapter->tx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
                        "[bi->dma       ] leng  ntw timestamp        "
                        "bi->skb\n");

                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
                        const char *next_desc;
                        struct igb_tx_buffer *buffer_info;
                        tx_desc = IGB_TX_DESC(tx_ring, i);
                        buffer_info = &tx_ring->tx_buffer_info[i];
                        u0 = (struct my_u0 *)tx_desc;
                        if (i == tx_ring->next_to_use &&
                            i == tx_ring->next_to_clean)
                                next_desc = " NTC/U";
                        else if (i == tx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == tx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        pr_info("T [0x%03X]    %016llX %016llX %016llX"
                                " %04X  %p %016llX %p%s\n", i,
                                le64_to_cpu(u0->a),
                                le64_to_cpu(u0->b),
                                (u64)buffer_info->dma,
                                buffer_info->length,
                                buffer_info->next_to_watch,
                                (u64)buffer_info->time_stamp,
                                buffer_info->skb, next_desc);

                        if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
                                print_hex_dump(KERN_INFO, "",
                                        DUMP_PREFIX_ADDRESS,
                                        16, 1, phys_to_virt(buffer_info->dma),
                                        buffer_info->length, true);
                }
        }

        /* Print RX Rings Summary */
rx_ring_summary:
        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
        pr_info("Queue [NTU] [NTC]\n");
        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info(" %5d %5X %5X\n",
                        n, rx_ring->next_to_use, rx_ring->next_to_clean);
        }

        /* Print RX Rings */
        if (!netif_msg_rx_status(adapter))
                goto exit;

        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

        /* Advanced Receive Descriptor (Read) Format
         *    63                                           1        0
         *    +-----------------------------------------------------+
         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
         *    +----------------------------------------------+------+
         *  8 |       Header Buffer Address [63:1]           |  DD  |
         *    +-----------------------------------------------------+
         *
         *
         * Advanced Receive Descriptor (Write-Back) Format
         *
         *   63       48 47    32 31  30      21 20 17 16   4 3     0
         *   +------------------------------------------------------+
         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
         *   | Checksum   Ident  |   |           |    | Type | Type |
         *   +------------------------------------------------------+
         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
         *   +------------------------------------------------------+
         *   63       48 47    32 31            20 19               0
         */

        for (n = 0; n < adapter->num_rx_queues; n++) {
                rx_ring = adapter->rx_ring[n];
                pr_info("------------------------------------\n");
                pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
                pr_info("------------------------------------\n");
                pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
                        "[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
                pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
                        "----------- [bi->skb] <-- Adv Rx Write-Back format\n");

                for (i = 0; i < rx_ring->count; i++) {
                        const char *next_desc;
                        struct igb_rx_buffer *buffer_info;
                        buffer_info = &rx_ring->rx_buffer_info[i];
                        rx_desc = IGB_RX_DESC(rx_ring, i);
                        u0 = (struct my_u0 *)rx_desc;
                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

                        if (i == rx_ring->next_to_use)
                                next_desc = " NTU";
                        else if (i == rx_ring->next_to_clean)
                                next_desc = " NTC";
                        else
                                next_desc = "";

                        if (staterr & E1000_RXD_STAT_DD) {
                                /* Descriptor Done */
                                pr_info("%s[0x%03X]     %016llX %016llX -------"
                                        "--------- %p%s\n", "RWB", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        buffer_info->skb, next_desc);
                        } else {
                                pr_info("%s[0x%03X]     %016llX %016llX %016llX"
                                        " %p%s\n", "R  ", i,
                                        le64_to_cpu(u0->a),
                                        le64_to_cpu(u0->b),
                                        (u64)buffer_info->dma,
                                        buffer_info->skb, next_desc);

                                if (netif_msg_pktdata(adapter)) {
                                        print_hex_dump(KERN_INFO, "",
                                                DUMP_PREFIX_ADDRESS,
                                                16, 1,
                                                phys_to_virt(buffer_info->dma),
                                                IGB_RX_HDR_LEN, true);
                                        print_hex_dump(KERN_INFO, "",
                                          DUMP_PREFIX_ADDRESS,
                                          16, 1,
                                          phys_to_virt(
                                            buffer_info->page_dma +
                                            buffer_info->page_offset),
                                          PAGE_SIZE/2, true);
                                }
                        }
                }
        }

exit:
        return;
}

/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev;
}

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        pr_info("%s - version %s\n",
               igb_driver_string, igb_driver_version);

        pr_info("%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
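/* Q_IDX_82576() interleaves ring indices across the two halves of the
 * 82576 queue space: Q_IDX_82576(0) == 0, Q_IDX_82576(1) == 8,
 * Q_IDX_82576(2) == 1, Q_IDX_82576(3) == 9, and so on, matching the
 * VF queue pairing described in the comment below. */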
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                }
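                /* Note: no break here; the fall through to the shared code
                 * below is intentional, so that any queues not claimed above
                 * still receive the default 1:1 register mapping. */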
        case e1000_82575:
        case e1000_82580:
        case e1000_i350:
        case e1000_i210:
        case e1000_i211:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;
        int orig_node = adapter->node;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
                adapter->tx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        for (i = 0; i < adapter->num_rx_queues; i++) {
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

                /*
                 * On i350, i210, and i211, loopback VLAN packets
                 * have the tag byte-swapped.
                 */
                if (adapter->hw.mac.type >= e1000_i350)
                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

                adapter->rx_ring[i] = ring;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        igb_cache_ring_register(adapter);

        return 0;

err:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_queues(adapter);

        return -ENOMEM;
}

/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset in IVAR, should be a multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
                           int index, int offset)
{
        u32 ivar = array_rd32(E1000_IVAR0, index);

        /* clear any bits that are currently set */
        ivar &= ~((u32)0xFF << offset);

        /* write vector and valid bit */
        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

        array_wr32(E1000_IVAR0, index, ivar);
}
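/* Worked example (illustrative values): with msix_vector = 3, index = 2 and
 * offset = 16, the read-modify-write above clears bits 23:16 of the third
 * IVAR register and writes (3 | E1000_IVAR_VALID) << 16 there, i.e. 0x83
 * in that byte lane, assuming E1000_IVAR_VALID is the 0x80 valid bit. */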

#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;
        u32 msixbm = 0;

        if (q_vector->rx.ring)
                rx_queue = q_vector->rx.ring->reg_idx;
        if (q_vector->tx.ring)
                tx_queue = q_vector->tx.ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
                 * or more queues to a vector, we write the appropriate bits
                 * into the MSIXBM register for that vector.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /*
                 * 82576 uses a table that essentially consists of 2 columns
                 * with 8 rows.  The ordering is column-major so we use the
                 * lower 3 bits as the row index, and the 4th bit as the
                 * column offset.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue & 0x7,
                                       (rx_queue & 0x8) << 1);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue & 0x7,
                                       ((tx_queue & 0x8) << 1) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
        case e1000_i350:
        case e1000_i210:
        case e1000_i211:
                /*
                 * On 82580 and newer adapters the scheme is similar to 82576
                 * however instead of ordering column-major we have things
                 * ordered row-major.  So we traverse the table by using
                 * bit 0 as the column offset, and the remaining bits as the
                 * row index.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue >> 1,
                                       (rx_queue & 0x1) << 4);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue >> 1,
                                       ((tx_queue & 0x1) << 4) + 8);
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}
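/* Example of the two IVAR layouts above: on 82576, rx_queue 10 lands in row
 * (10 & 0x7) = 2 at column offset ((10 & 0x8) << 1) = 16; on 82580 and
 * newer, rx_queue 5 lands in row (5 >> 1) = 2 at column offset
 * ((5 & 0x1) << 4) = 16, with the paired Tx entry 8 bits higher. */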

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support */
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
        case e1000_i350:
        case e1000_i210:
        case e1000_i211:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug.
                 */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0, free_vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto err_out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx.ring && q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else if (q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx.ring->queue_index);
                else if (q_vector->rx.ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto err_free;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;

err_free:
        /* free already assigned IRQs so a failure here does not leak them */
        free_irq(adapter->msix_entries[free_vector++].vector, adapter);

        vector--;
        for (i = 0; i < vector; i++)
                free_irq(adapter->msix_entries[free_vector++].vector,
                         adapter->q_vector[i]);
err_out:
        return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has no Rx queues, Tx queues,
 * or MSI-X interrupts allocated.
 **/
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        if (adapter->vfs_allocated_count)
                adapter->num_tx_queues = 1;
        else
                adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* i210 and i211 can only have 4 MSIX vectors for rx/tx queues. */
        if ((adapter->hw.mac.type == e1000_i210) ||
            (adapter->hw.mac.type == e1000_i211))
                numvecs = 4;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);

        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                wrfl();
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced queue counts. */
        rtnl_lock();
        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
        err = netif_set_real_num_rx_queues(adapter->netdev,
                adapter->num_rx_queues);
        rtnl_unlock();
        return err;
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;
        int orig_node = adapter->node;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                if ((adapter->num_q_vectors == (adapter->num_rx_queues +
                                                adapter->num_tx_queues)) &&
                    (adapter->num_rx_queues == v_idx))
                        adapter->node = orig_node;
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
                q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
                                        adapter->node);
                if (!q_vector)
                        q_vector = kzalloc(sizeof(struct igb_q_vector),
                                           GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        /* Restore the adapter's original node */
        adapter->node = orig_node;

        return 0;

err_out:
        /* Restore the adapter's original node */
        adapter->node = orig_node;
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx.ring = adapter->rx_ring[ring_idx];
        q_vector->rx.ring->q_vector = q_vector;
        q_vector->rx.count++;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx.ring = adapter->tx_ring[ring_idx];
        q_vector->tx.ring->q_vector = q_vector;
        q_vector->tx.count++;
        q_vector->itr_val = adapter->tx_itr_setting;
        q_vector->tx.work_limit = adapter->tx_work_limit;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}
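/* For example, with 4 Rx and 4 Tx queues: given 8 q_vectors each ring gets
 * its own vector, while given only 4 q_vectors the rings are paired so that
 * vector i services both rx_ring[i] and tx_ring[i]. */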

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        err = igb_set_interrupt_capability(adapter);
        if (err)
                return err;

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }

        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}
1265
1266 /**
1267  * igb_request_irq - initialize interrupts
1268  *
1269  * Attempts to configure interrupts using the best available
1270  * capabilities of the hardware and kernel.
1271  **/
1272 static int igb_request_irq(struct igb_adapter *adapter)
1273 {
1274         struct net_device *netdev = adapter->netdev;
1275         struct pci_dev *pdev = adapter->pdev;
1276         int err = 0;
1277
1278         if (adapter->msix_entries) {
1279                 err = igb_request_msix(adapter);
1280                 if (!err)
1281                         goto request_done;
1282                 /* fall back to MSI */
1283                 igb_clear_interrupt_scheme(adapter);
1284                 if (!pci_enable_msi(pdev))
1285                         adapter->flags |= IGB_FLAG_HAS_MSI;
1286                 igb_free_all_tx_resources(adapter);
1287                 igb_free_all_rx_resources(adapter);
1288                 adapter->num_tx_queues = 1;
1289                 adapter->num_rx_queues = 1;
1290                 adapter->num_q_vectors = 1;
1291                 err = igb_alloc_q_vectors(adapter);
1292                 if (err) {
1293                         dev_err(&pdev->dev,
1294                                 "Unable to allocate memory for vectors\n");
1295                         goto request_done;
1296                 }
1297                 err = igb_alloc_queues(adapter);
1298                 if (err) {
1299                         dev_err(&pdev->dev,
1300                                 "Unable to allocate memory for queues\n");
1301                         igb_free_q_vectors(adapter);
1302                         goto request_done;
1303                 }
1304                 igb_setup_all_tx_resources(adapter);
1305                 igb_setup_all_rx_resources(adapter);
1306         }
1307
1308         igb_assign_vector(adapter->q_vector[0], 0);
1309
1310         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311                 err = request_irq(pdev->irq, igb_intr_msi, 0,
1312                                   netdev->name, adapter);
1313                 if (!err)
1314                         goto request_done;
1315
1316                 /* fall back to legacy interrupts */
1317                 igb_reset_interrupt_capability(adapter);
1318                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1319         }
1320
1321         err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322                           netdev->name, adapter);
1323
1324         if (err)
1325                 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1326                         err);
1327
1328 request_done:
1329         return err;
1330 }
1331
1332 static void igb_free_irq(struct igb_adapter *adapter)
1333 {
1334         if (adapter->msix_entries) {
1335                 int vector = 0, i;
1336
1337                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1338
1339                 for (i = 0; i < adapter->num_q_vectors; i++)
1340                         free_irq(adapter->msix_entries[vector++].vector,
1341                                  adapter->q_vector[i]);
1342         } else {
1343                 free_irq(adapter->pdev->irq, adapter);
1344         }
1345 }
1346
1347 /**
1348  * igb_irq_disable - Mask off interrupt generation on the NIC
1349  * @adapter: board private structure
1350  **/
1351 static void igb_irq_disable(struct igb_adapter *adapter)
1352 {
1353         struct e1000_hw *hw = &adapter->hw;
1354
1355         /*
1356          * We need to be careful when disabling interrupts.  The VFs are also
1357          * mapped into these registers, so clearing the bits can cause issues
1358          * for the VF drivers; we therefore only clear what we set.
1359          */
1360         if (adapter->msix_entries) {
1361                 u32 regval = rd32(E1000_EIAM);
1362                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1364                 regval = rd32(E1000_EIAC);
1365                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1366         }
1367
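        /* mask all remaining causes and flush the posted writes before
         * waiting for any in-flight handlers to complete */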
1368         wr32(E1000_IAM, 0);
1369         wr32(E1000_IMC, ~0);
1370         wrfl();
1371         if (adapter->msix_entries) {
1372                 int i;
1373                 for (i = 0; i < adapter->num_q_vectors; i++)
1374                         synchronize_irq(adapter->msix_entries[i].vector);
1375         } else {
1376                 synchronize_irq(adapter->pdev->irq);
1377         }
1378 }
1379
1380 /**
1381  * igb_irq_enable - Enable default interrupt generation settings
1382  * @adapter: board private structure
1383  **/
1384 static void igb_irq_enable(struct igb_adapter *adapter)
1385 {
1386         struct e1000_hw *hw = &adapter->hw;
1387
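        /* in MSI-X mode, arm auto-clear/auto-mask for the queue vectors
         * and unmask them along with the link, reset and mailbox causes */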
1388         if (adapter->msix_entries) {
1389                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390                 u32 regval = rd32(E1000_EIAC);
1391                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392                 regval = rd32(E1000_EIAM);
1393                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1395                 if (adapter->vfs_allocated_count) {
1396                         wr32(E1000_MBVFIMR, 0xFF);
1397                         ims |= E1000_IMS_VMMB;
1398                 }
1399                 wr32(E1000_IMS, ims);
1400         } else {
1401                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1402                                 E1000_IMS_DRSTA);
1403                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1404                                 E1000_IMS_DRSTA);
1405         }
1406 }
1407
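/**
 * igb_update_mng_vlan - update the manageability VLAN filter entry
 * @adapter: board private structure
 *
 * Keeps the VLAN used by the firmware's DHCP cookie in the VLAN filter
 * table and removes the old entry once nothing else is using it.
 **/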
1408 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1409 {
1410         struct e1000_hw *hw = &adapter->hw;
1411         u16 vid = adapter->hw.mng_cookie.vlan_id;
1412         u16 old_vid = adapter->mng_vlan_id;
1413
1414         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415                 /* add VID to filter table */
1416                 igb_vfta_set(hw, vid, true);
1417                 adapter->mng_vlan_id = vid;
1418         } else {
1419                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1420         }
1421
1422         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1423             (vid != old_vid) &&
1424             !test_bit(old_vid, adapter->active_vlans)) {
1425                 /* remove VID from filter table */
1426                 igb_vfta_set(hw, old_vid, false);
1427         }
1428 }
1429
1430 /**
1431  * igb_release_hw_control - release control of the h/w to f/w
1432  * @adapter: address of board private structure
1433  *
1434  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435  * For ASF and Pass Through versions of f/w this means that the
1436  * driver is no longer loaded.
1437  *
1438  **/
1439 static void igb_release_hw_control(struct igb_adapter *adapter)
1440 {
1441         struct e1000_hw *hw = &adapter->hw;
1442         u32 ctrl_ext;
1443
1444         /* Let firmware take over control of h/w */
1445         ctrl_ext = rd32(E1000_CTRL_EXT);
1446         wr32(E1000_CTRL_EXT,
1447                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1448 }
1449
1450 /**
1451  * igb_get_hw_control - get control of the h/w from f/w
1452  * @adapter: address of board private structure
1453  *
1454  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455  * For ASF and Pass Through versions of f/w this means that
1456  * the driver is loaded.
1457  *
1458  **/
1459 static void igb_get_hw_control(struct igb_adapter *adapter)
1460 {
1461         struct e1000_hw *hw = &adapter->hw;
1462         u32 ctrl_ext;
1463
1464         /* Let firmware know the driver has taken over */
1465         ctrl_ext = rd32(E1000_CTRL_EXT);
1466         wr32(E1000_CTRL_EXT,
1467                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1468 }
1469
1470 /**
1471  * igb_configure - configure the hardware for RX and TX
1472  * @adapter: private board structure
1473  **/
1474 static void igb_configure(struct igb_adapter *adapter)
1475 {
1476         struct net_device *netdev = adapter->netdev;
1477         int i;
1478
1479         igb_get_hw_control(adapter);
1480         igb_set_rx_mode(netdev);
1481
1482         igb_restore_vlan(adapter);
1483
1484         igb_setup_tctl(adapter);
1485         igb_setup_mrqc(adapter);
1486         igb_setup_rctl(adapter);
1487
1488         igb_configure_tx(adapter);
1489         igb_configure_rx(adapter);
1490
1491         igb_rx_fifo_flush_82575(&adapter->hw);
1492
1493         /* call igb_desc_unused which always leaves
1494          * at least 1 descriptor unused to make sure
1495          * next_to_use != next_to_clean */
1496         for (i = 0; i < adapter->num_rx_queues; i++) {
1497                 struct igb_ring *ring = adapter->rx_ring[i];
1498                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1499         }
1500 }
1501
1502 /**
1503  * igb_power_up_link - Power up the phy/serdes link
1504  * @adapter: address of board private structure
1505  **/
1506 void igb_power_up_link(struct igb_adapter *adapter)
1507 {
1508         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509                 igb_power_up_phy_copper(&adapter->hw);
1510         else
1511                 igb_power_up_serdes_link_82575(&adapter->hw);
1512         igb_reset_phy(&adapter->hw);
1513 }
1514
1515 /**
1516  * igb_power_down_link - Power down the phy/serdes link
1517  * @adapter: address of board private structure
1518  */
1519 static void igb_power_down_link(struct igb_adapter *adapter)
1520 {
1521         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522                 igb_power_down_phy_copper_82575(&adapter->hw);
1523         else
1524                 igb_shutdown_serdes_link_82575(&adapter->hw);
1525 }
1526
1527 /**
1528  * igb_up - Open the interface and prepare it to handle traffic
1529  * @adapter: board private structure
1530  **/
1531 int igb_up(struct igb_adapter *adapter)
1532 {
1533         struct e1000_hw *hw = &adapter->hw;
1534         int i;
1535
1536         /* hardware has been reset, we need to reload some things */
1537         igb_configure(adapter);
1538
1539         clear_bit(__IGB_DOWN, &adapter->state);
1540
1541         for (i = 0; i < adapter->num_q_vectors; i++)
1542                 napi_enable(&(adapter->q_vector[i]->napi));
1543
1544         if (adapter->msix_entries)
1545                 igb_configure_msix(adapter);
1546         else
1547                 igb_assign_vector(adapter->q_vector[0], 0);
1548
1549         /* Clear any pending interrupts. */
1550         rd32(E1000_ICR);
1551         igb_irq_enable(adapter);
1552
1553         /* notify VFs that reset has been completed */
1554         if (adapter->vfs_allocated_count) {
1555                 u32 reg_data = rd32(E1000_CTRL_EXT);
1556                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1557                 wr32(E1000_CTRL_EXT, reg_data);
1558         }
1559
1560         netif_tx_start_all_queues(adapter->netdev);
1561
1562         /* start the watchdog. */
1563         hw->mac.get_link_status = 1;
1564         schedule_work(&adapter->watchdog_task);
1565
1566         return 0;
1567 }
1568
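/**
 * igb_down - Close the interface and stop Tx/Rx traffic
 * @adapter: board private structure
 **/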
1569 void igb_down(struct igb_adapter *adapter)
1570 {
1571         struct net_device *netdev = adapter->netdev;
1572         struct e1000_hw *hw = &adapter->hw;
1573         u32 tctl, rctl;
1574         int i;
1575
1576         /* signal that we're down so the interrupt handler does not
1577          * reschedule our watchdog timer */
1578         set_bit(__IGB_DOWN, &adapter->state);
1579
1580         /* disable receives in the hardware */
1581         rctl = rd32(E1000_RCTL);
1582         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583         /* flush and sleep below */
1584
1585         netif_tx_stop_all_queues(netdev);
1586
1587         /* disable transmits in the hardware */
1588         tctl = rd32(E1000_TCTL);
1589         tctl &= ~E1000_TCTL_EN;
1590         wr32(E1000_TCTL, tctl);
1591         /* flush both disables and wait for them to finish */
1592         wrfl();
1593         msleep(10);
1594
1595         for (i = 0; i < adapter->num_q_vectors; i++)
1596                 napi_disable(&(adapter->q_vector[i]->napi));
1597
1598         igb_irq_disable(adapter);
1599
1600         del_timer_sync(&adapter->watchdog_timer);
1601         del_timer_sync(&adapter->phy_info_timer);
1602
1603         netif_carrier_off(netdev);
1604
1605         /* record the stats before reset */
1606         spin_lock(&adapter->stats64_lock);
1607         igb_update_stats(adapter, &adapter->stats64);
1608         spin_unlock(&adapter->stats64_lock);
1609
1610         adapter->link_speed = 0;
1611         adapter->link_duplex = 0;
1612
1613         if (!pci_channel_offline(adapter->pdev))
1614                 igb_reset(adapter);
1615         igb_clean_all_tx_rings(adapter);
1616         igb_clean_all_rx_rings(adapter);
1617 #ifdef CONFIG_IGB_DCA
1618
1619         /* since we reset the hardware DCA settings were cleared */
1620         igb_setup_dca(adapter);
1621 #endif
1622 }
1623
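/**
 * igb_reinit_locked - bring the interface down and back up
 * @adapter: board private structure
 *
 * Serializes concurrent resets via the __IGB_RESETTING bit.
 **/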
1624 void igb_reinit_locked(struct igb_adapter *adapter)
1625 {
1626         WARN_ON(in_interrupt());
1627         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1628                 msleep(1);
1629         igb_down(adapter);
1630         igb_up(adapter);
1631         clear_bit(__IGB_RESETTING, &adapter->state);
1632 }
1633
1634 void igb_reset(struct igb_adapter *adapter)
1635 {
1636         struct pci_dev *pdev = adapter->pdev;
1637         struct e1000_hw *hw = &adapter->hw;
1638         struct e1000_mac_info *mac = &hw->mac;
1639         struct e1000_fc_info *fc = &hw->fc;
1640         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1641         u16 hwm;
1642
1643         /* Repartition PBA for MTUs greater than 9K.
1644          * CTRL.RST is required for the change to take effect.
1645          */
1646         switch (mac->type) {
1647         case e1000_i350:
1648         case e1000_82580:
1649                 pba = rd32(E1000_RXPBS);
1650                 pba = igb_rxpbs_adjust_82580(pba);
1651                 break;
1652         case e1000_82576:
1653                 pba = rd32(E1000_RXPBS);
1654                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1655                 break;
1656         case e1000_82575:
1657         case e1000_i210:
1658         case e1000_i211:
1659         default:
1660                 pba = E1000_PBA_34K;
1661                 break;
1662         }
1663
1664         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1665             (mac->type < e1000_82576)) {
1666                 /* adjust PBA for jumbo frames */
1667                 wr32(E1000_PBA, pba);
1668
1669                 /* To maintain wire speed transmits, the Tx FIFO should be
1670                  * large enough to accommodate two full transmit packets,
1671                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1672                  * the Rx FIFO should be large enough to accommodate at least
1673                  * one full receive packet and is similarly rounded up and
1674                  * expressed in KB. */
1675                 pba = rd32(E1000_PBA);
1676                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1677                 tx_space = pba >> 16;
1678                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1679                 pba &= 0xffff;
1680                 /* the Tx FIFO also stores 16 bytes of information about the Tx packet,
1681                  * but doesn't include the Ethernet FCS because hardware appends it */
1682                 min_tx_space = (adapter->max_frame_size +
1683                                 sizeof(union e1000_adv_tx_desc) -
1684                                 ETH_FCS_LEN) * 2;
1685                 min_tx_space = ALIGN(min_tx_space, 1024);
1686                 min_tx_space >>= 10;
1687                 /* software strips receive CRC, so leave room for it */
1688                 min_rx_space = adapter->max_frame_size;
1689                 min_rx_space = ALIGN(min_rx_space, 1024);
1690                 min_rx_space >>= 10;
1691
1692                 /* If current Tx allocation is less than the min Tx FIFO size,
1693                  * and the min Tx FIFO size is less than the current Rx FIFO
1694                  * allocation, take space away from current Rx allocation */
1695                 if (tx_space < min_tx_space &&
1696                     ((min_tx_space - tx_space) < pba)) {
1697                         pba = pba - (min_tx_space - tx_space);
1698
1699                         /* if short on rx space, rx wins and must trump tx
1700                          * adjustment */
1701                         if (pba < min_rx_space)
1702                                 pba = min_rx_space;
1703                 }
1704                 wr32(E1000_PBA, pba);
1705         }
1706
1707         /* flow control settings */
1708         /* The high water mark must be low enough to fit one full frame
1709          * (or the size used for early receive) above it in the Rx FIFO.
1710          * Set it to the lower of:
1711          * - 90% of the Rx FIFO size, or
1712          * - the full Rx FIFO size minus two full frames */
1713         hwm = min(((pba << 10) * 9 / 10),
1714                         ((pba << 10) - 2 * adapter->max_frame_size));
1715
1716         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1717         fc->low_water = fc->high_water - 16;
1718         fc->pause_time = 0xFFFF;
1719         fc->send_xon = 1;
1720         fc->current_mode = fc->requested_mode;
1721
1722         /* disable receive for all VFs and wait one second */
1723         if (adapter->vfs_allocated_count) {
1724                 int i;
1725                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1726                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1727
1728                 /* ping all the active vfs to let them know we are going down */
1729                 igb_ping_all_vfs(adapter);
1730
1731                 /* disable transmits and receives */
1732                 wr32(E1000_VFRE, 0);
1733                 wr32(E1000_VFTE, 0);
1734         }
1735
1736         /* Allow time for pending master requests to run */
1737         hw->mac.ops.reset_hw(hw);
1738         wr32(E1000_WUC, 0);
1739
1740         if (hw->mac.ops.init_hw(hw))
1741                 dev_err(&pdev->dev, "Hardware Error\n");
1742
1743         /*
1744          * Flow control settings reset on hardware reset, so guarantee flow
1745          * control is off when forcing speed.
1746          */
1747         if (!hw->mac.autoneg)
1748                 igb_force_mac_fc(hw);
1749
1750         igb_init_dmac(adapter, pba);
1751         if (!netif_running(adapter->netdev))
1752                 igb_power_down_link(adapter);
1753
1754         igb_update_mng_vlan(adapter);
1755
1756         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1757         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1758
1759         igb_get_phy_info(hw);
1760 }
1761
1762 static netdev_features_t igb_fix_features(struct net_device *netdev,
1763         netdev_features_t features)
1764 {
1765         /*
1766          * Since there is no support for separate rx/tx vlan accel
1767          * enable/disable, make sure the Tx flag is always in the same state as Rx.
1768          */
1769         if (features & NETIF_F_HW_VLAN_RX)
1770                 features |= NETIF_F_HW_VLAN_TX;
1771         else
1772                 features &= ~NETIF_F_HW_VLAN_TX;
1773
1774         return features;
1775 }
1776
1777 static int igb_set_features(struct net_device *netdev,
1778         netdev_features_t features)
1779 {
1780         netdev_features_t changed = netdev->features ^ features;
1781         struct igb_adapter *adapter = netdev_priv(netdev);
1782
1783         if (changed & NETIF_F_HW_VLAN_RX)
1784                 igb_vlan_mode(netdev, features);
1785
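        /* of the remaining changes, only RXALL requires a reset/reinit */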
1786         if (!(changed & NETIF_F_RXALL))
1787                 return 0;
1788
1789         netdev->features = features;
1790
1791         if (netif_running(netdev))
1792                 igb_reinit_locked(adapter);
1793         else
1794                 igb_reset(adapter);
1795
1796         return 0;
1797 }
1798
1799 static const struct net_device_ops igb_netdev_ops = {
1800         .ndo_open               = igb_open,
1801         .ndo_stop               = igb_close,
1802         .ndo_start_xmit         = igb_xmit_frame,
1803         .ndo_get_stats64        = igb_get_stats64,
1804         .ndo_set_rx_mode        = igb_set_rx_mode,
1805         .ndo_set_mac_address    = igb_set_mac,
1806         .ndo_change_mtu         = igb_change_mtu,
1807         .ndo_do_ioctl           = igb_ioctl,
1808         .ndo_tx_timeout         = igb_tx_timeout,
1809         .ndo_validate_addr      = eth_validate_addr,
1810         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1811         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1812         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1813         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1814         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1815         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1816 #ifdef CONFIG_NET_POLL_CONTROLLER
1817         .ndo_poll_controller    = igb_netpoll,
1818 #endif
1819         .ndo_fix_features       = igb_fix_features,
1820         .ndo_set_features       = igb_set_features,
1821 };
1822
1823 /**
1824  * igb_probe - Device Initialization Routine
1825  * @pdev: PCI device information struct
1826  * @ent: entry in igb_pci_tbl
1827  *
1828  * Returns 0 on success, negative on failure
1829  *
1830  * igb_probe initializes an adapter identified by a pci_dev structure.
1831  * The OS initialization, configuring of the adapter private structure,
1832  * and a hardware reset occur.
1833  **/
1834 static int __devinit igb_probe(struct pci_dev *pdev,
1835                                const struct pci_device_id *ent)
1836 {
1837         struct net_device *netdev;
1838         struct igb_adapter *adapter;
1839         struct e1000_hw *hw;
1840         u16 eeprom_data = 0;
1841         s32 ret_val;
1842         static int global_quad_port_a; /* global quad port a indication */
1843         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1844         unsigned long mmio_start, mmio_len;
1845         int err, pci_using_dac;
1846         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1847         u8 part_str[E1000_PBANUM_LENGTH];
1848
1849         /* Catch broken hardware that put the wrong VF device ID in
1850          * the PCIe SR-IOV capability.
1851          */
1852         if (pdev->is_virtfn) {
1853                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1854                         pci_name(pdev), pdev->vendor, pdev->device);
1855                 return -EINVAL;
1856         }
1857
1858         err = pci_enable_device_mem(pdev);
1859         if (err)
1860                 return err;
1861
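        /* prefer 64-bit DMA, falling back to 32-bit if the platform
         * cannot support it */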
1862         pci_using_dac = 0;
1863         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1864         if (!err) {
1865                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1866                 if (!err)
1867                         pci_using_dac = 1;
1868         } else {
1869                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1870                 if (err) {
1871                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1872                         if (err) {
1873                                 dev_err(&pdev->dev,
1874                                         "No usable DMA configuration, aborting\n");
1875                                 goto err_dma;
1876                         }
1877                 }
1878         }
1879
1880         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1881                                            IORESOURCE_MEM),
1882                                            igb_driver_name);
1883         if (err)
1884                 goto err_pci_reg;
1885
1886         pci_enable_pcie_error_reporting(pdev);
1887
1888         pci_set_master(pdev);
1889         pci_save_state(pdev);
1890
1891         err = -ENOMEM;
1892         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1893                                    IGB_MAX_TX_QUEUES);
1894         if (!netdev)
1895                 goto err_alloc_etherdev;
1896
1897         SET_NETDEV_DEV(netdev, &pdev->dev);
1898
1899         pci_set_drvdata(pdev, netdev);
1900         adapter = netdev_priv(netdev);
1901         adapter->netdev = netdev;
1902         adapter->pdev = pdev;
1903         hw = &adapter->hw;
1904         hw->back = adapter;
1905         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1906
1907         mmio_start = pci_resource_start(pdev, 0);
1908         mmio_len = pci_resource_len(pdev, 0);
1909
1910         err = -EIO;
1911         hw->hw_addr = ioremap(mmio_start, mmio_len);
1912         if (!hw->hw_addr)
1913                 goto err_ioremap;
1914
1915         netdev->netdev_ops = &igb_netdev_ops;
1916         igb_set_ethtool_ops(netdev);
1917         netdev->watchdog_timeo = 5 * HZ;
1918
1919         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1920
1921         netdev->mem_start = mmio_start;
1922         netdev->mem_end = mmio_start + mmio_len;
1923
1924         /* PCI config space info */
1925         hw->vendor_id = pdev->vendor;
1926         hw->device_id = pdev->device;
1927         hw->revision_id = pdev->revision;
1928         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1929         hw->subsystem_device_id = pdev->subsystem_device;
1930
1931         /* Copy the default MAC, PHY and NVM function pointers */
1932         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1933         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1934         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1935         /* Initialize skew-specific constants */
1936         err = ei->get_invariants(hw);
1937         if (err)
1938                 goto err_sw_init;
1939
1940         /* setup the private structure */
1941         err = igb_sw_init(adapter);
1942         if (err)
1943                 goto err_sw_init;
1944
1945         igb_get_bus_info_pcie(hw);
1946
1947         hw->phy.autoneg_wait_to_complete = false;
1948
1949         /* Copper options */
1950         if (hw->phy.media_type == e1000_media_type_copper) {
1951                 hw->phy.mdix = AUTO_ALL_MODES;
1952                 hw->phy.disable_polarity_correction = false;
1953                 hw->phy.ms_type = e1000_ms_hw_default;
1954         }
1955
1956         if (igb_check_reset_block(hw))
1957                 dev_info(&pdev->dev,
1958                         "PHY reset is blocked due to SOL/IDER session.\n");
1959
1960         /*
1961          * features is initialized to 0 in allocation; it might have bits
1962          * set by igb_sw_init, so we should use an OR instead of an
1963          * assignment.
1964          */
1965         netdev->features |= NETIF_F_SG |
1966                             NETIF_F_IP_CSUM |
1967                             NETIF_F_IPV6_CSUM |
1968                             NETIF_F_TSO |
1969                             NETIF_F_TSO6 |
1970                             NETIF_F_RXHASH |
1971                             NETIF_F_RXCSUM |
1972                             NETIF_F_HW_VLAN_RX |
1973                             NETIF_F_HW_VLAN_TX;
1974
1975         /* copy netdev features into list of user selectable features */
1976         netdev->hw_features |= netdev->features;
1977         netdev->hw_features |= NETIF_F_RXALL;
1978
1979         /* set this bit last since it cannot be part of hw_features */
1980         netdev->features |= NETIF_F_HW_VLAN_FILTER;
1981
1982         netdev->vlan_features |= NETIF_F_TSO |
1983                                  NETIF_F_TSO6 |
1984                                  NETIF_F_IP_CSUM |
1985                                  NETIF_F_IPV6_CSUM |
1986                                  NETIF_F_SG;
1987
1988         netdev->priv_flags |= IFF_SUPP_NOFCS;
1989
1990         if (pci_using_dac) {
1991                 netdev->features |= NETIF_F_HIGHDMA;
1992                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1993         }
1994
1995         if (hw->mac.type >= e1000_82576) {
1996                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1997                 netdev->features |= NETIF_F_SCTP_CSUM;
1998         }
1999
2000         netdev->priv_flags |= IFF_UNICAST_FLT;
2001
2002         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2003
2004         /* before reading the NVM, reset the controller to put the device in a
2005          * known good starting state */
2006         hw->mac.ops.reset_hw(hw);
2007
2008         /*
2009          * make sure the NVM is good; i211 parts have special NVM that
2010          * doesn't contain a checksum
2011          */
2012         if (hw->mac.type != e1000_i211) {
2013                 if (hw->nvm.ops.validate(hw) < 0) {
2014                         dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2015                         err = -EIO;
2016                         goto err_eeprom;
2017                 }
2018         }
2019
2020         /* copy the MAC address out of the NVM */
2021         if (hw->mac.ops.read_mac_addr(hw))
2022                 dev_err(&pdev->dev, "NVM Read Error\n");
2023
2024         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2025         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2026
2027         if (!is_valid_ether_addr(netdev->perm_addr)) {
2028                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2029                 err = -EIO;
2030                 goto err_eeprom;
2031         }
2032
2033         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2034                     (unsigned long) adapter);
2035         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2036                     (unsigned long) adapter);
2037
2038         INIT_WORK(&adapter->reset_task, igb_reset_task);
2039         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2040
2041         /* Initialize link properties that are user-changeable */
2042         adapter->fc_autoneg = true;
2043         hw->mac.autoneg = true;
2044         hw->phy.autoneg_advertised = 0x2f;
2045
2046         hw->fc.requested_mode = e1000_fc_default;
2047         hw->fc.current_mode = e1000_fc_default;
2048
2049         igb_validate_mdi_setting(hw);
2050
2051         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2052          * enable the ACPI Magic Packet filter
2053          */
2054
2055         if (hw->bus.func == 0)
2056                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2057         else if (hw->mac.type >= e1000_82580)
2058                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2059                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2060                                  &eeprom_data);
2061         else if (hw->bus.func == 1)
2062                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2063
2064         if (eeprom_data & eeprom_apme_mask)
2065                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2066
2067         /* now that we have the eeprom settings, apply the special cases where
2068          * the eeprom may be wrong or the board simply won't support wake on
2069          * lan on a particular port */
2070         switch (pdev->device) {
2071         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2072                 adapter->eeprom_wol = 0;
2073                 break;
2074         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2075         case E1000_DEV_ID_82576_FIBER:
2076         case E1000_DEV_ID_82576_SERDES:
2077                 /* Wake events only supported on port A for dual fiber
2078                  * regardless of eeprom setting */
2079                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2080                         adapter->eeprom_wol = 0;
2081                 break;
2082         case E1000_DEV_ID_82576_QUAD_COPPER:
2083         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2084                 /* if quad port adapter, disable WoL on all but port A */
2085                 if (global_quad_port_a != 0)
2086                         adapter->eeprom_wol = 0;
2087                 else
2088                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2089                 /* Reset for multiple quad port adapters */
2090                 if (++global_quad_port_a == 4)
2091                         global_quad_port_a = 0;
2092                 break;
2093         }
2094
2095         /* initialize the wol settings based on the eeprom settings */
2096         adapter->wol = adapter->eeprom_wol;
2097         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2098
2099         /* reset the hardware with the new settings */
2100         igb_reset(adapter);
2101
2102         /* let the f/w know that the h/w is now under the control of the
2103          * driver. */
2104         igb_get_hw_control(adapter);
2105
2106         strcpy(netdev->name, "eth%d");
2107         err = register_netdev(netdev);
2108         if (err)
2109                 goto err_register;
2110
2111         /* carrier off reporting is important to ethtool even BEFORE open */
2112         netif_carrier_off(netdev);
2113
2114 #ifdef CONFIG_IGB_DCA
2115         if (dca_add_requester(&pdev->dev) == 0) {
2116                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2117                 dev_info(&pdev->dev, "DCA enabled\n");
2118                 igb_setup_dca(adapter);
2119         }
2120
2121 #endif
2122 #ifdef CONFIG_IGB_PTP
2123         /* do hw tstamp init after resetting */
2124         igb_ptp_init(adapter);
2125
2126 #endif
2127         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2128         /* print bus type/speed/width info */
2129         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2130                  netdev->name,
2131                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2132                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2133                                                             "unknown"),
2134                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2135                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2136                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2137                    "unknown"),
2138                  netdev->dev_addr);
2139
2140         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2141         if (ret_val)
2142                 strcpy(part_str, "Unknown");
2143         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2144         dev_info(&pdev->dev,
2145                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2146                 adapter->msix_entries ? "MSI-X" :
2147                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2148                 adapter->num_rx_queues, adapter->num_tx_queues);
2149         switch (hw->mac.type) {
2150         case e1000_i350:
2151         case e1000_i210:
2152         case e1000_i211:
2153                 igb_set_eee_i350(hw);
2154                 break;
2155         default:
2156                 break;
2157         }
2158
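        /* drop the PM usage count taken for probe so the device may
         * runtime suspend once the interface is idle */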
2159         pm_runtime_put_noidle(&pdev->dev);
2160         return 0;
2161
2162 err_register:
2163         igb_release_hw_control(adapter);
2164 err_eeprom:
2165         if (!igb_check_reset_block(hw))
2166                 igb_reset_phy(hw);
2167
2168         if (hw->flash_address)
2169                 iounmap(hw->flash_address);
2170 err_sw_init:
2171         igb_clear_interrupt_scheme(adapter);
2172         iounmap(hw->hw_addr);
2173 err_ioremap:
2174         free_netdev(netdev);
2175 err_alloc_etherdev:
2176         pci_release_selected_regions(pdev,
2177                                      pci_select_bars(pdev, IORESOURCE_MEM));
2178 err_pci_reg:
2179 err_dma:
2180         pci_disable_device(pdev);
2181         return err;
2182 }
2183
2184 /**
2185  * igb_remove - Device Removal Routine
2186  * @pdev: PCI device information struct
2187  *
2188  * igb_remove is called by the PCI subsystem to alert the driver
2189  * that it should release a PCI device.  This could be caused by a
2190  * Hot-Plug event, or because the driver is going to be removed from
2191  * memory.
2192  **/
2193 static void __devexit igb_remove(struct pci_dev *pdev)
2194 {
2195         struct net_device *netdev = pci_get_drvdata(pdev);
2196         struct igb_adapter *adapter = netdev_priv(netdev);
2197         struct e1000_hw *hw = &adapter->hw;
2198
2199         pm_runtime_get_noresume(&pdev->dev);
2200 #ifdef CONFIG_IGB_PTP
2201         igb_ptp_remove(adapter);
2202
2203 #endif
2204         /*
2205          * The watchdog timer may be rescheduled, so explicitly
2206          * prevent the watchdog from being rescheduled.
2207          */
2208         set_bit(__IGB_DOWN, &adapter->state);
2209         del_timer_sync(&adapter->watchdog_timer);
2210         del_timer_sync(&adapter->phy_info_timer);
2211
2212         cancel_work_sync(&adapter->reset_task);
2213         cancel_work_sync(&adapter->watchdog_task);
2214
2215 #ifdef CONFIG_IGB_DCA
2216         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2217                 dev_info(&pdev->dev, "DCA disabled\n");
2218                 dca_remove_requester(&pdev->dev);
2219                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2220                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2221         }
2222 #endif
2223
2224         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2225          * would have already happened in close and is redundant. */
2226         igb_release_hw_control(adapter);
2227
2228         unregister_netdev(netdev);
2229
2230         igb_clear_interrupt_scheme(adapter);
2231
2232 #ifdef CONFIG_PCI_IOV
2233         /* reclaim resources allocated to VFs */
2234         if (adapter->vf_data) {
2235                 /* disable iov and allow time for transactions to clear */
2236                 if (!igb_check_vf_assignment(adapter)) {
2237                         pci_disable_sriov(pdev);
2238                         msleep(500);
2239                 } else {
2240                         dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2241                 }
2242
2243                 kfree(adapter->vf_data);
2244                 adapter->vf_data = NULL;
2245                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2246                 wrfl();
2247                 msleep(100);
2248                 dev_info(&pdev->dev, "IOV Disabled\n");
2249         }
2250 #endif
2251
2252         iounmap(hw->hw_addr);
2253         if (hw->flash_address)
2254                 iounmap(hw->flash_address);
2255         pci_release_selected_regions(pdev,
2256                                      pci_select_bars(pdev, IORESOURCE_MEM));
2257
2258         kfree(adapter->shadow_vfta);
2259         free_netdev(netdev);
2260
2261         pci_disable_pcie_error_reporting(pdev);
2262
2263         pci_disable_device(pdev);
2264 }
2265
2266 /**
2267  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2268  * @adapter: board private structure to initialize
2269  *
2270  * This function initializes the vf specific data storage and then attempts to
2271  * allocate the VFs.  The reason for ordering it this way is because it is much
2272  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2273  * the memory for the VFs.
2274  **/
2275 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2276 {
2277 #ifdef CONFIG_PCI_IOV
2278         struct pci_dev *pdev = adapter->pdev;
2279         struct e1000_hw *hw = &adapter->hw;
2280         int old_vfs = igb_find_enabled_vfs(adapter);
2281         int i;
2282
2283         /* Virtualization features not supported on i210 family. */
2284         if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2285                 return;
2286
2287         if (old_vfs) {
2288                 dev_info(&pdev->dev, "%d pre-allocated VFs found - overriding "
2289                          "max_vfs setting of %d\n", old_vfs, max_vfs);
2290                 adapter->vfs_allocated_count = old_vfs;
2291         }
2292
2293         if (!adapter->vfs_allocated_count)
2294                 return;
2295
2296         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2297                                 sizeof(struct vf_data_storage), GFP_KERNEL);
2298
2299         /* if allocation failed then we do not support SR-IOV */
2300         if (!adapter->vf_data) {
2301                 adapter->vfs_allocated_count = 0;
2302                 dev_err(&pdev->dev,
2303                         "Unable to allocate memory for VF Data Storage\n");
2304                 goto out;
2305         }
2306
2307         if (!old_vfs) {
2308                 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2309                         goto err_out;
2310         }
2311         dev_info(&pdev->dev, "%d VFs allocated\n",
2312                  adapter->vfs_allocated_count);
2313         for (i = 0; i < adapter->vfs_allocated_count; i++)
2314                 igb_vf_configure(adapter, i);
2315
2316         /* DMA Coalescing is not supported in IOV mode. */
2317         adapter->flags &= ~IGB_FLAG_DMAC;
2318         goto out;
2319 err_out:
2320         kfree(adapter->vf_data);
2321         adapter->vf_data = NULL;
2322         adapter->vfs_allocated_count = 0;
2323 out:
2324         return;
2325 #endif /* CONFIG_PCI_IOV */
2326 }
2327
2328 /**
2329  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2330  * @adapter: board private structure to initialize
2331  *
2332  * igb_sw_init initializes the Adapter private data structure.
2333  * Fields are initialized based on PCI device information and
2334  * OS network device settings (MTU size).
2335  **/
2336 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2337 {
2338         struct e1000_hw *hw = &adapter->hw;
2339         struct net_device *netdev = adapter->netdev;
2340         struct pci_dev *pdev = adapter->pdev;
2341
2342         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2343
2344         /* set default ring sizes */
2345         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2346         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2347
2348         /* set default ITR values */
2349         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2350         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2351
2352         /* set default work limits */
2353         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2354
2355         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2356                                   VLAN_HLEN;
2357         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2358
2359         adapter->node = -1;
2360
2361         spin_lock_init(&adapter->stats64_lock);
2362 #ifdef CONFIG_PCI_IOV
2363         switch (hw->mac.type) {
2364         case e1000_82576:
2365         case e1000_i350:
2366                 if (max_vfs > 7) {
2367                         dev_warn(&pdev->dev,
2368                                  "Maximum of 7 VFs per PF, using max\n");
2369                         adapter->vfs_allocated_count = 7;
2370                 } else
2371                         adapter->vfs_allocated_count = max_vfs;
2372                 break;
2373         case e1000_i210:
2374         case e1000_i211:
2375                 adapter->vfs_allocated_count = 0;
2376                 break;
2377         default:
2378                 break;
2379         }
2380 #endif /* CONFIG_PCI_IOV */
2381         switch (hw->mac.type) {
2382         case e1000_i210:
2383                 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES_I210,
2384                         num_online_cpus());
2385                 break;
2386         case e1000_i211:
2387                 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES_I211,
2388                         num_online_cpus());
2389                 break;
2390         default:
2391                 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES,
2392                 num_online_cpus());
2393                 break;
2394         }
2395         /* i350 cannot do RSS and SR-IOV at the same time */
2396         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2397                 adapter->rss_queues = 1;
2398
2399         /*
2400          * if rss_queues > 4, or if more than six VFs are allocated while
2401          * rss_queues > 1, we should combine the queues into queue pairs in
2402          * order to conserve the limited supply of interrupt vectors
2403          */
2404         if ((adapter->rss_queues > 4) ||
2405             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2406                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2407
2408         /* Setup and initialize a copy of the hw vlan table array */
2409         adapter->shadow_vfta = kzalloc(sizeof(u32) *
2410                                 E1000_VLAN_FILTER_TBL_SIZE,
2411                                 GFP_ATOMIC);
2412
2413         /* This call may decrease the number of queues */
2414         if (igb_init_interrupt_scheme(adapter)) {
2415                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2416                 return -ENOMEM;
2417         }
2418
2419         igb_probe_vfs(adapter);
2420
2421         /* Explicitly disable IRQ since the NIC can be in any state. */
2422         igb_irq_disable(adapter);
2423
2424         if (hw->mac.type >= e1000_i350)
2425                 adapter->flags &= ~IGB_FLAG_DMAC;
2426
2427         set_bit(__IGB_DOWN, &adapter->state);
2428         return 0;
2429 }
2430
2431 /**
2432  * igb_open - Called when a network interface is made active
2433  * @netdev: network interface device structure
2434  *
2435  * Returns 0 on success, negative value on failure
2436  *
2437  * The open entry point is called when a network interface is made
2438  * active by the system (IFF_UP).  At this point all resources needed
2439  * for transmit and receive operations are allocated, the interrupt
2440  * handler is registered with the OS, the watchdog timer is started,
2441  * and the stack is notified that the interface is ready.
2442  **/
2443 static int __igb_open(struct net_device *netdev, bool resuming)
2444 {
2445         struct igb_adapter *adapter = netdev_priv(netdev);
2446         struct e1000_hw *hw = &adapter->hw;
2447         struct pci_dev *pdev = adapter->pdev;
2448         int err;
2449         int i;
2450
2451         /* disallow open during test */
2452         if (test_bit(__IGB_TESTING, &adapter->state)) {
2453                 WARN_ON(resuming);
2454                 return -EBUSY;
2455         }
2456
2457         if (!resuming)
2458                 pm_runtime_get_sync(&pdev->dev);
2459
2460         netif_carrier_off(netdev);
2461
2462         /* allocate transmit descriptors */
2463         err = igb_setup_all_tx_resources(adapter);
2464         if (err)
2465                 goto err_setup_tx;
2466
2467         /* allocate receive descriptors */
2468         err = igb_setup_all_rx_resources(adapter);
2469         if (err)
2470                 goto err_setup_rx;
2471
2472         igb_power_up_link(adapter);
2473
2474         /* before we allocate an interrupt, we must be ready to handle it.
2475          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2476          * as soon as we call pci_request_irq, so we have to setup our
2477          * clean_rx handler before we do so.  */
2478         igb_configure(adapter);
2479
2480         err = igb_request_irq(adapter);
2481         if (err)
2482                 goto err_req_irq;
2483
2484         /* From here on the code is the same as igb_up() */
2485         clear_bit(__IGB_DOWN, &adapter->state);
2486
2487         for (i = 0; i < adapter->num_q_vectors; i++)
2488                 napi_enable(&(adapter->q_vector[i]->napi));
2489
2490         /* Clear any pending interrupts. */
2491         rd32(E1000_ICR);
2492
2493         igb_irq_enable(adapter);
2494
2495         /* notify VFs that reset has been completed */
2496         if (adapter->vfs_allocated_count) {
2497                 u32 reg_data = rd32(E1000_CTRL_EXT);
2498                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2499                 wr32(E1000_CTRL_EXT, reg_data);
2500         }
2501
2502         netif_tx_start_all_queues(netdev);
2503
2504         if (!resuming)
2505                 pm_runtime_put(&pdev->dev);
2506
2507         /* start the watchdog. */
2508         hw->mac.get_link_status = 1;
2509         schedule_work(&adapter->watchdog_task);
2510
2511         return 0;
2512
2513 err_req_irq:
2514         igb_release_hw_control(adapter);
2515         igb_power_down_link(adapter);
2516         igb_free_all_rx_resources(adapter);
2517 err_setup_rx:
2518         igb_free_all_tx_resources(adapter);
2519 err_setup_tx:
2520         igb_reset(adapter);
2521         if (!resuming)
2522                 pm_runtime_put(&pdev->dev);
2523
2524         return err;
2525 }
2526
2527 static int igb_open(struct net_device *netdev)
2528 {
2529         return __igb_open(netdev, false);
2530 }
2531
2532 /**
2533  * igb_close - Disables a network interface
2534  * @netdev: network interface device structure
2535  *
2536  * Returns 0, this is not allowed to fail
2537  *
2538  * The close entry point is called when an interface is de-activated
2539  * by the OS.  The hardware is still under the driver's control, but
2540  * needs to be disabled.  A global MAC reset is issued to stop the
2541  * hardware, and all transmit and receive resources are freed.
2542  **/
2543 static int __igb_close(struct net_device *netdev, bool suspending)
2544 {
2545         struct igb_adapter *adapter = netdev_priv(netdev);
2546         struct pci_dev *pdev = adapter->pdev;
2547
2548         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2549
2550         if (!suspending)
2551                 pm_runtime_get_sync(&pdev->dev);
2552
2553         igb_down(adapter);
2554         igb_free_irq(adapter);
2555
2556         igb_free_all_tx_resources(adapter);
2557         igb_free_all_rx_resources(adapter);
2558
2559         if (!suspending)
2560                 pm_runtime_put_sync(&pdev->dev);
2561         return 0;
2562 }
2563
2564 static int igb_close(struct net_device *netdev)
2565 {
2566         return __igb_close(netdev, false);
2567 }
2568
2569 /**
2570  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2571  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2572  *
2573  * Return 0 on success, negative on failure
2574  **/
2575 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2576 {
2577         struct device *dev = tx_ring->dev;
2578         int orig_node = dev_to_node(dev);
2579         int size;
2580
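        /* allocate on the ring's preferred NUMA node first, then fall
         * back to any node if that fails */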
2581         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2582         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2583         if (!tx_ring->tx_buffer_info)
2584                 tx_ring->tx_buffer_info = vzalloc(size);
2585         if (!tx_ring->tx_buffer_info)
2586                 goto err;
2587
2588         /* round up to nearest 4K */
2589         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2590         tx_ring->size = ALIGN(tx_ring->size, 4096);
2591
2592         set_dev_node(dev, tx_ring->numa_node);
2593         tx_ring->desc = dma_alloc_coherent(dev,
2594                                            tx_ring->size,
2595                                            &tx_ring->dma,
2596                                            GFP_KERNEL);
2597         set_dev_node(dev, orig_node);
2598         if (!tx_ring->desc)
2599                 tx_ring->desc = dma_alloc_coherent(dev,
2600                                                    tx_ring->size,
2601                                                    &tx_ring->dma,
2602                                                    GFP_KERNEL);
2603
2604         if (!tx_ring->desc)
2605                 goto err;
2606
2607         tx_ring->next_to_use = 0;
2608         tx_ring->next_to_clean = 0;
2609
2610         return 0;
2611
2612 err:
2613         vfree(tx_ring->tx_buffer_info);
2614         dev_err(dev,
2615                 "Unable to allocate memory for the transmit descriptor ring\n");
2616         return -ENOMEM;
2617 }
2618
2619 /**
2620  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2621  *                                (Descriptors) for all queues
2622  * @adapter: board private structure
2623  *
2624  * Return 0 on success, negative on failure
2625  **/
2626 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2627 {
2628         struct pci_dev *pdev = adapter->pdev;
2629         int i, err = 0;
2630
2631         for (i = 0; i < adapter->num_tx_queues; i++) {
2632                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2633                 if (err) {
2634                         dev_err(&pdev->dev,
2635                                 "Allocation for Tx Queue %u failed\n", i);
2636                         for (i--; i >= 0; i--)
2637                                 igb_free_tx_resources(adapter->tx_ring[i]);
2638                         break;
2639                 }
2640         }
2641
2642         return err;
2643 }
2644
2645 /**
2646  * igb_setup_tctl - configure the transmit control registers
2647  * @adapter: Board private structure
2648  **/
2649 void igb_setup_tctl(struct igb_adapter *adapter)
2650 {
2651         struct e1000_hw *hw = &adapter->hw;
2652         u32 tctl;
2653
2654         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2655         wr32(E1000_TXDCTL(0), 0);
2656
2657         /* Program the Transmit Control Register */
2658         tctl = rd32(E1000_TCTL);
2659         tctl &= ~E1000_TCTL_CT;
2660         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2661                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2662
2663         igb_config_collision_dist(hw);
2664
2665         /* Enable transmits */
2666         tctl |= E1000_TCTL_EN;
2667
2668         wr32(E1000_TCTL, tctl);
2669 }
2670
2671 /**
2672  * igb_configure_tx_ring - Configure transmit ring after Reset
2673  * @adapter: board private structure
2674  * @ring: tx ring to configure
2675  *
2676  * Configure a transmit ring after a reset.
2677  **/
2678 void igb_configure_tx_ring(struct igb_adapter *adapter,
2679                            struct igb_ring *ring)
2680 {
2681         struct e1000_hw *hw = &adapter->hw;
2682         u32 txdctl = 0;
2683         u64 tdba = ring->dma;
2684         int reg_idx = ring->reg_idx;
2685
2686         /* disable the queue */
2687         wr32(E1000_TXDCTL(reg_idx), 0);
2688         wrfl();
2689         mdelay(10);
2690
2691         wr32(E1000_TDLEN(reg_idx),
2692                         ring->count * sizeof(union e1000_adv_tx_desc));
2693         wr32(E1000_TDBAL(reg_idx),
2694                         tdba & 0x00000000ffffffffULL);
2695         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2696
2697         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2698         wr32(E1000_TDH(reg_idx), 0);
2699         writel(0, ring->tail);
2700
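        /* program the prefetch, host and write-back thresholds, then
         * re-enable the queue */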
2701         txdctl |= IGB_TX_PTHRESH;
2702         txdctl |= IGB_TX_HTHRESH << 8;
2703         txdctl |= IGB_TX_WTHRESH << 16;
2704
2705         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2706         wr32(E1000_TXDCTL(reg_idx), txdctl);
2707 }
2708
2709 /**
2710  * igb_configure_tx - Configure transmit Unit after Reset
2711  * @adapter: board private structure
2712  *
2713  * Configure the Tx unit of the MAC after a reset.
2714  **/
2715 static void igb_configure_tx(struct igb_adapter *adapter)
2716 {
2717         int i;
2718
2719         for (i = 0; i < adapter->num_tx_queues; i++)
2720                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2721 }
2722
2723 /**
2724  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2725  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2726  *
2727  * Returns 0 on success, negative on failure
2728  **/
2729 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2730 {
2731         struct device *dev = rx_ring->dev;
2732         int orig_node = dev_to_node(dev);
2733         int size, desc_len;
2734
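        /* allocate on the ring's preferred NUMA node first, then fall
         * back to any node if that fails */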
2735         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2736         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2737         if (!rx_ring->rx_buffer_info)
2738                 rx_ring->rx_buffer_info = vzalloc(size);
2739         if (!rx_ring->rx_buffer_info)
2740                 goto err;
2741
2742         desc_len = sizeof(union e1000_adv_rx_desc);
2743
2744         /* Round up to nearest 4K */
2745         rx_ring->size = rx_ring->count * desc_len;
2746         rx_ring->size = ALIGN(rx_ring->size, 4096);
2747
2748         set_dev_node(dev, rx_ring->numa_node);
2749         rx_ring->desc = dma_alloc_coherent(dev,
2750                                            rx_ring->size,
2751                                            &rx_ring->dma,
2752                                            GFP_KERNEL);
2753         set_dev_node(dev, orig_node);
2754         if (!rx_ring->desc)
2755                 rx_ring->desc = dma_alloc_coherent(dev,
2756                                                    rx_ring->size,
2757                                                    &rx_ring->dma,
2758                                                    GFP_KERNEL);
2759
2760         if (!rx_ring->desc)
2761                 goto err;
2762
2763         rx_ring->next_to_clean = 0;
2764         rx_ring->next_to_use = 0;
2765
2766         return 0;
2767
2768 err:
2769         vfree(rx_ring->rx_buffer_info);
2770         rx_ring->rx_buffer_info = NULL;
2771         dev_err(dev,
2772                 "Unable to allocate memory for the receive descriptor ring\n");
2773         return -ENOMEM;
2774 }
2775
2776 /**
2777  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2778  *                                (Descriptors) for all queues
2779  * @adapter: board private structure
2780  *
2781  * Return 0 on success, negative on failure
2782  **/
2783 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2784 {
2785         struct pci_dev *pdev = adapter->pdev;
2786         int i, err = 0;
2787
2788         for (i = 0; i < adapter->num_rx_queues; i++) {
2789                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2790                 if (err) {
2791                         dev_err(&pdev->dev,
2792                                 "Allocation for Rx Queue %u failed\n", i);
2793                         for (i--; i >= 0; i--)
2794                                 igb_free_rx_resources(adapter->rx_ring[i]);
2795                         break;
2796                 }
2797         }
2798
2799         return err;
2800 }
2801
2802 /**
2803  * igb_setup_mrqc - configure the multiple receive queue control registers
2804  * @adapter: Board private structure
2805  **/
2806 static void igb_setup_mrqc(struct igb_adapter *adapter)
2807 {
2808         struct e1000_hw *hw = &adapter->hw;
2809         u32 mrqc, rxcsum;
2810         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2811         union e1000_reta {
2812                 u32 dword;
2813                 u8  bytes[4];
2814         } reta;
2815         static const u8 rsshash[40] = {
2816                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2817                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2818                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2819                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2820
2821         /* Fill out hash function seeds */
2822         for (j = 0; j < 10; j++) {
2823                 u32 rsskey = rsshash[(j * 4)];
2824                 rsskey |= rsshash[(j * 4) + 1] << 8;
2825                 rsskey |= rsshash[(j * 4) + 2] << 16;
2826                 rsskey |= rsshash[(j * 4) + 3] << 24;
2827                 array_wr32(E1000_RSSRK(0), j, rsskey);
2828         }
2829
2830         num_rx_queues = adapter->rss_queues;
2831
2832         if (adapter->vfs_allocated_count) {
2833                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2834                 switch (hw->mac.type) {
2835                 case e1000_i350:
2836                 case e1000_82580:
2837                         num_rx_queues = 1;
2838                         shift = 0;
2839                         break;
2840                 case e1000_82576:
2841                         shift = 3;
2842                         num_rx_queues = 2;
2843                         break;
2844                 case e1000_82575:
2845                         shift = 2;
2846                         shift2 = 6;
2847                 default:
2848                         break;
2849                 }
2850         } else {
2851                 if (hw->mac.type == e1000_82575)
2852                         shift = 6;
2853         }
2854
2855         for (j = 0; j < (32 * 4); j++) {
2856                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2857                 if (shift2)
2858                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2859                 if ((j & 3) == 3)
2860                         wr32(E1000_RETA(j >> 2), reta.dword);
2861         }
2862
2863         /*
2864          * Disable raw packet checksumming so that RSS hash is placed in
2865          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2866          * offloads as they are enabled by default
2867          */
2868         rxcsum = rd32(E1000_RXCSUM);
2869         rxcsum |= E1000_RXCSUM_PCSD;
2870
2871         if (adapter->hw.mac.type >= e1000_82576)
2872                 /* Enable Receive Checksum Offload for SCTP */
2873                 rxcsum |= E1000_RXCSUM_CRCOFL;
2874
2875         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2876         wr32(E1000_RXCSUM, rxcsum);
2877         /*
2878          * Generate RSS hash based on TCP port numbers and/or
2879          * IPv4/v6 src and dst addresses since UDP cannot be
2880          * hashed reliably due to IP fragmentation
2881          */
2882
2883         mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2884                E1000_MRQC_RSS_FIELD_IPV4_TCP |
2885                E1000_MRQC_RSS_FIELD_IPV6 |
2886                E1000_MRQC_RSS_FIELD_IPV6_TCP |
2887                E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2888
2889         /* If VMDq is enabled then we set the appropriate mode for that, else
2890          * we default to RSS so that an RSS hash is calculated per packet even
2891          * if we are only using one queue */
2892         if (adapter->vfs_allocated_count) {
2893                 if (hw->mac.type > e1000_82575) {
2894                         /* Set the default pool for the PF's first queue */
2895                         u32 vtctl = rd32(E1000_VT_CTL);
2896                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2897                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2898                         vtctl |= adapter->vfs_allocated_count <<
2899                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2900                         wr32(E1000_VT_CTL, vtctl);
2901                 }
2902                 if (adapter->rss_queues > 1)
2903                         mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2904                 else
2905                         mrqc |= E1000_MRQC_ENABLE_VMDQ;
2906         } else {
2907                 if (hw->mac.type != e1000_i211)
2908                         mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
2909         }
2910         igb_vmm_control(adapter);
2911
2912         wr32(E1000_MRQC, mrqc);
2913 }
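
/*
 * Sketch (illustration only) of how the 128-entry RSS redirection table
 * written above is packed: four one-byte entries per RETA register, each
 * byte holding a queue index spread round-robin across the Rx queues and
 * shifted as required by the MAC type.  igb_example_reta_dword() is a
 * hypothetical helper, not a driver function.
 */
static inline u32 igb_example_reta_dword(u32 first, u32 num_rx_queues,
                                         u32 shift)
{
        u32 dword = 0;
        int b;

        /* byte b carries the queue index for table entry first + b */
        for (b = 0; b < 4; b++)
                dword |= (((first + b) % num_rx_queues) << shift) << (8 * b);

        return dword;
}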
2914
2915 /**
2916  * igb_setup_rctl - configure the receive control registers
2917  * @adapter: board private structure
2918  **/
2919 void igb_setup_rctl(struct igb_adapter *adapter)
2920 {
2921         struct e1000_hw *hw = &adapter->hw;
2922         u32 rctl;
2923
2924         rctl = rd32(E1000_RCTL);
2925
2926         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2927         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2928
2929         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2930                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2931
2932         /*
2933          * enable stripping of CRC. It's unlikely this will break BMC
2934          * redirection as it did with e1000. Newer features require
2935          * that the HW strips the CRC.
2936          */
2937         rctl |= E1000_RCTL_SECRC;
2938
2939         /* disable store bad packets and clear size bits. */
2940         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2941
2942         /* enable LPE to prevent packets larger than max_frame_size */
2943         rctl |= E1000_RCTL_LPE;
2944
2945         /* disable queue 0 to prevent tail write w/o re-config */
2946         wr32(E1000_RXDCTL(0), 0);
2947
2948         /* Attention!!!  For SR-IOV PF driver operations you must enable
2949          * queue drop for all VF and PF queues to prevent head of line blocking
2950          * if an un-trusted VF does not provide descriptors to hardware.
2951          */
2952         if (adapter->vfs_allocated_count) {
2953                 /* set all queue drop enable bits */
2954                 wr32(E1000_QDE, ALL_QUEUES);
2955         }
2956
2957         /* This is useful for sniffing bad packets. */
2958         if (adapter->netdev->features & NETIF_F_RXALL) {
2959                 /* UPE and MPE will be handled by normal PROMISC logic
2960                  * in igb_set_rx_mode */
2961                 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
2962                          E1000_RCTL_BAM | /* RX All Bcast Pkts */
2963                          E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
2964
2965                 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
2966                           E1000_RCTL_DPF | /* Allow filtered pause */
2967                           E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
2968                 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
2969                  * and that breaks VLANs.
2970                  */
2971         }
2972
2973         wr32(E1000_RCTL, rctl);
2974 }
2975
2976 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2977                                    int vfn)
2978 {
2979         struct e1000_hw *hw = &adapter->hw;
2980         u32 vmolr;
2981
2982         /* if the target is a VF (not the PF) and it has VLANs enabled,
2983          * increase the size to make room for the VLAN tag */
2984         if (vfn < adapter->vfs_allocated_count &&
2985             adapter->vf_data[vfn].vlans_enabled)
2986                 size += VLAN_TAG_SIZE;
2987
2988         vmolr = rd32(E1000_VMOLR(vfn));
2989         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2990         vmolr |= size | E1000_VMOLR_LPE;
2991         wr32(E1000_VMOLR(vfn), vmolr);
2992
2993         return 0;
2994 }
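
/*
 * Worked example of the sizing above (illustration only): when the VF has
 * VLANs enabled the limit grows by VLAN_TAG_SIZE, so a standard 1518-byte
 * frame is still accepted when tagged (1518 + 4 = 1522 bytes).
 * igb_example_vf_rlpml() is a hypothetical helper, not a driver function.
 */
static inline int igb_example_vf_rlpml(int size, bool vlans_enabled)
{
        return vlans_enabled ? size + VLAN_TAG_SIZE : size;
}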
2995
2996 /**
2997  * igb_rlpml_set - set maximum receive packet size
2998  * @adapter: board private structure
2999  *
3000  * Configure maximum receivable packet size.
3001  **/
3002 static void igb_rlpml_set(struct igb_adapter *adapter)
3003 {
3004         u32 max_frame_size = adapter->max_frame_size;
3005         struct e1000_hw *hw = &adapter->hw;
3006         u16 pf_id = adapter->vfs_allocated_count;
3007
3008         if (pf_id) {
3009                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3010                 /*
3011                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3012                  * to our max jumbo frame size, in case we need to enable
3013                  * jumbo frames on one of the rings later.
3014                  * This will not pass over-length frames into the default
3015                  * queue because it's gated by the VMOLR.RLPML.
3016                  */
3017                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3018         }
3019
3020         wr32(E1000_RLPML, max_frame_size);
3021 }
3022
3023 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3024                                  int vfn, bool aupe)
3025 {
3026         struct e1000_hw *hw = &adapter->hw;
3027         u32 vmolr;
3028
3029         /*
3030          * This register exists only on 82576 and newer, so on older
3031          * hardware there is nothing to do
3032          */
3033         if (hw->mac.type < e1000_82576)
3034                 return;
3035
3036         vmolr = rd32(E1000_VMOLR(vfn));
3037         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3038         if (aupe)
3039                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3040         else
3041                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3042
3043         /* clear BAM and RSSE here; they are re-set below only where appropriate */
3044         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3045
3046         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3047                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3048         /*
3049          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3050          * multicast packets
3051          */
3052         if (vfn <= adapter->vfs_allocated_count)
3053                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3054
3055         wr32(E1000_VMOLR(vfn), vmolr);
3056 }
3057
3058 /**
3059  * igb_configure_rx_ring - Configure a receive ring after Reset
3060  * @adapter: board private structure
3061  * @ring: receive ring to be configured
3062  *
3063  * Configure the Rx unit of the MAC after a reset.
3064  **/
3065 void igb_configure_rx_ring(struct igb_adapter *adapter,
3066                            struct igb_ring *ring)
3067 {
3068         struct e1000_hw *hw = &adapter->hw;
3069         u64 rdba = ring->dma;
3070         int reg_idx = ring->reg_idx;
3071         u32 srrctl = 0, rxdctl = 0;
3072
3073         /* disable the queue */
3074         wr32(E1000_RXDCTL(reg_idx), 0);
3075
3076         /* Set DMA base address registers */
3077         wr32(E1000_RDBAL(reg_idx),
3078              rdba & 0x00000000ffffffffULL);
3079         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3080         wr32(E1000_RDLEN(reg_idx),
3081                        ring->count * sizeof(union e1000_adv_rx_desc));
3082
3083         /* initialize head and tail */
3084         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3085         wr32(E1000_RDH(reg_idx), 0);
3086         writel(0, ring->tail);
3087
3088         /* set descriptor configuration */
3089         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3090 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3091         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3092 #else
3093         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3094 #endif
3095         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3096         if (hw->mac.type >= e1000_82580)
3097                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3098         /* Only set Drop Enable if we are supporting multiple queues */
3099         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3100                 srrctl |= E1000_SRRCTL_DROP_EN;
3101
3102         wr32(E1000_SRRCTL(reg_idx), srrctl);
3103
3104         /* set filtering for VMDQ pools */
3105         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3106
3107         rxdctl |= IGB_RX_PTHRESH;
3108         rxdctl |= IGB_RX_HTHRESH << 8;
3109         rxdctl |= IGB_RX_WTHRESH << 16;
3110
3111         /* enable receive descriptor fetching */
3112         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3113         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3114 }
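
/*
 * Sketch (illustration only) of the SRRCTL packet-buffer sizing above:
 * the BSIZEPKT field is expressed in 1 KB units, so a half page on a
 * 4K-page system encodes as 2.  igb_example_srrctl_bsizepkt() is a
 * hypothetical helper, not a driver function.
 */
static inline u32 igb_example_srrctl_bsizepkt(u32 buf_bytes)
{
        /* e.g. (PAGE_SIZE / 2) = 2048, and 2048 >> 10 == 2 */
        return buf_bytes >> E1000_SRRCTL_BSIZEPKT_SHIFT;
}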
3115
3116 /**
3117  * igb_configure_rx - Configure receive Unit after Reset
3118  * @adapter: board private structure
3119  *
3120  * Configure the Rx unit of the MAC after a reset.
3121  **/
3122 static void igb_configure_rx(struct igb_adapter *adapter)
3123 {
3124         int i;
3125
3126         /* set UTA to appropriate mode */
3127         igb_set_uta(adapter);
3128
3129         /* set the correct pool for the PF default MAC address in entry 0 */
3130         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3131                          adapter->vfs_allocated_count);
3132
3133         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3134          * the Base and Length of the Rx Descriptor Ring */
3135         for (i = 0; i < adapter->num_rx_queues; i++)
3136                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3137 }
3138
3139 /**
3140  * igb_free_tx_resources - Free Tx Resources per Queue
3141  * @tx_ring: Tx descriptor ring for a specific queue
3142  *
3143  * Free all transmit software resources
3144  **/
3145 void igb_free_tx_resources(struct igb_ring *tx_ring)
3146 {
3147         igb_clean_tx_ring(tx_ring);
3148
3149         vfree(tx_ring->tx_buffer_info);
3150         tx_ring->tx_buffer_info = NULL;
3151
3152         /* if not set, then don't free */
3153         if (!tx_ring->desc)
3154                 return;
3155
3156         dma_free_coherent(tx_ring->dev, tx_ring->size,
3157                           tx_ring->desc, tx_ring->dma);
3158
3159         tx_ring->desc = NULL;
3160 }
3161
3162 /**
3163  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3164  * @adapter: board private structure
3165  *
3166  * Free all transmit software resources
3167  **/
3168 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3169 {
3170         int i;
3171
3172         for (i = 0; i < adapter->num_tx_queues; i++)
3173                 igb_free_tx_resources(adapter->tx_ring[i]);
3174 }
3175
3176 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3177                                     struct igb_tx_buffer *tx_buffer)
3178 {
3179         if (tx_buffer->skb) {
3180                 dev_kfree_skb_any(tx_buffer->skb);
3181                 if (tx_buffer->dma)
3182                         dma_unmap_single(ring->dev,
3183                                          tx_buffer->dma,
3184                                          tx_buffer->length,
3185                                          DMA_TO_DEVICE);
3186         } else if (tx_buffer->dma) {
3187                 dma_unmap_page(ring->dev,
3188                                tx_buffer->dma,
3189                                tx_buffer->length,
3190                                DMA_TO_DEVICE);
3191         }
3192         tx_buffer->next_to_watch = NULL;
3193         tx_buffer->skb = NULL;
3194         tx_buffer->dma = 0;
3195         /* buffer_info must be completely set up in the transmit path */
3196 }
3197
3198 /**
3199  * igb_clean_tx_ring - Free Tx Buffers
3200  * @tx_ring: ring to be cleaned
3201  **/
3202 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3203 {
3204         struct igb_tx_buffer *buffer_info;
3205         unsigned long size;
3206         u16 i;
3207
3208         if (!tx_ring->tx_buffer_info)
3209                 return;
3210         /* Free all the Tx ring sk_buffs */
3211
3212         for (i = 0; i < tx_ring->count; i++) {
3213                 buffer_info = &tx_ring->tx_buffer_info[i];
3214                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3215         }
3216
3217         netdev_tx_reset_queue(txring_txq(tx_ring));
3218
3219         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3220         memset(tx_ring->tx_buffer_info, 0, size);
3221
3222         /* Zero out the descriptor ring */
3223         memset(tx_ring->desc, 0, tx_ring->size);
3224
3225         tx_ring->next_to_use = 0;
3226         tx_ring->next_to_clean = 0;
3227 }
3228
3229 /**
3230  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3231  * @adapter: board private structure
3232  **/
3233 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3234 {
3235         int i;
3236
3237         for (i = 0; i < adapter->num_tx_queues; i++)
3238                 igb_clean_tx_ring(adapter->tx_ring[i]);
3239 }
3240
3241 /**
3242  * igb_free_rx_resources - Free Rx Resources
3243  * @rx_ring: ring to clean the resources from
3244  *
3245  * Free all receive software resources
3246  **/
3247 void igb_free_rx_resources(struct igb_ring *rx_ring)
3248 {
3249         igb_clean_rx_ring(rx_ring);
3250
3251         vfree(rx_ring->rx_buffer_info);
3252         rx_ring->rx_buffer_info = NULL;
3253
3254         /* if not set, then don't free */
3255         if (!rx_ring->desc)
3256                 return;
3257
3258         dma_free_coherent(rx_ring->dev, rx_ring->size,
3259                           rx_ring->desc, rx_ring->dma);
3260
3261         rx_ring->desc = NULL;
3262 }
3263
3264 /**
3265  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3266  * @adapter: board private structure
3267  *
3268  * Free all receive software resources
3269  **/
3270 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3271 {
3272         int i;
3273
3274         for (i = 0; i < adapter->num_rx_queues; i++)
3275                 igb_free_rx_resources(adapter->rx_ring[i]);
3276 }
3277
3278 /**
3279  * igb_clean_rx_ring - Free Rx Buffers per Queue
3280  * @rx_ring: ring to free buffers from
3281  **/
3282 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3283 {
3284         unsigned long size;
3285         u16 i;
3286
3287         if (!rx_ring->rx_buffer_info)
3288                 return;
3289
3290         /* Free all the Rx ring sk_buffs */
3291         for (i = 0; i < rx_ring->count; i++) {
3292                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3293                 if (buffer_info->dma) {
3294                         dma_unmap_single(rx_ring->dev,
3295                                          buffer_info->dma,
3296                                          IGB_RX_HDR_LEN,
3297                                          DMA_FROM_DEVICE);
3298                         buffer_info->dma = 0;
3299                 }
3300
3301                 if (buffer_info->skb) {
3302                         dev_kfree_skb(buffer_info->skb);
3303                         buffer_info->skb = NULL;
3304                 }
3305                 if (buffer_info->page_dma) {
3306                         dma_unmap_page(rx_ring->dev,
3307                                        buffer_info->page_dma,
3308                                        PAGE_SIZE / 2,
3309                                        DMA_FROM_DEVICE);
3310                         buffer_info->page_dma = 0;
3311                 }
3312                 if (buffer_info->page) {
3313                         put_page(buffer_info->page);
3314                         buffer_info->page = NULL;
3315                         buffer_info->page_offset = 0;
3316                 }
3317         }
3318
3319         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3320         memset(rx_ring->rx_buffer_info, 0, size);
3321
3322         /* Zero out the descriptor ring */
3323         memset(rx_ring->desc, 0, rx_ring->size);
3324
3325         rx_ring->next_to_clean = 0;
3326         rx_ring->next_to_use = 0;
3327 }
3328
3329 /**
3330  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3331  * @adapter: board private structure
3332  **/
3333 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3334 {
3335         int i;
3336
3337         for (i = 0; i < adapter->num_rx_queues; i++)
3338                 igb_clean_rx_ring(adapter->rx_ring[i]);
3339 }
3340
3341 /**
3342  * igb_set_mac - Change the Ethernet Address of the NIC
3343  * @netdev: network interface device structure
3344  * @p: pointer to an address structure
3345  *
3346  * Returns 0 on success, negative on failure
3347  **/
3348 static int igb_set_mac(struct net_device *netdev, void *p)
3349 {
3350         struct igb_adapter *adapter = netdev_priv(netdev);
3351         struct e1000_hw *hw = &adapter->hw;
3352         struct sockaddr *addr = p;
3353
3354         if (!is_valid_ether_addr(addr->sa_data))
3355                 return -EADDRNOTAVAIL;
3356
3357         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3358         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3359
3360         /* set the correct pool for the new PF MAC address in entry 0 */
3361         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3362                          adapter->vfs_allocated_count);
3363
3364         return 0;
3365 }
3366
3367 /**
3368  * igb_write_mc_addr_list - write multicast addresses to MTA
3369  * @netdev: network interface device structure
3370  *
3371  * Writes multicast address list to the MTA hash table.
3372  * Returns: -ENOMEM on failure
3373  *                0 on no addresses written
3374  *                X on writing X addresses to MTA
3375  **/
3376 static int igb_write_mc_addr_list(struct net_device *netdev)
3377 {
3378         struct igb_adapter *adapter = netdev_priv(netdev);
3379         struct e1000_hw *hw = &adapter->hw;
3380         struct netdev_hw_addr *ha;
3381         u8  *mta_list;
3382         int i;
3383
3384         if (netdev_mc_empty(netdev)) {
3385                 /* nothing to program, so clear mc list */
3386                 igb_update_mc_addr_list(hw, NULL, 0);
3387                 igb_restore_vf_multicasts(adapter);
3388                 return 0;
3389         }
3390
3391         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3392         if (!mta_list)
3393                 return -ENOMEM;
3394
3395         /* The shared function expects a packed array of only addresses. */
3396         i = 0;
3397         netdev_for_each_mc_addr(ha, netdev)
3398                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3399
3400         igb_update_mc_addr_list(hw, mta_list, i);
3401         kfree(mta_list);
3402
3403         return netdev_mc_count(netdev);
3404 }
3405
3406 /**
3407  * igb_write_uc_addr_list - write unicast addresses to RAR table
3408  * @netdev: network interface device structure
3409  *
3410  * Writes unicast address list to the RAR table.
3411  * Returns: -ENOMEM on failure/insufficient address space
3412  *                0 on no addresses written
3413  *                X on writing X addresses to the RAR table
3414  **/
3415 static int igb_write_uc_addr_list(struct net_device *netdev)
3416 {
3417         struct igb_adapter *adapter = netdev_priv(netdev);
3418         struct e1000_hw *hw = &adapter->hw;
3419         unsigned int vfn = adapter->vfs_allocated_count;
3420         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3421         int count = 0;
3422
3423         /* return ENOMEM indicating insufficient memory for addresses */
3424         if (netdev_uc_count(netdev) > rar_entries)
3425                 return -ENOMEM;
3426
3427         if (!netdev_uc_empty(netdev) && rar_entries) {
3428                 struct netdev_hw_addr *ha;
3429
3430                 netdev_for_each_uc_addr(ha, netdev) {
3431                         if (!rar_entries)
3432                                 break;
3433                         igb_rar_set_qsel(adapter, ha->addr,
3434                                          rar_entries--,
3435                                          vfn);
3436                         count++;
3437                 }
3438         }
3439         /* clear remaining RAR entries in reverse order to avoid write combining */
3440         for (; rar_entries > 0 ; rar_entries--) {
3441                 wr32(E1000_RAH(rar_entries), 0);
3442                 wr32(E1000_RAL(rar_entries), 0);
3443         }
3444         wrfl();
3445
3446         return count;
3447 }
3448
3449 /**
3450  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3451  * @netdev: network interface device structure
3452  *
3453  * The set_rx_mode entry point is called whenever the unicast or multicast
3454  * address lists or the network interface flags are updated.  This routine is
3455  * responsible for configuring the hardware for proper unicast, multicast,
3456  * promiscuous mode, and all-multi behavior.
3457  **/
3458 static void igb_set_rx_mode(struct net_device *netdev)
3459 {
3460         struct igb_adapter *adapter = netdev_priv(netdev);
3461         struct e1000_hw *hw = &adapter->hw;
3462         unsigned int vfn = adapter->vfs_allocated_count;
3463         u32 rctl, vmolr = 0;
3464         int count;
3465
3466         /* Check for Promiscuous and All Multicast modes */
3467         rctl = rd32(E1000_RCTL);
3468
3469         /* clear the affected bits */
3470         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3471
3472         if (netdev->flags & IFF_PROMISC) {
3473                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3474                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3475         } else {
3476                 if (netdev->flags & IFF_ALLMULTI) {
3477                         rctl |= E1000_RCTL_MPE;
3478                         vmolr |= E1000_VMOLR_MPME;
3479                 } else {
3480                         /*
3481                          * Write addresses to the MTA, if the attempt fails
3482                          * then we should just turn on promiscuous mode so
3483                          * that we can at least receive multicast traffic
3484                          */
3485                         count = igb_write_mc_addr_list(netdev);
3486                         if (count < 0) {
3487                                 rctl |= E1000_RCTL_MPE;
3488                                 vmolr |= E1000_VMOLR_MPME;
3489                         } else if (count) {
3490                                 vmolr |= E1000_VMOLR_ROMPE;
3491                         }
3492                 }
3493                 /*
3494                  * Write addresses to available RAR registers, if there is not
3495                  * sufficient space to store all the addresses then enable
3496                  * unicast promiscuous mode
3497                  */
3498                 count = igb_write_uc_addr_list(netdev);
3499                 if (count < 0) {
3500                         rctl |= E1000_RCTL_UPE;
3501                         vmolr |= E1000_VMOLR_ROPE;
3502                 }
3503                 rctl |= E1000_RCTL_VFE;
3504         }
3505         wr32(E1000_RCTL, rctl);
3506
3507         /*
3508          * In order to support SR-IOV and eventually VMDq it is necessary to set
3509          * the VMOLR to enable the appropriate modes.  Without this workaround
3510          * we will have issues with VLAN tag stripping not being done for frames
3511          * that are only arriving because we are the default pool
3512          */
3513         if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3514                 return;
3515
3516         vmolr |= rd32(E1000_VMOLR(vfn)) &
3517                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3518         wr32(E1000_VMOLR(vfn), vmolr);
3519         igb_restore_vf_multicasts(adapter);
3520 }
3521
3522 static void igb_check_wvbr(struct igb_adapter *adapter)
3523 {
3524         struct e1000_hw *hw = &adapter->hw;
3525         u32 wvbr = 0;
3526
3527         switch (hw->mac.type) {
3528         case e1000_82576:
3529         case e1000_i350:
3530                 if (!(wvbr = rd32(E1000_WVBR)))
3531                         return;
3532                 break;
3533         default:
3534                 break;
3535         }
3536
3537         adapter->wvbr |= wvbr;
3538 }
3539
3540 #define IGB_STAGGERED_QUEUE_OFFSET 8
3541
3542 static void igb_spoof_check(struct igb_adapter *adapter)
3543 {
3544         int j;
3545
3546         if (!adapter->wvbr)
3547                 return;
3548
3549         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3550                 if (adapter->wvbr & (1 << j) ||
3551                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3552                         dev_warn(&adapter->pdev->dev,
3553                                 "Spoof event(s) detected on VF %d\n", j);
3554                         adapter->wvbr &=
3555                                 ~((1 << j) |
3556                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3557                 }
3558         }
3559 }
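
/*
 * Sketch (illustration only) of the WVBR bit layout assumed by the check
 * above: a spoof event for VF j may be reported either in bit j or in the
 * staggered bit j + IGB_STAGGERED_QUEUE_OFFSET, so both are tested and
 * cleared together.  igb_example_vf_spoof_mask() is a hypothetical
 * helper, not a driver function.
 */
static inline u32 igb_example_vf_spoof_mask(int vf)
{
        return (1 << vf) | (1 << (vf + IGB_STAGGERED_QUEUE_OFFSET));
}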
3560
3561 /* Need to wait a few seconds after link up to get diagnostic information from
3562  * the phy */
3563 static void igb_update_phy_info(unsigned long data)
3564 {
3565         struct igb_adapter *adapter = (struct igb_adapter *) data;
3566         igb_get_phy_info(&adapter->hw);
3567 }
3568
3569 /**
3570  * igb_has_link - check shared code for link and determine up/down
3571  * @adapter: pointer to driver private info
3572  **/
3573 bool igb_has_link(struct igb_adapter *adapter)
3574 {
3575         struct e1000_hw *hw = &adapter->hw;
3576         bool link_active = false;
3577         s32 ret_val = 0;
3578
3579         /* get_link_status is set on LSC (link status) interrupt or
3580          * rx sequence error interrupt.  It stays set until
3581          * e1000_check_for_link re-establishes link, so link is only
3582          * reported up once it has been cleared (copper adapters ONLY)
3583          */
3584         switch (hw->phy.media_type) {
3585         case e1000_media_type_copper:
3586                 if (hw->mac.get_link_status) {
3587                         ret_val = hw->mac.ops.check_for_link(hw);
3588                         link_active = !hw->mac.get_link_status;
3589                 } else {
3590                         link_active = true;
3591                 }
3592                 break;
3593         case e1000_media_type_internal_serdes:
3594                 ret_val = hw->mac.ops.check_for_link(hw);
3595                 link_active = hw->mac.serdes_has_link;
3596                 break;
3597         case e1000_media_type_unknown:
3598         default:
3599                 break;
3600         }
3601
3602         return link_active;
3603 }
3604
3605 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3606 {
3607         bool ret = false;
3608         u32 ctrl_ext, thstat;
3609
3610         /* check for thermal sensor event on i350 copper only */
3611         if (hw->mac.type == e1000_i350) {
3612                 thstat = rd32(E1000_THSTAT);
3613                 ctrl_ext = rd32(E1000_CTRL_EXT);
3614
3615                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3616                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3617                         ret = !!(thstat & event);
3618                 }
3619         }
3620
3621         return ret;
3622 }
3623
3624 /**
3625  * igb_watchdog - Timer Call-back
3626  * @data: pointer to adapter cast into an unsigned long
3627  **/
3628 static void igb_watchdog(unsigned long data)
3629 {
3630         struct igb_adapter *adapter = (struct igb_adapter *)data;
3631         /* Do the rest outside of interrupt context */
3632         schedule_work(&adapter->watchdog_task);
3633 }
3634
3635 static void igb_watchdog_task(struct work_struct *work)
3636 {
3637         struct igb_adapter *adapter = container_of(work,
3638                                                    struct igb_adapter,
3639                                                    watchdog_task);
3640         struct e1000_hw *hw = &adapter->hw;
3641         struct net_device *netdev = adapter->netdev;
3642         u32 link;
3643         int i;
3644
3645         link = igb_has_link(adapter);
3646         if (link) {
3647                 /* Cancel scheduled suspend requests. */
3648                 pm_runtime_resume(netdev->dev.parent);
3649
3650                 if (!netif_carrier_ok(netdev)) {
3651                         u32 ctrl;
3652                         hw->mac.ops.get_speed_and_duplex(hw,
3653                                                          &adapter->link_speed,
3654                                                          &adapter->link_duplex);
3655
3656                         ctrl = rd32(E1000_CTRL);
3657                         /* Link status message must follow this format */
3658                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3659                                "Duplex, Flow Control: %s\n",
3660                                netdev->name,
3661                                adapter->link_speed,
3662                                adapter->link_duplex == FULL_DUPLEX ?
3663                                "Full" : "Half",
3664                                (ctrl & E1000_CTRL_TFCE) &&
3665                                (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3666                                (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3667                                (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3668
3669                         /* check for thermal sensor event */
3670                         if (igb_thermal_sensor_event(hw,
3671                             E1000_THSTAT_LINK_THROTTLE)) {
3672                                 netdev_info(netdev,
3673                                             "The network adapter link speed was downshifted because it overheated\n");
3675                         }
3676
3677                         /* adjust timeout factor according to speed/duplex */
3678                         adapter->tx_timeout_factor = 1;
3679                         switch (adapter->link_speed) {
3680                         case SPEED_10:
3681                                 adapter->tx_timeout_factor = 14;
3682                                 break;
3683                         case SPEED_100:
3684                                 /* maybe add some timeout factor ? */
3685                                 break;
3686                         }
3687
3688                         netif_carrier_on(netdev);
3689
3690                         igb_ping_all_vfs(adapter);
3691                         igb_check_vf_rate_limit(adapter);
3692
3693                         /* link state has changed, schedule phy info update */
3694                         if (!test_bit(__IGB_DOWN, &adapter->state))
3695                                 mod_timer(&adapter->phy_info_timer,
3696                                           round_jiffies(jiffies + 2 * HZ));
3697                 }
3698         } else {
3699                 if (netif_carrier_ok(netdev)) {
3700                         adapter->link_speed = 0;
3701                         adapter->link_duplex = 0;
3702
3703                         /* check for thermal sensor event */
3704                         if (igb_thermal_sensor_event(hw,
3705                             E1000_THSTAT_PWR_DOWN)) {
3706                                 netdev_err(netdev,
3707                                            "The network adapter was stopped because it overheated\n");
3708                         }
3709
3710                         /* Link status message must follow this format */
3711                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3712                                netdev->name);
3713                         netif_carrier_off(netdev);
3714
3715                         igb_ping_all_vfs(adapter);
3716
3717                         /* link state has changed, schedule phy info update */
3718                         if (!test_bit(__IGB_DOWN, &adapter->state))
3719                                 mod_timer(&adapter->phy_info_timer,
3720                                           round_jiffies(jiffies + 2 * HZ));
3721
3722                         pm_schedule_suspend(netdev->dev.parent,
3723                                             MSEC_PER_SEC * 5);
3724                 }
3725         }
3726
3727         spin_lock(&adapter->stats64_lock);
3728         igb_update_stats(adapter, &adapter->stats64);
3729         spin_unlock(&adapter->stats64_lock);
3730
3731         for (i = 0; i < adapter->num_tx_queues; i++) {
3732                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3733                 if (!netif_carrier_ok(netdev)) {
3734                         /* We've lost link, so the controller stops DMA,
3735                          * but we've got queued Tx work that's never going
3736                          * to get done, so reset controller to flush Tx.
3737                          * (Do the reset outside of interrupt context). */
3738                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3739                                 adapter->tx_timeout_count++;
3740                                 schedule_work(&adapter->reset_task);
3741                                 /* return immediately since reset is imminent */
3742                                 return;
3743                         }
3744                 }
3745
3746                 /* Force detection of hung controller every watchdog period */
3747                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3748         }
3749
3750         /* Cause software interrupt to ensure rx ring is cleaned */
3751         if (adapter->msix_entries) {
3752                 u32 eics = 0;
3753                 for (i = 0; i < adapter->num_q_vectors; i++)
3754                         eics |= adapter->q_vector[i]->eims_value;
3755                 wr32(E1000_EICS, eics);
3756         } else {
3757                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3758         }
3759
3760         igb_spoof_check(adapter);
3761
3762         /* Reset the timer */
3763         if (!test_bit(__IGB_DOWN, &adapter->state))
3764                 mod_timer(&adapter->watchdog_timer,
3765                           round_jiffies(jiffies + 2 * HZ));
3766 }
3767
3768 enum latency_range {
3769         lowest_latency = 0,
3770         low_latency = 1,
3771         bulk_latency = 2,
3772         latency_invalid = 255
3773 };
3774
3775 /**
3776  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3777  * @q_vector: pointer to q_vector
3778  *
3779  *      Stores a new ITR value based strictly on packet size.  This
3780  *      algorithm is less sophisticated than that used in igb_update_itr,
3781  *      due to the difficulty of synchronizing statistics across multiple
3782  *      receive rings.  The divisors and thresholds used by this function
3783  *      were determined based on theoretical maximum wire speed and testing
3784  *      data, in order to minimize response time while increasing bulk
3785  *      throughput.
3786  *      This functionality is controlled by the InterruptThrottleRate module
3787  *      parameter (see igb_param.c)
3788  *      NOTE:  This function is called only when operating in a multiqueue
3789  *             receive environment.
3790  **/
3791 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3792 {
3793         int new_val = q_vector->itr_val;
3794         int avg_wire_size = 0;
3795         struct igb_adapter *adapter = q_vector->adapter;
3796         unsigned int packets;
3797
3798         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3799          * ints/sec - ITR timer value of 120 ticks.
3800          */
3801         if (adapter->link_speed != SPEED_1000) {
3802                 new_val = IGB_4K_ITR;
3803                 goto set_itr_val;
3804         }
3805
3806         packets = q_vector->rx.total_packets;
3807         if (packets)
3808                 avg_wire_size = q_vector->rx.total_bytes / packets;
3809
3810         packets = q_vector->tx.total_packets;
3811         if (packets)
3812                 avg_wire_size = max_t(u32, avg_wire_size,
3813                                       q_vector->tx.total_bytes / packets);
3814
3815         /* if avg_wire_size isn't set no work was done */
3816         if (!avg_wire_size)
3817                 goto clear_counts;
3818
3819         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3820         avg_wire_size += 24;
3821
3822         /* Don't starve jumbo frames */
3823         avg_wire_size = min(avg_wire_size, 3000);
3824
3825         /* Give a little boost to mid-size frames */
3826         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3827                 new_val = avg_wire_size / 3;
3828         else
3829                 new_val = avg_wire_size / 2;
3830
3831         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3832         if (new_val < IGB_20K_ITR &&
3833             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3834              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3835                 new_val = IGB_20K_ITR;
3836
3837 set_itr_val:
3838         if (new_val != q_vector->itr_val) {
3839                 q_vector->itr_val = new_val;
3840                 q_vector->set_itr = 1;
3841         }
3842 clear_counts:
3843         q_vector->rx.total_bytes = 0;
3844         q_vector->rx.total_packets = 0;
3845         q_vector->tx.total_bytes = 0;
3846         q_vector->tx.total_packets = 0;
3847 }
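
/*
 * Worked example of the heuristic above (illustration only): an average
 * wire size of 450 bytes becomes 474 after the 24-byte overhead, falls in
 * the mid-size band, and yields an ITR value of 474 / 3 = 158.
 * igb_example_itr_from_size() is a hypothetical helper that mirrors the
 * arithmetic, not a driver function.
 */
static inline int igb_example_itr_from_size(int avg_wire_size)
{
        avg_wire_size += 24;                       /* CRC, preamble, IPG */
        avg_wire_size = min(avg_wire_size, 3000);  /* don't starve jumbos */

        if (avg_wire_size > 300 && avg_wire_size < 1200)
                return avg_wire_size / 3;          /* boost mid-size frames */

        return avg_wire_size / 2;
}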
3848
3849 /**
3850  * igb_update_itr - update the dynamic ITR value based on statistics
3851  * @q_vector: pointer to q_vector
3852  * @ring_container: ring info to update the itr for
3853  *      Stores a new ITR value based on packets and byte
3854  *      counts during the last interrupt.  The advantage of per interrupt
3855  *      computation is faster updates and more accurate ITR for the current
3856  *      traffic pattern.  Constants in this function were computed
3857  *      based on theoretical maximum wire speed and thresholds were set based
3858  *      on testing data as well as attempting to minimize response time
3859  *      while increasing bulk throughput.
3860  *      This functionality is controlled by the InterruptThrottleRate module
3861  *      parameter (see igb_param.c)
3862  *      NOTE:  These calculations are only valid when operating in a single-
3863  *             queue environment.
3864  **/
3865 static void igb_update_itr(struct igb_q_vector *q_vector,
3866                            struct igb_ring_container *ring_container)
3867 {
3868         unsigned int packets = ring_container->total_packets;
3869         unsigned int bytes = ring_container->total_bytes;
3870         u8 itrval = ring_container->itr;
3871
3872         /* no packets, exit with status unchanged */
3873         if (packets == 0)
3874                 return;
3875
3876         switch (itrval) {
3877         case lowest_latency:
3878                 /* handle TSO and jumbo frames */
3879                 if (bytes/packets > 8000)
3880                         itrval = bulk_latency;
3881                 else if ((packets < 5) && (bytes > 512))
3882                         itrval = low_latency;
3883                 break;
3884         case low_latency:  /* 50 usec aka 20000 ints/s */
3885                 if (bytes > 10000) {
3886                         /* this if handles the TSO accounting */
3887                         if (bytes/packets > 8000) {
3888                                 itrval = bulk_latency;
3889                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3890                                 itrval = bulk_latency;
3891                         } else if (packets > 35) {
3892                                 itrval = lowest_latency;
3893                         }
3894                 } else if (bytes/packets > 2000) {
3895                         itrval = bulk_latency;
3896                 } else if (packets <= 2 && bytes < 512) {
3897                         itrval = lowest_latency;
3898                 }
3899                 break;
3900         case bulk_latency: /* 250 usec aka 4000 ints/s */
3901                 if (bytes > 25000) {
3902                         if (packets > 35)
3903                                 itrval = low_latency;
3904                 } else if (bytes < 1500) {
3905                         itrval = low_latency;
3906                 }
3907                 break;
3908         }
3909
3910         /* clear work counters since we have the values we need */
3911         ring_container->total_bytes = 0;
3912         ring_container->total_packets = 0;
3913
3914         /* write updated itr to ring container */
3915         ring_container->itr = itrval;
3916 }
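
/*
 * Worked example of the state machine above (illustration only): in the
 * low_latency state, 40 packets totalling 20000 bytes (500 bytes each)
 * pass the bytes > 10000 test, miss both bulk_latency conditions, and
 * exceed the 35-packet threshold, so the container drops back to
 * lowest_latency for the next interrupt.
 */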
3917
3918 static void igb_set_itr(struct igb_q_vector *q_vector)
3919 {
3920         struct igb_adapter *adapter = q_vector->adapter;
3921         u32 new_itr = q_vector->itr_val;
3922         u8 current_itr = 0;
3923
3924         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3925         if (adapter->link_speed != SPEED_1000) {
3926                 current_itr = 0;
3927                 new_itr = IGB_4K_ITR;
3928                 goto set_itr_now;
3929         }
3930
3931         igb_update_itr(q_vector, &q_vector->tx);
3932         igb_update_itr(q_vector, &q_vector->rx);
3933
3934         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3935
3936         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3937         if (current_itr == lowest_latency &&
3938             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3939              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3940                 current_itr = low_latency;
3941
3942         switch (current_itr) {
3943         /* counts and packets in update_itr are dependent on these numbers */
3944         case lowest_latency:
3945                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3946                 break;
3947         case low_latency:
3948                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3949                 break;
3950         case bulk_latency:
3951                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3952                 break;
3953         default:
3954                 break;
3955         }
3956
3957 set_itr_now:
3958         if (new_itr != q_vector->itr_val) {
3959                 /* this attempts to bias the interrupt rate towards Bulk
3960                  * by adding intermediate steps when interrupt rate is
3961                  * increasing */
3962                 new_itr = new_itr > q_vector->itr_val ?
3963                              max((new_itr * q_vector->itr_val) /
3964                                  (new_itr + (q_vector->itr_val >> 2)),
3965                                  new_itr) :
3966                              new_itr;
3967                 /* Don't write the value here; it resets the adapter's
3968                  * internal timer, and causes us to delay far longer than
3969                  * we should between interrupts.  Instead, we write the ITR
3970                  * value at the beginning of the next interrupt so the timing
3971                  * ends up being correct.
3972                  */
3973                 q_vector->itr_val = new_itr;
3974                 q_vector->set_itr = 1;
3975         }
3976 }
3977
3978 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3979                             u32 type_tucmd, u32 mss_l4len_idx)
3980 {
3981         struct e1000_adv_tx_context_desc *context_desc;
3982         u16 i = tx_ring->next_to_use;
3983
3984         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3985
3986         i++;
3987         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3988
3989         /* set bits to identify this as an advanced context descriptor */
3990         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3991
3992         /* For 82575, context index must be unique per ring. */
3993         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3994                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3995
3996         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
3997         context_desc->seqnum_seed       = 0;
3998         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
3999         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4000 }
4001
4002 static int igb_tso(struct igb_ring *tx_ring,
4003                    struct igb_tx_buffer *first,
4004                    u8 *hdr_len)
4005 {
4006         struct sk_buff *skb = first->skb;
4007         u32 vlan_macip_lens, type_tucmd;
4008         u32 mss_l4len_idx, l4len;
4009
4010         if (!skb_is_gso(skb))
4011                 return 0;
4012
4013         if (skb_header_cloned(skb)) {
4014                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4015                 if (err)
4016                         return err;
4017         }
4018
4019         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4020         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4021
4022         if (first->protocol == __constant_htons(ETH_P_IP)) {
4023                 struct iphdr *iph = ip_hdr(skb);
4024                 iph->tot_len = 0;
4025                 iph->check = 0;
4026                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4027                                                          iph->daddr, 0,
4028                                                          IPPROTO_TCP,
4029                                                          0);
4030                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4031                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4032                                    IGB_TX_FLAGS_CSUM |
4033                                    IGB_TX_FLAGS_IPV4;
4034         } else if (skb_is_gso_v6(skb)) {
4035                 ipv6_hdr(skb)->payload_len = 0;
4036                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4037                                                        &ipv6_hdr(skb)->daddr,
4038                                                        0, IPPROTO_TCP, 0);
4039                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4040                                    IGB_TX_FLAGS_CSUM;
4041         }
4042
4043         /* compute header lengths */
4044         l4len = tcp_hdrlen(skb);
4045         *hdr_len = skb_transport_offset(skb) + l4len;
4046
4047         /* update gso size and bytecount with header size */
4048         first->gso_segs = skb_shinfo(skb)->gso_segs;
4049         first->bytecount += (first->gso_segs - 1) * *hdr_len;
4050
4051         /* MSS L4LEN IDX */
4052         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4053         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4054
4055         /* VLAN MACLEN IPLEN */
4056         vlan_macip_lens = skb_network_header_len(skb);
4057         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4058         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4059
4060         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4061
4062         return 1;
4063 }
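
/*
 * Sketch (illustration only) of the MSS/L4LEN packing performed above:
 * the TCP header length and the MSS share a single 32-bit context field,
 * each in its own bit range.  igb_example_mss_l4len() is a hypothetical
 * helper, not a driver function.
 */
static inline u32 igb_example_mss_l4len(u32 l4len, u32 mss)
{
        return (l4len << E1000_ADVTXD_L4LEN_SHIFT) |
               (mss << E1000_ADVTXD_MSS_SHIFT);
}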
4064
4065 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4066 {
4067         struct sk_buff *skb = first->skb;
4068         u32 vlan_macip_lens = 0;
4069         u32 mss_l4len_idx = 0;
4070         u32 type_tucmd = 0;
4071
4072         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4073                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4074                         return;
4075         } else {
4076                 u8 l4_hdr = 0;
4077                 switch (first->protocol) {
4078                 case __constant_htons(ETH_P_IP):
4079                         vlan_macip_lens |= skb_network_header_len(skb);
4080                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4081                         l4_hdr = ip_hdr(skb)->protocol;
4082                         break;
4083                 case __constant_htons(ETH_P_IPV6):
4084                         vlan_macip_lens |= skb_network_header_len(skb);
4085                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4086                         break;
4087                 default:
4088                         if (unlikely(net_ratelimit())) {
4089                                 dev_warn(tx_ring->dev,
4090                                  "partial checksum but proto=%x!\n",
4091                                  first->protocol);
4092                         }
4093                         break;
4094                 }
4095
4096                 switch (l4_hdr) {
4097                 case IPPROTO_TCP:
4098                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4099                         mss_l4len_idx = tcp_hdrlen(skb) <<
4100                                         E1000_ADVTXD_L4LEN_SHIFT;
4101                         break;
4102                 case IPPROTO_SCTP:
4103                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4104                         mss_l4len_idx = sizeof(struct sctphdr) <<
4105                                         E1000_ADVTXD_L4LEN_SHIFT;
4106                         break;
4107                 case IPPROTO_UDP:
4108                         mss_l4len_idx = sizeof(struct udphdr) <<
4109                                         E1000_ADVTXD_L4LEN_SHIFT;
4110                         break;
4111                 default:
4112                         if (unlikely(net_ratelimit())) {
4113                                 dev_warn(tx_ring->dev,
4114                                  "partial checksum but l4 proto=%x!\n",
4115                                  l4_hdr);
4116                         }
4117                         break;
4118                 }
4119
4120                 /* update TX checksum flag */
4121                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4122         }
4123
4124         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4125         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4126
4127         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4128 }
4129
4130 static __le32 igb_tx_cmd_type(u32 tx_flags)
4131 {
4132         /* set type for advanced descriptor with frame checksum insertion */
4133         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4134                                       E1000_ADVTXD_DCMD_IFCS |
4135                                       E1000_ADVTXD_DCMD_DEXT);
4136
4137         /* set HW vlan bit if vlan is present */
4138         if (tx_flags & IGB_TX_FLAGS_VLAN)
4139                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4140
4141         /* set timestamp bit if present */
4142         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4143                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4144
4145         /* set segmentation bits for TSO */
4146         if (tx_flags & IGB_TX_FLAGS_TSO)
4147                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4148
4149         return cmd_type;
4150 }
4151
4152 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4153                                  union e1000_adv_tx_desc *tx_desc,
4154                                  u32 tx_flags, unsigned int paylen)
4155 {
4156         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4157
4158         /* 82575 requires a unique index per ring if any offload is enabled */
4159         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4160             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4161                 olinfo_status |= tx_ring->reg_idx << 4;
4162
4163         /* insert L4 checksum */
4164         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4165                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4166
4167                 /* insert IPv4 checksum */
4168                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4169                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4170         }
4171
4172         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4173 }
4174
4175 /*
4176  * The largest size we can write to the descriptor is 65535.  In order to
4177  * maintain a power of two alignment we have to limit ourselves to 32K.
4178  */
4179 #define IGB_MAX_TXD_PWR 15
4180 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4181
4182 static void igb_tx_map(struct igb_ring *tx_ring,
4183                        struct igb_tx_buffer *first,
4184                        const u8 hdr_len)
4185 {
4186         struct sk_buff *skb = first->skb;
4187         struct igb_tx_buffer *tx_buffer_info;
4188         union e1000_adv_tx_desc *tx_desc;
4189         dma_addr_t dma;
4190         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4191         unsigned int data_len = skb->data_len;
4192         unsigned int size = skb_headlen(skb);
4193         unsigned int paylen = skb->len - hdr_len;
4194         __le32 cmd_type;
4195         u32 tx_flags = first->tx_flags;
4196         u16 i = tx_ring->next_to_use;
4197
4198         tx_desc = IGB_TX_DESC(tx_ring, i);
4199
4200         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4201         cmd_type = igb_tx_cmd_type(tx_flags);
4202
4203         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4204         if (dma_mapping_error(tx_ring->dev, dma))
4205                 goto dma_error;
4206
4207         /* record length, and DMA address */
4208         first->length = size;
4209         first->dma = dma;
4210         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4211
4212         for (;;) {
4213                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4214                         tx_desc->read.cmd_type_len =
4215                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4216
4217                         i++;
4218                         tx_desc++;
4219                         if (i == tx_ring->count) {
4220                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4221                                 i = 0;
4222                         }
4223
4224                         dma += IGB_MAX_DATA_PER_TXD;
4225                         size -= IGB_MAX_DATA_PER_TXD;
4226
4227                         tx_desc->read.olinfo_status = 0;
4228                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4229                 }
4230
4231                 if (likely(!data_len))
4232                         break;
4233
4234                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4235
4236                 i++;
4237                 tx_desc++;
4238                 if (i == tx_ring->count) {
4239                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4240                         i = 0;
4241                 }
4242
4243                 size = skb_frag_size(frag);
4244                 data_len -= size;
4245
4246                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4247                                    size, DMA_TO_DEVICE);
4248                 if (dma_mapping_error(tx_ring->dev, dma))
4249                         goto dma_error;
4250
4251                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4252                 tx_buffer_info->length = size;
4253                 tx_buffer_info->dma = dma;
4254
4255                 tx_desc->read.olinfo_status = 0;
4256                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4257
4258                 frag++;
4259         }
4260
4261         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4262
4263         /* write last descriptor with RS and EOP bits */
4264         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4265         if (unlikely(skb->no_fcs))
4266                 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4267         tx_desc->read.cmd_type_len = cmd_type;
4268
4269         /* set the timestamp */
4270         first->time_stamp = jiffies;
4271
4272         /*
4273          * Force memory writes to complete before letting h/w know there
4274          * are new descriptors to fetch.  (Only applicable for weak-ordered
4275          * memory model archs, such as IA-64).
4276          *
4277          * We also need this memory barrier to make certain all of the
4278          * status bits have been updated before next_to_watch is written.
4279          */
4280         wmb();
4281
4282         /* set next_to_watch value indicating a packet is present */
4283         first->next_to_watch = tx_desc;
4284
4285         i++;
4286         if (i == tx_ring->count)
4287                 i = 0;
4288
4289         tx_ring->next_to_use = i;
4290
4291         writel(i, tx_ring->tail);
4292
4293         /* We need this if more than one processor can write to our tail
4294          * at a time; it synchronizes I/O on IA64/Altix systems. */
4295         mmiowb();
4296
4297         return;
4298
4299 dma_error:
4300         dev_err(tx_ring->dev, "TX DMA map failed\n");
4301
4302         /* clear dma mappings for failed tx_buffer_info map */
4303         for (;;) {
4304                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4305                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4306                 if (tx_buffer_info == first)
4307                         break;
4308                 if (i == 0)
4309                         i = tx_ring->count;
4310                 i--;
4311         }
4312
4313         tx_ring->next_to_use = i;
4314 }
4315
4316 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4317 {
4318         struct net_device *netdev = tx_ring->netdev;
4319
4320         netif_stop_subqueue(netdev, tx_ring->queue_index);
4321
4322         /* Herbert's original patch had:
4323          *  smp_mb__after_netif_stop_queue();
4324          * but since that doesn't exist yet, just open code it. */
4325         smp_mb();
4326
4327         /* We need to check again in case another CPU has just
4328          * made room available. */
4329         if (igb_desc_unused(tx_ring) < size)
4330                 return -EBUSY;
4331
4332         /* A reprieve! */
4333         netif_wake_subqueue(netdev, tx_ring->queue_index);
4334
4335         u64_stats_update_begin(&tx_ring->tx_syncp2);
4336         tx_ring->tx_stats.restart_queue2++;
4337         u64_stats_update_end(&tx_ring->tx_syncp2);
4338
4339         return 0;
4340 }
4341
4342 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4343 {
4344         if (igb_desc_unused(tx_ring) >= size)
4345                 return 0;
4346         return __igb_maybe_stop_tx(tx_ring, size);
4347 }
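
     /*
      * Note on the stop/wake protocol above: the smp_mb() in
      * __igb_maybe_stop_tx() orders the queue-stop write against the
      * re-read of the free descriptor count, pairing with the cleanup
      * path that first frees descriptors and then checks for a stopped
      * queue.  Either this CPU sees the newly freed entries, or the
      * cleaning CPU sees the stopped queue and wakes it, so the queue
      * cannot stall while room is available.
      */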
4348
4349 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4350                                 struct igb_ring *tx_ring)
4351 {
4352         struct igb_tx_buffer *first;
4353         int tso;
4354         u32 tx_flags = 0;
4355         __be16 protocol = vlan_get_protocol(skb);
4356         u8 hdr_len = 0;
4357
4358         /* need: 1 descriptor per page,
4359          *       + 2 desc gap to keep tail from touching head,
4360          *       + 1 desc for skb->data,
4361          *       + 1 desc for context descriptor,
4362          * otherwise try next time */
4363         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4364                 /* this is a hard error */
4365                 return NETDEV_TX_BUSY;
4366         }
4367
4368         /* record the location of the first descriptor for this packet */
4369         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4370         first->skb = skb;
4371         first->bytecount = skb->len;
4372         first->gso_segs = 1;
4373
4374         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4375                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4376                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4377         }
4378
4379         if (vlan_tx_tag_present(skb)) {
4380                 tx_flags |= IGB_TX_FLAGS_VLAN;
4381                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4382         }
4383
4384         /* record initial flags and protocol */
4385         first->tx_flags = tx_flags;
4386         first->protocol = protocol;
4387
4388         tso = igb_tso(tx_ring, first, &hdr_len);
4389         if (tso < 0)
4390                 goto out_drop;
4391         else if (!tso)
4392                 igb_tx_csum(tx_ring, first);
4393
4394         igb_tx_map(tx_ring, first, hdr_len);
4395
4396         /* Make sure there is space in the ring for the next send. */
4397         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4398
4399         return NETDEV_TX_OK;
4400
4401 out_drop:
4402         igb_unmap_and_free_tx_resource(tx_ring, first);
4403
4404         return NETDEV_TX_OK;
4405 }
4406
4407 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4408                                                     struct sk_buff *skb)
4409 {
4410         unsigned int r_idx = skb->queue_mapping;
4411
4412         if (r_idx >= adapter->num_tx_queues)
4413                 r_idx = r_idx % adapter->num_tx_queues;
4414
4415         return adapter->tx_ring[r_idx];
4416 }
4417
4418 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4419                                   struct net_device *netdev)
4420 {
4421         struct igb_adapter *adapter = netdev_priv(netdev);
4422
4423         if (test_bit(__IGB_DOWN, &adapter->state)) {
4424                 dev_kfree_skb_any(skb);
4425                 return NETDEV_TX_OK;
4426         }
4427
4428         if (skb->len <= 0) {
4429                 dev_kfree_skb_any(skb);
4430                 return NETDEV_TX_OK;
4431         }
4432
4433         /* The minimum packet size with TCTL.PSP set is 17, so pad the
4434          * skb to meet it.  skb_padto() zero-fills the tail but leaves
4435          * skb->len unchanged, hence the manual adjustment below.
4436          */
4437         if (skb->len < 17) {
4438                 if (skb_padto(skb, 17))
4439                         return NETDEV_TX_OK;
4440                 skb->len = 17;
4441         }
4442
4443         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4444 }
4445
4446 /**
4447  * igb_tx_timeout - Respond to a Tx Hang
4448  * @netdev: network interface device structure
4449  **/
4450 static void igb_tx_timeout(struct net_device *netdev)
4451 {
4452         struct igb_adapter *adapter = netdev_priv(netdev);
4453         struct e1000_hw *hw = &adapter->hw;
4454
4455         /* Do the reset outside of interrupt context */
4456         adapter->tx_timeout_count++;
4457
4458         if (hw->mac.type >= e1000_82580)
4459                 hw->dev_spec._82575.global_device_reset = true;
4460
4461         schedule_work(&adapter->reset_task);
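             /* Writing EICS sets the selected interrupt-cause bits, firing an
              * interrupt on every queue vector (the "other" cause excluded) so
              * any stalled rings get serviced promptly. */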
4462         wr32(E1000_EICS,
4463              (adapter->eims_enable_mask & ~adapter->eims_other));
4464 }
4465
4466 static void igb_reset_task(struct work_struct *work)
4467 {
4468         struct igb_adapter *adapter;
4469         adapter = container_of(work, struct igb_adapter, reset_task);
4470
4471         igb_dump(adapter);
4472         netdev_err(adapter->netdev, "Reset adapter\n");
4473         igb_reinit_locked(adapter);
4474 }
4475
4476 /**
4477  * igb_get_stats64 - Get System Network Statistics
4478  * @netdev: network interface device structure
4479  * @stats: rtnl_link_stats64 pointer
4480  *
4481  **/
4482 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4483                                                  struct rtnl_link_stats64 *stats)
4484 {
4485         struct igb_adapter *adapter = netdev_priv(netdev);
4486
4487         spin_lock(&adapter->stats64_lock);
4488         igb_update_stats(adapter, &adapter->stats64);
4489         memcpy(stats, &adapter->stats64, sizeof(*stats));
4490         spin_unlock(&adapter->stats64_lock);
4491
4492         return stats;
4493 }
4494
4495 /**
4496  * igb_change_mtu - Change the Maximum Transfer Unit
4497  * @netdev: network interface device structure
4498  * @new_mtu: new value for maximum frame size
4499  *
4500  * Returns 0 on success, negative on failure
4501  **/
4502 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4503 {
4504         struct igb_adapter *adapter = netdev_priv(netdev);
4505         struct pci_dev *pdev = adapter->pdev;
4506         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4507
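             /* 68 bytes is the minimum MTU an IPv4 host is required to support */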
4508         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4509                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4510                 return -EINVAL;
4511         }
4512
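     /* 9238 bytes on the wire: a 9216-byte MTU plus ETH_HLEN, ETH_FCS_LEN and
      * VLAN_HLEN of overhead, which matches the "MTU > 9216" message below */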
4513 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4514         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4515                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4516                 return -EINVAL;
4517         }
4518
4519         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4520                 msleep(1);
4521
4522         /* igb_down has a dependency on max_frame_size */
4523         adapter->max_frame_size = max_frame;
4524
4525         if (netif_running(netdev))
4526                 igb_down(adapter);
4527
4528         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4529                  netdev->mtu, new_mtu);
4530         netdev->mtu = new_mtu;
4531
4532         if (netif_running(netdev))
4533                 igb_up(adapter);
4534         else
4535                 igb_reset(adapter);
4536
4537         clear_bit(__IGB_RESETTING, &adapter->state);
4538
4539         return 0;
4540 }
4541
4542 /**
4543  * igb_update_stats - Update the board statistics counters
4544  * @adapter: board private structure
4545  **/
4546
4547 void igb_update_stats(struct igb_adapter *adapter,
4548                       struct rtnl_link_stats64 *net_stats)
4549 {
4550         struct e1000_hw *hw = &adapter->hw;
4551         struct pci_dev *pdev = adapter->pdev;
4552         u32 reg, mpc;
4553         u16 phy_tmp;
4554         int i;
4555         u64 bytes, packets;
4556         unsigned int start;
4557         u64 _bytes, _packets;
4558
4559 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4560
4561         /*
4562          * Prevent stats update while adapter is being reset, or if the pci
4563          * connection is down.
4564          */
4565         if (adapter->link_speed == 0)
4566                 return;
4567         if (pci_channel_offline(pdev))
4568                 return;
4569
4570         bytes = 0;
4571         packets = 0;
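             /* Per-ring counters are written from NAPI context; the
              * u64_stats_fetch_begin_bh()/retry_bh() loops below take a
              * consistent 64-bit snapshot even on 32-bit hosts. */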
4572         for (i = 0; i < adapter->num_rx_queues; i++) {
4573                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4574                 struct igb_ring *ring = adapter->rx_ring[i];
4575
4576                 ring->rx_stats.drops += rqdpc_tmp;
4577                 net_stats->rx_fifo_errors += rqdpc_tmp;
4578
4579                 do {
4580                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4581                         _bytes = ring->rx_stats.bytes;
4582                         _packets = ring->rx_stats.packets;
4583                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4584                 bytes += _bytes;
4585                 packets += _packets;
4586         }
4587
4588         net_stats->rx_bytes = bytes;
4589         net_stats->rx_packets = packets;
4590
4591         bytes = 0;
4592         packets = 0;
4593         for (i = 0; i < adapter->num_tx_queues; i++) {
4594                 struct igb_ring *ring = adapter->tx_ring[i];
4595                 do {
4596                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4597                         _bytes = ring->tx_stats.bytes;
4598                         _packets = ring->tx_stats.packets;
4599                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4600                 bytes += _bytes;
4601                 packets += _packets;
4602         }
4603         net_stats->tx_bytes = bytes;
4604         net_stats->tx_packets = packets;
4605
4606         /* read stats registers */
4607         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4608         adapter->stats.gprc += rd32(E1000_GPRC);
4609         adapter->stats.gorc += rd32(E1000_GORCL);
4610         rd32(E1000_GORCH); /* clear GORCL */
4611         adapter->stats.bprc += rd32(E1000_BPRC);
4612         adapter->stats.mprc += rd32(E1000_MPRC);
4613         adapter->stats.roc += rd32(E1000_ROC);
4614
4615         adapter->stats.prc64 += rd32(E1000_PRC64);
4616         adapter->stats.prc127 += rd32(E1000_PRC127);
4617         adapter->stats.prc255 += rd32(E1000_PRC255);
4618         adapter->stats.prc511 += rd32(E1000_PRC511);
4619         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4620         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4621         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4622         adapter->stats.sec += rd32(E1000_SEC);
4623
4624         mpc = rd32(E1000_MPC);
4625         adapter->stats.mpc += mpc;
4626         net_stats->rx_fifo_errors += mpc;
4627         adapter->stats.scc += rd32(E1000_SCC);
4628         adapter->stats.ecol += rd32(E1000_ECOL);
4629         adapter->stats.mcc += rd32(E1000_MCC);
4630         adapter->stats.latecol += rd32(E1000_LATECOL);
4631         adapter->stats.dc += rd32(E1000_DC);
4632         adapter->stats.rlec += rd32(E1000_RLEC);
4633         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4634         adapter->stats.xontxc += rd32(E1000_XONTXC);
4635         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4636         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4637         adapter->stats.fcruc += rd32(E1000_FCRUC);
4638         adapter->stats.gptc += rd32(E1000_GPTC);
4639         adapter->stats.gotc += rd32(E1000_GOTCL);
4640         rd32(E1000_GOTCH); /* clear GOTCL */
4641         adapter->stats.rnbc += rd32(E1000_RNBC);
4642         adapter->stats.ruc += rd32(E1000_RUC);
4643         adapter->stats.rfc += rd32(E1000_RFC);
4644         adapter->stats.rjc += rd32(E1000_RJC);
4645         adapter->stats.tor += rd32(E1000_TORH);
4646         adapter->stats.tot += rd32(E1000_TOTH);
4647         adapter->stats.tpr += rd32(E1000_TPR);
4648
4649         adapter->stats.ptc64 += rd32(E1000_PTC64);
4650         adapter->stats.ptc127 += rd32(E1000_PTC127);
4651         adapter->stats.ptc255 += rd32(E1000_PTC255);
4652         adapter->stats.ptc511 += rd32(E1000_PTC511);
4653         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4654         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4655
4656         adapter->stats.mptc += rd32(E1000_MPTC);
4657         adapter->stats.bptc += rd32(E1000_BPTC);
4658
4659         adapter->stats.tpt += rd32(E1000_TPT);
4660         adapter->stats.colc += rd32(E1000_COLC);
4661
4662         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4663         /* read internal phy specific stats */
4664         reg = rd32(E1000_CTRL_EXT);
4665         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4666                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4667                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4668         }
4669
4670         adapter->stats.tsctc += rd32(E1000_TSCTC);
4671         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4672
4673         adapter->stats.iac += rd32(E1000_IAC);
4674         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4675         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4676         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4677         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4678         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4679         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4680         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4681         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4682
4683         /* Fill out the OS statistics structure */
4684         net_stats->multicast = adapter->stats.mprc;
4685         net_stats->collisions = adapter->stats.colc;
4686
4687         /* Rx Errors */
4688
4689         /* RLEC on some newer hardware can be incorrect, so build
4690          * our own version based on RUC and ROC */
4691         net_stats->rx_errors = adapter->stats.rxerrc +
4692                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4693                 adapter->stats.ruc + adapter->stats.roc +
4694                 adapter->stats.cexterr;
4695         net_stats->rx_length_errors = adapter->stats.ruc +
4696                                       adapter->stats.roc;
4697         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4698         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4699         net_stats->rx_missed_errors = adapter->stats.mpc;
4700
4701         /* Tx Errors */
4702         net_stats->tx_errors = adapter->stats.ecol +
4703                                adapter->stats.latecol;
4704         net_stats->tx_aborted_errors = adapter->stats.ecol;
4705         net_stats->tx_window_errors = adapter->stats.latecol;
4706         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4707
4708         /* Tx Dropped needs to be maintained elsewhere */
4709
4710         /* Phy Stats */
4711         if (hw->phy.media_type == e1000_media_type_copper) {
4712                 if ((adapter->link_speed == SPEED_1000) &&
4713                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4714                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4715                         adapter->phy_stats.idle_errors += phy_tmp;
4716                 }
4717         }
4718
4719         /* Management Stats */
4720         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4721         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4722         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4723
4724         /* OS2BMC Stats */
4725         reg = rd32(E1000_MANC);
4726         if (reg & E1000_MANC_EN_BMC2OS) {
4727                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4728                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4729                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4730                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4731         }
4732 }
4733
4734 static irqreturn_t igb_msix_other(int irq, void *data)
4735 {
4736         struct igb_adapter *adapter = data;
4737         struct e1000_hw *hw = &adapter->hw;
4738         u32 icr = rd32(E1000_ICR);
4739         /* reading ICR causes bit 31 of EICR to be cleared */
4740
4741         if (icr & E1000_ICR_DRSTA)
4742                 schedule_work(&adapter->reset_task);
4743
4744         if (icr & E1000_ICR_DOUTSYNC) {
4745                 /* HW is reporting DMA is out of sync */
4746                 adapter->stats.doosync++;
4747                 /* The DMA Out of Sync is also an indication of a spoof event
4748                  * in IOV mode. Check the Wrong VM Behavior register to
4749                  * see if it is really a spoof event. */
4750                 igb_check_wvbr(adapter);
4751         }
4752
4753         /* Check for a mailbox event */
4754         if (icr & E1000_ICR_VMMB)
4755                 igb_msg_task(adapter);
4756
4757         if (icr & E1000_ICR_LSC) {
4758                 hw->mac.get_link_status = 1;
4759                 /* guard against interrupt when we're going down */
4760                 if (!test_bit(__IGB_DOWN, &adapter->state))
4761                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4762         }
4763
4764         wr32(E1000_EIMS, adapter->eims_other);
4765
4766         return IRQ_HANDLED;
4767 }
4768
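     /*
      * Push the interrupt throttle value computed during the previous
      * interrupt into this vector's EITR register.  On 82575 the interval
      * is mirrored into the upper half of the register; on later MACs the
      * CNT_IGNR bit asks the hardware to update only the interval field
      * (our reading of the per-MAC EITR layout differences).
      */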
4769 static void igb_write_itr(struct igb_q_vector *q_vector)
4770 {
4771         struct igb_adapter *adapter = q_vector->adapter;
4772         u32 itr_val = q_vector->itr_val & 0x7FFC;
4773
4774         if (!q_vector->set_itr)
4775                 return;
4776
4777         if (!itr_val)
4778                 itr_val = 0x4;
4779
4780         if (adapter->hw.mac.type == e1000_82575)
4781                 itr_val |= itr_val << 16;
4782         else
4783                 itr_val |= E1000_EITR_CNT_IGNR;
4784
4785         writel(itr_val, q_vector->itr_register);
4786         q_vector->set_itr = 0;
4787 }
4788
4789 static irqreturn_t igb_msix_ring(int irq, void *data)
4790 {
4791         struct igb_q_vector *q_vector = data;
4792
4793         /* Write the ITR value calculated from the previous interrupt. */
4794         igb_write_itr(q_vector);
4795
4796         napi_schedule(&q_vector->napi);
4797
4798         return IRQ_HANDLED;
4799 }
4800
4801 #ifdef CONFIG_IGB_DCA
4802 static void igb_update_dca(struct igb_q_vector *q_vector)
4803 {
4804         struct igb_adapter *adapter = q_vector->adapter;
4805         struct e1000_hw *hw = &adapter->hw;
4806         int cpu = get_cpu();
4807
4808         if (q_vector->cpu == cpu)
4809                 goto out_no_update;
4810
4811         if (q_vector->tx.ring) {
4812                 int q = q_vector->tx.ring->reg_idx;
4813                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4814                 if (hw->mac.type == e1000_82575) {
4815                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4816                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4817                 } else {
4818                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4819                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4820                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4821                 }
4822                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4823                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4824         }
4825         if (q_vector->rx.ring) {
4826                 int q = q_vector->rx.ring->reg_idx;
4827                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4828                 if (hw->mac.type == e1000_82575) {
4829                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4830                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4831                 } else {
4832                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4833                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4834                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4835                 }
4836                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4837                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4838                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4839                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4840         }
4841         q_vector->cpu = cpu;
4842 out_no_update:
4843         put_cpu();
4844 }
4845
4846 static void igb_setup_dca(struct igb_adapter *adapter)
4847 {
4848         struct e1000_hw *hw = &adapter->hw;
4849         int i;
4850
4851         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4852                 return;
4853
4854         /* Always use CB2 mode, difference is masked in the CB driver. */
4855         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4856
4857         for (i = 0; i < adapter->num_q_vectors; i++) {
4858                 adapter->q_vector[i]->cpu = -1;
4859                 igb_update_dca(adapter->q_vector[i]);
4860         }
4861 }
4862
4863 static int __igb_notify_dca(struct device *dev, void *data)
4864 {
4865         struct net_device *netdev = dev_get_drvdata(dev);
4866         struct igb_adapter *adapter = netdev_priv(netdev);
4867         struct pci_dev *pdev = adapter->pdev;
4868         struct e1000_hw *hw = &adapter->hw;
4869         unsigned long event = *(unsigned long *)data;
4870
4871         switch (event) {
4872         case DCA_PROVIDER_ADD:
4873                 /* if already enabled, don't do it again */
4874                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4875                         break;
4876                 if (dca_add_requester(dev) == 0) {
4877                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4878                         dev_info(&pdev->dev, "DCA enabled\n");
4879                         igb_setup_dca(adapter);
4880                         break;
4881                 }
4882                 /* Fall Through since DCA is disabled. */
4883         case DCA_PROVIDER_REMOVE:
4884                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4885                         /* without this a class_device is left
4886                          * hanging around in the sysfs model */
4887                         dca_remove_requester(dev);
4888                         dev_info(&pdev->dev, "DCA disabled\n");
4889                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4890                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4891                 }
4892                 break;
4893         }
4894
4895         return 0;
4896 }
4897
4898 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4899                           void *p)
4900 {
4901         int ret_val;
4902
4903         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4904                                          __igb_notify_dca);
4905
4906         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4907 }
4908 #endif /* CONFIG_IGB_DCA */
4909
4910 #ifdef CONFIG_PCI_IOV
4911 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4912 {
4913         unsigned char mac_addr[ETH_ALEN];
4914         struct pci_dev *pdev = adapter->pdev;
4915         struct e1000_hw *hw = &adapter->hw;
4916         struct pci_dev *pvfdev;
4917         unsigned int device_id;
4918         u16 thisvf_devfn;
4919
4920         random_ether_addr(mac_addr);
4921         igb_set_vf_mac(adapter, vf, mac_addr);
4922
4923         switch (adapter->hw.mac.type) {
4924         case e1000_82576:
4925                 device_id = IGB_82576_VF_DEV_ID;
4926                 /* VF Stride for 82576 is 2 */
4927                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4928                         (pdev->devfn & 1);
4929                 break;
4930         case e1000_i350:
4931                 device_id = IGB_I350_VF_DEV_ID;
4932                 /* VF Stride for I350 is 4 */
4933                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4934                                 (pdev->devfn & 3);
4935                 break;
4936         default:
4937                 device_id = 0;
4938                 thisvf_devfn = 0;
4939                 break;
4940         }
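
             /* Worked example, assuming a PF at devfn 0x00: with the 82576
              * stride of 2, VF 0 sits at devfn 0x80 and VF 1 at 0x82; with
              * the i350 stride of 4, VF 1 would sit at devfn 0x84. */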
4941
4942         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4943         while (pvfdev) {
4944                 if (pvfdev->devfn == thisvf_devfn)
4945                         break;
4946                 pvfdev = pci_get_device(hw->vendor_id,
4947                                         device_id, pvfdev);
4948         }
4949
4950         if (pvfdev)
4951                 adapter->vf_data[vf].vfdev = pvfdev;
4952         else
4953                 dev_err(&pdev->dev,
4954                         "Couldn't find pci dev ptr for VF %4.4x\n",
4955                         thisvf_devfn);
4956         return pvfdev != NULL;
4957 }
4958
4959 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4960 {
4961         struct e1000_hw *hw = &adapter->hw;
4962         struct pci_dev *pdev = adapter->pdev;
4963         struct pci_dev *pvfdev;
4964         u16 vf_devfn = 0;
4965         u16 vf_stride;
4966         unsigned int device_id;
4967         int vfs_found = 0;
4968
4969         switch (adapter->hw.mac.type) {
4970         case e1000_82576:
4971                 device_id = IGB_82576_VF_DEV_ID;
4972                 /* VF Stride for 82576 is 2 */
4973                 vf_stride = 2;
4974                 break;
4975         case e1000_i350:
4976                 device_id = IGB_I350_VF_DEV_ID;
4977                 /* VF Stride for I350 is 4 */
4978                 vf_stride = 4;
4979                 break;
4980         default:
4981                 device_id = 0;
4982                 vf_stride = 0;
4983                 break;
4984         }
4985
4986         vf_devfn = pdev->devfn + 0x80;
4987         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4988         while (pvfdev) {
4989                 if (pvfdev->devfn == vf_devfn &&
4990                     (pvfdev->bus->number >= pdev->bus->number))
4991                         vfs_found++;
4992                 vf_devfn += vf_stride;
4993                 pvfdev = pci_get_device(hw->vendor_id,
4994                                         device_id, pvfdev);
4995         }
4996
4997         return vfs_found;
4998 }
4999
5000 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5001 {
5002         int i;
5003         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5004                 if (adapter->vf_data[i].vfdev) {
5005                         if (adapter->vf_data[i].vfdev->dev_flags &
5006                             PCI_DEV_FLAGS_ASSIGNED)
5007                                 return true;
5008                 }
5009         }
5010         return false;
5011 }
5012
5013 #endif
5014 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5015 {
5016         struct e1000_hw *hw = &adapter->hw;
5017         u32 ping;
5018         int i;
5019
5020         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5021                 ping = E1000_PF_CONTROL_MSG;
5022                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5023                         ping |= E1000_VT_MSGTYPE_CTS;
5024                 igb_write_mbx(hw, &ping, 1, i);
5025         }
5026 }
5027
5028 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5029 {
5030         struct e1000_hw *hw = &adapter->hw;
5031         u32 vmolr = rd32(E1000_VMOLR(vf));
5032         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5033
5034         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5035                             IGB_VF_FLAG_MULTI_PROMISC);
5036         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5037
5038         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5039                 vmolr |= E1000_VMOLR_MPME;
5040                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5041                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5042         } else {
5043                 /*
5044                  * if we have hashes and we are clearing a multicast promisc
5045                  * flag we need to write the hashes to the MTA as this step
5046                  * was previously skipped
5047                  */
5048                 if (vf_data->num_vf_mc_hashes > 30) {
5049                         vmolr |= E1000_VMOLR_MPME;
5050                 } else if (vf_data->num_vf_mc_hashes) {
5051                         int j;
5052                         vmolr |= E1000_VMOLR_ROMPE;
5053                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5054                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5055                 }
5056         }
5057
5058         wr32(E1000_VMOLR(vf), vmolr);
5059
5060         /* there are flags left unprocessed, likely not supported */
5061         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5062                 return -EINVAL;
5063
5064         return 0;
5065
5066 }
5067
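     /*
      * VF->PF multicast message layout, as parsed below:
      *   msgbuf[0]: bits 31:16 (E1000_VT_MSGINFO) carry the hash count,
      *              bits 15:0 carry the E1000_VF_SET_MULTICAST command
      *   msgbuf[1]..: the hash values, packed as u16s
      */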
5068 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5069                                   u32 *msgbuf, u32 vf)
5070 {
5071         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5072         u16 *hash_list = (u16 *)&msgbuf[1];
5073         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5074         int i;
5075
5076         /* salt away the number of multicast addresses assigned
5077          * to this VF for later use to restore when the PF multicast
5078          * list changes
5079          */
5080         vf_data->num_vf_mc_hashes = n;
5081
5082         /* only up to 30 hash values supported */
5083         if (n > 30)
5084                 n = 30;
5085
5086         /* store the hashes for later use */
5087         for (i = 0; i < n; i++)
5088                 vf_data->vf_mc_hashes[i] = hash_list[i];
5089
5090         /* Flush and reset the mta with the new values */
5091         igb_set_rx_mode(adapter->netdev);
5092
5093         return 0;
5094 }
5095
5096 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5097 {
5098         struct e1000_hw *hw = &adapter->hw;
5099         struct vf_data_storage *vf_data;
5100         int i, j;
5101
5102         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5103                 u32 vmolr = rd32(E1000_VMOLR(i));
5104                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5105
5106                 vf_data = &adapter->vf_data[i];
5107
5108                 if ((vf_data->num_vf_mc_hashes > 30) ||
5109                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5110                         vmolr |= E1000_VMOLR_MPME;
5111                 } else if (vf_data->num_vf_mc_hashes) {
5112                         vmolr |= E1000_VMOLR_ROMPE;
5113                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5114                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5115                 }
5116                 wr32(E1000_VMOLR(i), vmolr);
5117         }
5118 }
5119
5120 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5121 {
5122         struct e1000_hw *hw = &adapter->hw;
5123         u32 pool_mask, reg, vid;
5124         int i;
5125
5126         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5127
5128         /* Find the vlan filter for this id */
5129         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5130                 reg = rd32(E1000_VLVF(i));
5131
5132                 /* remove the vf from the pool */
5133                 reg &= ~pool_mask;
5134
5135                 /* if pool is empty then remove entry from vfta */
5136                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5137                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5138                         vid = reg & E1000_VLVF_VLANID_MASK;
5139                         reg = 0;
5140                         igb_vfta_set(hw, vid, false);
5141                 }
5142
5143                 wr32(E1000_VLVF(i), reg);
5144         }
5145
5146         adapter->vf_data[vf].vlans_enabled = 0;
5147 }
5148
5149 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5150 {
5151         struct e1000_hw *hw = &adapter->hw;
5152         u32 reg, i;
5153
5154         /* The vlvf table only exists on 82576 hardware and newer */
5155         if (hw->mac.type < e1000_82576)
5156                 return -1;
5157
5158         /* we only need to do this if VMDq is enabled */
5159         if (!adapter->vfs_allocated_count)
5160                 return -1;
5161
5162         /* Find the vlan filter for this id */
5163         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5164                 reg = rd32(E1000_VLVF(i));
5165                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5166                     vid == (reg & E1000_VLVF_VLANID_MASK))
5167                         break;
5168         }
5169
5170         if (add) {
5171                 if (i == E1000_VLVF_ARRAY_SIZE) {
5172                         /* Did not find a matching VLAN ID entry that was
5173                          * enabled.  Search for a free filter entry, i.e.
5174                          * one without the enable bit set
5175                          */
5176                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5177                                 reg = rd32(E1000_VLVF(i));
5178                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5179                                         break;
5180                         }
5181                 }
5182                 if (i < E1000_VLVF_ARRAY_SIZE) {
5183                         /* Found an enabled/available entry */
5184                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5185
5186                         /* if !enabled we need to set this up in vfta */
5187                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5188                                 /* add VID to filter table */
5189                                 igb_vfta_set(hw, vid, true);
5190                                 reg |= E1000_VLVF_VLANID_ENABLE;
5191                         }
5192                         reg &= ~E1000_VLVF_VLANID_MASK;
5193                         reg |= vid;
5194                         wr32(E1000_VLVF(i), reg);
5195
5196                         /* do not modify RLPML for PF devices */
5197                         if (vf >= adapter->vfs_allocated_count)
5198                                 return 0;
5199
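                                  /* first VLAN enabled for this VF: grow its
                                   * maximum accepted frame size (VMOLR.RLPML)
                                   * by one 4-byte VLAN tag */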
5200                         if (!adapter->vf_data[vf].vlans_enabled) {
5201                                 u32 size;
5202                                 reg = rd32(E1000_VMOLR(vf));
5203                                 size = reg & E1000_VMOLR_RLPML_MASK;
5204                                 size += 4;
5205                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5206                                 reg |= size;
5207                                 wr32(E1000_VMOLR(vf), reg);
5208                         }
5209
5210                         adapter->vf_data[vf].vlans_enabled++;
5211                 }
5212         } else {
5213                 if (i < E1000_VLVF_ARRAY_SIZE) {
5214                         /* remove vf from the pool */
5215                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5216                         /* if pool is empty then remove entry from vfta */
5217                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5218                                 reg = 0;
5219                                 igb_vfta_set(hw, vid, false);
5220                         }
5221                         wr32(E1000_VLVF(i), reg);
5222
5223                         /* do not modify RLPML for PF devices */
5224                         if (vf >= adapter->vfs_allocated_count)
5225                                 return 0;
5226
5227                         adapter->vf_data[vf].vlans_enabled--;
5228                         if (!adapter->vf_data[vf].vlans_enabled) {
5229                                 u32 size;
5230                                 reg = rd32(E1000_VMOLR(vf));
5231                                 size = reg & E1000_VMOLR_RLPML_MASK;
5232                                 size -= 4;
5233                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5234                                 reg |= size;
5235                                 wr32(E1000_VMOLR(vf), reg);
5236                         }
5237                 }
5238         }
5239         return 0;
5240 }
5241
5242 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5243 {
5244         struct e1000_hw *hw = &adapter->hw;
5245
5246         if (vid)
5247                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5248         else
5249                 wr32(E1000_VMVIR(vf), 0);
5250 }
5251
5252 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5253                                int vf, u16 vlan, u8 qos)
5254 {
5255         int err = 0;
5256         struct igb_adapter *adapter = netdev_priv(netdev);
5257
5258         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5259                 return -EINVAL;
5260         if (vlan || qos) {
5261                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5262                 if (err)
5263                         goto out;
5264                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5265                 igb_set_vmolr(adapter, vf, !vlan);
5266                 adapter->vf_data[vf].pf_vlan = vlan;
5267                 adapter->vf_data[vf].pf_qos = qos;
5268                 dev_info(&adapter->pdev->dev,
5269                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5270                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5271                         dev_warn(&adapter->pdev->dev,
5272                                  "The VF VLAN has been set,"
5273                                  " but the PF device is not up.\n");
5274                         dev_warn(&adapter->pdev->dev,
5275                                  "Bring the PF device up before"
5276                                  " attempting to use the VF device.\n");
5277                 }
5278         } else {
5279                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5280                                    false, vf);
5281                 igb_set_vmvir(adapter, vlan, vf);
5282                 igb_set_vmolr(adapter, vf, true);
5283                 adapter->vf_data[vf].pf_vlan = 0;
5284                 adapter->vf_data[vf].pf_qos = 0;
5285         }
5286 out:
5287         return err;
5288 }
5289
5290 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5291 {
5292         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5293         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5294
5295         return igb_vlvf_set(adapter, vid, add, vf);
5296 }
5297
5298 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5299 {
5300         /* clear flags - except flag that indicates PF has set the MAC */
5301         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5302         adapter->vf_data[vf].last_nack = jiffies;
5303
5304         /* reset offloads to defaults */
5305         igb_set_vmolr(adapter, vf, true);
5306
5307         /* reset vlans for device */
5308         igb_clear_vf_vfta(adapter, vf);
5309         if (adapter->vf_data[vf].pf_vlan)
5310                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5311                                     adapter->vf_data[vf].pf_vlan,
5312                                     adapter->vf_data[vf].pf_qos);
5313         else
5314                 igb_clear_vf_vfta(adapter, vf);
5315
5316         /* reset multicast table array for vf */
5317         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5318
5319         /* Flush and reset the mta with the new values */
5320         igb_set_rx_mode(adapter->netdev);
5321 }
5322
5323 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5324 {
5325         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5326
5327         /* generate a new mac address as we were hotplug removed/added */
5328         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5329                 random_ether_addr(vf_mac);
5330
5331         /* process remaining reset events */
5332         igb_vf_reset(adapter, vf);
5333 }
5334
5335 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5336 {
5337         struct e1000_hw *hw = &adapter->hw;
5338         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5339         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5340         u32 reg, msgbuf[3];
5341         u8 *addr = (u8 *)(&msgbuf[1]);
5342
5343         /* process all the same items cleared in a function level reset */
5344         igb_vf_reset(adapter, vf);
5345
5346         /* set vf mac address */
5347         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5348
5349         /* enable transmit and receive for vf */
5350         reg = rd32(E1000_VFTE);
5351         wr32(E1000_VFTE, reg | (1 << vf));
5352         reg = rd32(E1000_VFRE);
5353         wr32(E1000_VFRE, reg | (1 << vf));
5354
5355         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5356
5357         /* reply to reset with ack and vf mac address */
5358         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5359         memcpy(addr, vf_mac, ETH_ALEN);
5360         igb_write_mbx(hw, msgbuf, 3, vf);
5361 }
5362
5363 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5364 {
5365         /*
5366          * The VF MAC Address is stored in a packed array of bytes
5367          * starting at the second 32 bit word of the msg array
5368          */
5369         unsigned char *addr = (unsigned char *)&msg[1];
5370         int err = -1;
5371
5372         if (is_valid_ether_addr(addr))
5373                 err = igb_set_vf_mac(adapter, vf, addr);
5374
5375         return err;
5376 }
5377
5378 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5379 {
5380         struct e1000_hw *hw = &adapter->hw;
5381         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5382         u32 msg = E1000_VT_MSGTYPE_NACK;
5383
5384         /* if device isn't clear to send it shouldn't be reading either */
5385         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5386             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5387                 igb_write_mbx(hw, &msg, 1, vf);
5388                 vf_data->last_nack = jiffies;
5389         }
5390 }
5391
5392 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5393 {
5394         struct pci_dev *pdev = adapter->pdev;
5395         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5396         struct e1000_hw *hw = &adapter->hw;
5397         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5398         s32 retval;
5399
5400         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5401
5402         if (retval) {
5403                 /* if receive failed revoke VF CTS stats and restart init */
5404                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5405                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5406                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5407                         return;
5408                 goto out;
5409         }
5410
5411         /* this is a message we already processed, do nothing */
5412         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5413                 return;
5414
5415         /*
5416          * until the vf completes a reset it should not be
5417          * allowed to start any configuration.
5418          */
5419
5420         if (msgbuf[0] == E1000_VF_RESET) {
5421                 igb_vf_reset_msg(adapter, vf);
5422                 return;
5423         }
5424
5425         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5426                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5427                         return;
5428                 retval = -1;
5429                 goto out;
5430         }
5431
5432         switch ((msgbuf[0] & 0xFFFF)) {
5433         case E1000_VF_SET_MAC_ADDR:
5434                 retval = -EINVAL;
5435                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5436                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5437                 else
5438                         dev_warn(&pdev->dev,
5439                                  "VF %d attempted to override administratively "
5440                                  "set MAC address\nReload the VF driver to "
5441                                  "resume operations\n", vf);
5442                 break;
5443         case E1000_VF_SET_PROMISC:
5444                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5445                 break;
5446         case E1000_VF_SET_MULTICAST:
5447                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5448                 break;
5449         case E1000_VF_SET_LPE:
5450                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5451                 break;
5452         case E1000_VF_SET_VLAN:
5453                 retval = -1;
5454                 if (vf_data->pf_vlan)
5455                         dev_warn(&pdev->dev,
5456                                  "VF %d attempted to override administratively "
5457                                  "set VLAN tag\nReload the VF driver to "
5458                                  "resume operations\n", vf);
5459                 else
5460                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5461                 break;
5462         default:
5463                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5464                 retval = -1;
5465                 break;
5466         }
5467
5468         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5469 out:
5470         /* notify the VF of the results of what it sent us */
5471         if (retval)
5472                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5473         else
5474                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5475
5476         igb_write_mbx(hw, msgbuf, 1, vf);
5477 }
5478
5479 static void igb_msg_task(struct igb_adapter *adapter)
5480 {
5481         struct e1000_hw *hw = &adapter->hw;
5482         u32 vf;
5483
5484         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5485                 /* process any reset requests */
5486                 if (!igb_check_for_rst(hw, vf))
5487                         igb_vf_reset_event(adapter, vf);
5488
5489                 /* process any messages pending */
5490                 if (!igb_check_for_msg(hw, vf))
5491                         igb_rcv_msg_from_vf(adapter, vf);
5492
5493                 /* process any acks */
5494                 if (!igb_check_for_ack(hw, vf))
5495                         igb_rcv_ack_from_vf(adapter, vf);
5496         }
5497 }
5498
5499 /**
5500  *  igb_set_uta - Set unicast filter table address
5501  *  @adapter: board private structure
5502  *
5503  *  The unicast table address is a register array of 32-bit registers.
5504  *  The table is meant to be used in a way similar to how the MTA is used
5505  *  however due to certain limitations in the hardware it is necessary to
5506  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5507  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5508  **/
5509 static void igb_set_uta(struct igb_adapter *adapter)
5510 {
5511         struct e1000_hw *hw = &adapter->hw;
5512         int i;
5513
5514         /* The UTA table only exists on 82576 hardware and newer */
5515         if (hw->mac.type < e1000_82576)
5516                 return;
5517
5518         /* we only need to do this if VMDq is enabled */
5519         if (!adapter->vfs_allocated_count)
5520                 return;
5521
5522         for (i = 0; i < hw->mac.uta_reg_count; i++)
5523                 array_wr32(E1000_UTA, i, ~0);
5524 }
5525
5526 /**
5527  * igb_intr_msi - Interrupt Handler
5528  * @irq: interrupt number
5529  * @data: pointer to a network interface device structure
5530  **/
5531 static irqreturn_t igb_intr_msi(int irq, void *data)
5532 {
5533         struct igb_adapter *adapter = data;
5534         struct igb_q_vector *q_vector = adapter->q_vector[0];
5535         struct e1000_hw *hw = &adapter->hw;
5536         /* read ICR disables interrupts using IAM */
5537         u32 icr = rd32(E1000_ICR);
5538
5539         igb_write_itr(q_vector);
5540
5541         if (icr & E1000_ICR_DRSTA)
5542                 schedule_work(&adapter->reset_task);
5543
5544         if (icr & E1000_ICR_DOUTSYNC) {
5545                 /* HW is reporting DMA is out of sync */
5546                 adapter->stats.doosync++;
5547         }
5548
5549         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5550                 hw->mac.get_link_status = 1;
5551                 if (!test_bit(__IGB_DOWN, &adapter->state))
5552                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5553         }
5554
5555         napi_schedule(&q_vector->napi);
5556
5557         return IRQ_HANDLED;
5558 }
5559
5560 /**
5561  * igb_intr - Legacy Interrupt Handler
5562  * @irq: interrupt number
5563  * @data: pointer to a network interface device structure
5564  **/
5565 static irqreturn_t igb_intr(int irq, void *data)
5566 {
5567         struct igb_adapter *adapter = data;
5568         struct igb_q_vector *q_vector = adapter->q_vector[0];
5569         struct e1000_hw *hw = &adapter->hw;
5570         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5571          * need for the IMC write */
5572         u32 icr = rd32(E1000_ICR);
5573
5574         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5575          * not set, then the adapter didn't send an interrupt */
5576         if (!(icr & E1000_ICR_INT_ASSERTED))
5577                 return IRQ_NONE;
5578
5579         igb_write_itr(q_vector);
5580
5581         if (icr & E1000_ICR_DRSTA)
5582                 schedule_work(&adapter->reset_task);
5583
5584         if (icr & E1000_ICR_DOUTSYNC) {
5585                 /* HW is reporting DMA is out of sync */
5586                 adapter->stats.doosync++;
5587         }
5588
5589         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5590                 hw->mac.get_link_status = 1;
5591                 /* guard against interrupt when we're going down */
5592                 if (!test_bit(__IGB_DOWN, &adapter->state))
5593                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5594         }
5595
5596         napi_schedule(&q_vector->napi);
5597
5598         return IRQ_HANDLED;
5599 }
5600
5601 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5602 {
5603         struct igb_adapter *adapter = q_vector->adapter;
5604         struct e1000_hw *hw = &adapter->hw;
5605
5606         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5607             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5608                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5609                         igb_set_itr(q_vector);
5610                 else
5611                         igb_update_ring_itr(q_vector);
5612         }
5613
5614         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5615                 if (adapter->msix_entries)
5616                         wr32(E1000_EIMS, q_vector->eims_value);
5617                 else
5618                         igb_irq_enable(adapter);
5619         }
5620 }
5621
5622 /**
5623  * igb_poll - NAPI Rx polling callback
5624  * @napi: napi polling structure
5625  * @budget: count of how many packets we should handle
5626  **/
5627 static int igb_poll(struct napi_struct *napi, int budget)
5628 {
5629         struct igb_q_vector *q_vector = container_of(napi,
5630                                                      struct igb_q_vector,
5631                                                      napi);
5632         bool clean_complete = true;
5633
5634 #ifdef CONFIG_IGB_DCA
5635         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5636                 igb_update_dca(q_vector);
5637 #endif
5638         if (q_vector->tx.ring)
5639                 clean_complete = igb_clean_tx_irq(q_vector);
5640
5641         if (q_vector->rx.ring)
5642                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5643
5644         /* If all work not completed, return budget and keep polling */
5645         if (!clean_complete)
5646                 return budget;
5647
5648         /* If not enough Rx work done, exit the polling mode */
5649         napi_complete(napi);
5650         igb_ring_irq_enable(q_vector);
5651
5652         return 0;
5653 }
5654
5655 #ifdef CONFIG_IGB_PTP
5656 /**
5657  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5658  * @q_vector: pointer to q_vector containing needed info
5659  * @buffer: pointer to igb_tx_buffer structure
5660  *
5661  * If we were asked to do hardware stamping and such a time stamp is
5662  * available, then it must have been for this skb here because we allow
5663  * only one such packet into the queue.
5664  */
5665 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5666                             struct igb_tx_buffer *buffer_info)
5667 {
5668         struct igb_adapter *adapter = q_vector->adapter;
5669         struct e1000_hw *hw = &adapter->hw;
5670         struct skb_shared_hwtstamps shhwtstamps;
5671         u64 regval;
5672
5673         /* if skb does not support hw timestamp or TX stamp not valid exit */
5674         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5675             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5676                 return;
5677
5678         regval = rd32(E1000_TXSTMPL);
5679         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5680
5681         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5682         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5683 }
5684
5685 #endif
5686 /**
5687  * igb_clean_tx_irq - Reclaim resources after transmit completes
5688  * @q_vector: pointer to q_vector containing needed info
5689  * returns true if ring is completely cleaned
5690  **/
5691 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5692 {
5693         struct igb_adapter *adapter = q_vector->adapter;
5694         struct igb_ring *tx_ring = q_vector->tx.ring;
5695         struct igb_tx_buffer *tx_buffer;
5696         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5697         unsigned int total_bytes = 0, total_packets = 0;
5698         unsigned int budget = q_vector->tx.work_limit;
5699         unsigned int i = tx_ring->next_to_clean;
5700
5701         if (test_bit(__IGB_DOWN, &adapter->state))
5702                 return true;
5703
5704         tx_buffer = &tx_ring->tx_buffer_info[i];
5705         tx_desc = IGB_TX_DESC(tx_ring, i);
5706         i -= tx_ring->count;
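        /* i is biased downward by the ring size so the wrap test in the
         * loop below is a cheap !i check; adding the ring size back after
         * the loop recovers the real next_to_clean index.
         */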
5707
5708         for (; budget; budget--) {
5709                 eop_desc = tx_buffer->next_to_watch;
5710
5711                 /* prevent any other reads prior to eop_desc */
5712                 rmb();
5713
5714                 /* if next_to_watch is not set then there is no work pending */
5715                 if (!eop_desc)
5716                         break;
5717
5718                 /* if DD is not set pending work has not been completed */
5719                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5720                         break;
5721
5722                 /* clear next_to_watch to prevent false hangs */
5723                 tx_buffer->next_to_watch = NULL;
5724
5725                 /* update the statistics for this packet */
5726                 total_bytes += tx_buffer->bytecount;
5727                 total_packets += tx_buffer->gso_segs;
5728
5729 #ifdef CONFIG_IGB_PTP
5730                 /* retrieve hardware timestamp */
5731                 igb_tx_hwtstamp(q_vector, tx_buffer);
5732
5733 #endif
5734                 /* free the skb */
5735                 dev_kfree_skb_any(tx_buffer->skb);
5736                 tx_buffer->skb = NULL;
5737
5738                 /* unmap skb header data */
5739                 dma_unmap_single(tx_ring->dev,
5740                                  tx_buffer->dma,
5741                                  tx_buffer->length,
5742                                  DMA_TO_DEVICE);
5743
5744                 /* clear last DMA location and unmap remaining buffers */
5745                 while (tx_desc != eop_desc) {
5746                         tx_buffer->dma = 0;
5747
5748                         tx_buffer++;
5749                         tx_desc++;
5750                         i++;
5751                         if (unlikely(!i)) {
5752                                 i -= tx_ring->count;
5753                                 tx_buffer = tx_ring->tx_buffer_info;
5754                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5755                         }
5756
5757                         /* unmap any remaining paged data */
5758                         if (tx_buffer->dma) {
5759                                 dma_unmap_page(tx_ring->dev,
5760                                                tx_buffer->dma,
5761                                                tx_buffer->length,
5762                                                DMA_TO_DEVICE);
5763                         }
5764                 }
5765
5766                 /* clear last DMA location */
5767                 tx_buffer->dma = 0;
5768
5769                 /* move us one more past the eop_desc for start of next pkt */
5770                 tx_buffer++;
5771                 tx_desc++;
5772                 i++;
5773                 if (unlikely(!i)) {
5774                         i -= tx_ring->count;
5775                         tx_buffer = tx_ring->tx_buffer_info;
5776                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5777                 }
5778         }
5779
5780         netdev_tx_completed_queue(txring_txq(tx_ring),
5781                                   total_packets, total_bytes);
5782         i += tx_ring->count;
5783         tx_ring->next_to_clean = i;
5784         u64_stats_update_begin(&tx_ring->tx_syncp);
5785         tx_ring->tx_stats.bytes += total_bytes;
5786         tx_ring->tx_stats.packets += total_packets;
5787         u64_stats_update_end(&tx_ring->tx_syncp);
5788         q_vector->tx.total_bytes += total_bytes;
5789         q_vector->tx.total_packets += total_packets;
5790
5791         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5792                 struct e1000_hw *hw = &adapter->hw;
5793
5794                 eop_desc = tx_buffer->next_to_watch;
5795
5796                 /* Detect a transmit hang in hardware; this serializes the
5797                  * check with the clearing of time_stamp and the movement of i */
5798                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5799                 if (eop_desc &&
5800                     time_after(jiffies, tx_buffer->time_stamp +
5801                                (adapter->tx_timeout_factor * HZ)) &&
5802                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5803
5804                         /* detected Tx unit hang */
5805                         dev_err(tx_ring->dev,
5806                                 "Detected Tx Unit Hang\n"
5807                                 "  Tx Queue             <%d>\n"
5808                                 "  TDH                  <%x>\n"
5809                                 "  TDT                  <%x>\n"
5810                                 "  next_to_use          <%x>\n"
5811                                 "  next_to_clean        <%x>\n"
5812                                 "buffer_info[next_to_clean]\n"
5813                                 "  time_stamp           <%lx>\n"
5814                                 "  next_to_watch        <%p>\n"
5815                                 "  jiffies              <%lx>\n"
5816                                 "  desc.status          <%x>\n",
5817                                 tx_ring->queue_index,
5818                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5819                                 readl(tx_ring->tail),
5820                                 tx_ring->next_to_use,
5821                                 tx_ring->next_to_clean,
5822                                 tx_buffer->time_stamp,
5823                                 eop_desc,
5824                                 jiffies,
5825                                 eop_desc->wb.status);
5826                         netif_stop_subqueue(tx_ring->netdev,
5827                                             tx_ring->queue_index);
5828
5829                         /* we are about to reset, no point in enabling stuff */
5830                         return true;
5831                 }
5832         }
5833
5834         if (unlikely(total_packets &&
5835                      netif_carrier_ok(tx_ring->netdev) &&
5836                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5837                 /* Make sure that anybody stopping the queue after this
5838                  * sees the new next_to_clean.
5839                  */
5840                 smp_mb();
5841                 if (__netif_subqueue_stopped(tx_ring->netdev,
5842                                              tx_ring->queue_index) &&
5843                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5844                         netif_wake_subqueue(tx_ring->netdev,
5845                                             tx_ring->queue_index);
5846
5847                         u64_stats_update_begin(&tx_ring->tx_syncp);
5848                         tx_ring->tx_stats.restart_queue++;
5849                         u64_stats_update_end(&tx_ring->tx_syncp);
5850                 }
5851         }
5852
5853         return !!budget;
5854 }
5855
5856 static inline void igb_rx_checksum(struct igb_ring *ring,
5857                                    union e1000_adv_rx_desc *rx_desc,
5858                                    struct sk_buff *skb)
5859 {
5860         skb_checksum_none_assert(skb);
5861
5862         /* Ignore Checksum bit is set */
5863         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5864                 return;
5865
5866         /* Rx checksum disabled via ethtool */
5867         if (!(ring->netdev->features & NETIF_F_RXCSUM))
5868                 return;
5869
5870         /* TCP/UDP checksum error bit is set */
5871         if (igb_test_staterr(rx_desc,
5872                              E1000_RXDEXT_STATERR_TCPE |
5873                              E1000_RXDEXT_STATERR_IPE)) {
5874                 /*
5875                  * work around an erratum with SCTP packets where the TCPE (aka
5876                  * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5877                  * packets; don't count those, the stack checks the crc32c
5878                  */
5879                 if (!((skb->len == 60) &&
5880                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5881                         u64_stats_update_begin(&ring->rx_syncp);
5882                         ring->rx_stats.csum_err++;
5883                         u64_stats_update_end(&ring->rx_syncp);
5884                 }
5885                 /* let the stack verify checksum errors */
5886                 return;
5887         }
5888         /* It must be a TCP or UDP packet with a valid checksum */
5889         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5890                                       E1000_RXD_STAT_UDPCS))
5891                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5892
5893         dev_dbg(ring->dev, "cksum success: bits %08X\n",
5894                 le32_to_cpu(rx_desc->wb.upper.status_error));
5895 }
5896
5897 static inline void igb_rx_hash(struct igb_ring *ring,
5898                                union e1000_adv_rx_desc *rx_desc,
5899                                struct sk_buff *skb)
5900 {
5901         if (ring->netdev->features & NETIF_F_RXHASH)
5902                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5903 }
5904
5905 #ifdef CONFIG_IGB_PTP
5906 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5907                             union e1000_adv_rx_desc *rx_desc,
5908                             struct sk_buff *skb)
5909 {
5910         struct igb_adapter *adapter = q_vector->adapter;
5911         struct e1000_hw *hw = &adapter->hw;
5912         u64 regval;
5913
5914         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5915                                        E1000_RXDADV_STAT_TS))
5916                 return;
5917
5918         /*
5919          * If this bit is set, then the RX registers contain the time stamp. No
5920          * other packet will be time stamped until we read these registers, so
5921          * read the registers to make them available again. Because only one
5922          * packet can be time stamped at a time, we know that the register
5923          * values must belong to this one here and therefore we don't need to
5924          * compare any of the additional attributes stored for it.
5925          *
5926          * If nothing went wrong, then it should have a shared tx_flags that we
5927          * can turn into a skb_shared_hwtstamps.
5928          */
5929         if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
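                /* hardware prepended the time stamp to the packet data; it
                 * sits in the third and fourth dwords of that IGB_TS_HDR_LEN
                 * header, which is stripped off below before the stack sees
                 * the packet.
                 */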
5930                 u32 *stamp = (u32 *)skb->data;
5931                 regval = le32_to_cpu(*(stamp + 2));
5932                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5933                 skb_pull(skb, IGB_TS_HDR_LEN);
5934         } else {
5935                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5936                         return;
5937
5938                 regval = rd32(E1000_RXSTMPL);
5939                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5940         }
5941
5942         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5943 }
5944
5945 #endif
5946 static void igb_rx_vlan(struct igb_ring *ring,
5947                         union e1000_adv_rx_desc *rx_desc,
5948                         struct sk_buff *skb)
5949 {
5950         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5951                 u16 vid;
5952                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5953                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5954                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5955                 else
5956                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5957
5958                 __vlan_hwaccel_put_tag(skb, vid);
5959         }
5960 }
5961
5962 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5963 {
5964         /* HW will not DMA in more data than the given header buffer, even
5965          * if it parses the header (an NFS header, typically) as larger.  In
5966          * that case, it fills the header buffer and spills the rest into the page.
5967          */
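        /* e.g. with the usual 0x7FE0 mask and shift of 5 from the e1000
         * defines, hdr_info = 0x0A00 yields (0x0A00 & 0x7FE0) >> 5 = 80,
         * i.e. an 80 byte header was placed in the header buffer.
         */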
5968         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5969                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5970         if (hlen > IGB_RX_HDR_LEN)
5971                 hlen = IGB_RX_HDR_LEN;
5972         return hlen;
5973 }
5974
5975 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5976 {
5977         struct igb_ring *rx_ring = q_vector->rx.ring;
5978         union e1000_adv_rx_desc *rx_desc;
5979         const int current_node = numa_node_id();
5980         unsigned int total_bytes = 0, total_packets = 0;
5981         u16 cleaned_count = igb_desc_unused(rx_ring);
5982         u16 i = rx_ring->next_to_clean;
5983
5984         rx_desc = IGB_RX_DESC(rx_ring, i);
5985
5986         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5987                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5988                 struct sk_buff *skb = buffer_info->skb;
5989                 union e1000_adv_rx_desc *next_rxd;
5990
5991                 buffer_info->skb = NULL;
5992                 prefetch(skb->data);
5993
5994                 i++;
5995                 if (i == rx_ring->count)
5996                         i = 0;
5997
5998                 next_rxd = IGB_RX_DESC(rx_ring, i);
5999                 prefetch(next_rxd);
6000
6001                 /*
6002                  * This memory barrier is needed to keep us from reading
6003                  * any other fields out of the rx_desc until we know the
6004                  * RXD_STAT_DD bit is set
6005                  */
6006                 rmb();
6007
6008                 if (!skb_is_nonlinear(skb)) {
6009                         __skb_put(skb, igb_get_hlen(rx_desc));
6010                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
6011                                          IGB_RX_HDR_LEN,
6012                                          DMA_FROM_DEVICE);
6013                         buffer_info->dma = 0;
6014                 }
6015
6016                 if (rx_desc->wb.upper.length) {
6017                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6018
6019                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6020                                                 buffer_info->page,
6021                                                 buffer_info->page_offset,
6022                                                 length);
6023
6024                         skb->len += length;
6025                         skb->data_len += length;
6026                         skb->truesize += PAGE_SIZE / 2;
6027
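                /* the skb frag above now owns our page reference; keep the
                 * half page for reuse only if nobody else holds a reference
                 * and it is local to this NUMA node, taking an extra
                 * reference for the ring in that case.
                 */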
6028                         if ((page_count(buffer_info->page) != 1) ||
6029                             (page_to_nid(buffer_info->page) != current_node))
6030                                 buffer_info->page = NULL;
6031                         else
6032                                 get_page(buffer_info->page);
6033
6034                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6035                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
6036                         buffer_info->page_dma = 0;
6037                 }
6038
6039                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6040                         struct igb_rx_buffer *next_buffer;
6041                         next_buffer = &rx_ring->rx_buffer_info[i];
6042                         buffer_info->skb = next_buffer->skb;
6043                         buffer_info->dma = next_buffer->dma;
6044                         next_buffer->skb = skb;
6045                         next_buffer->dma = 0;
6046                         goto next_desc;
6047                 }
6048
6049                 if (unlikely((igb_test_staterr(rx_desc,
6050                                                E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6051                              && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6052                         dev_kfree_skb_any(skb);
6053                         goto next_desc;
6054                 }
6055
6056 #ifdef CONFIG_IGB_PTP
6057                 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6058 #endif
6059                 igb_rx_hash(rx_ring, rx_desc, skb);
6060                 igb_rx_checksum(rx_ring, rx_desc, skb);
6061                 igb_rx_vlan(rx_ring, rx_desc, skb);
6062
6063                 total_bytes += skb->len;
6064                 total_packets++;
6065
6066                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6067
6068                 napi_gro_receive(&q_vector->napi, skb);
6069
6070                 budget--;
6071 next_desc:
6072                 if (!budget)
6073                         break;
6074
6075                 cleaned_count++;
6076                 /* return some buffers to hardware; refilling one at a time is too slow */
6077                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6078                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
6079                         cleaned_count = 0;
6080                 }
6081
6082                 /* use prefetched values */
6083                 rx_desc = next_rxd;
6084         }
6085
6086         rx_ring->next_to_clean = i;
6087         u64_stats_update_begin(&rx_ring->rx_syncp);
6088         rx_ring->rx_stats.packets += total_packets;
6089         rx_ring->rx_stats.bytes += total_bytes;
6090         u64_stats_update_end(&rx_ring->rx_syncp);
6091         q_vector->rx.total_packets += total_packets;
6092         q_vector->rx.total_bytes += total_bytes;
6093
6094         if (cleaned_count)
6095                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6096
6097         return !!budget;
6098 }
6099
6100 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6101                                  struct igb_rx_buffer *bi)
6102 {
6103         struct sk_buff *skb = bi->skb;
6104         dma_addr_t dma = bi->dma;
6105
6106         if (dma)
6107                 return true;
6108
6109         if (likely(!skb)) {
6110                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6111                                                 IGB_RX_HDR_LEN);
6112                 bi->skb = skb;
6113                 if (!skb) {
6114                         rx_ring->rx_stats.alloc_failed++;
6115                         return false;
6116                 }
6117
6118                 /* initialize skb for ring */
6119                 skb_record_rx_queue(skb, rx_ring->queue_index);
6120         }
6121
6122         dma = dma_map_single(rx_ring->dev, skb->data,
6123                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6124
6125         if (dma_mapping_error(rx_ring->dev, dma)) {
6126                 rx_ring->rx_stats.alloc_failed++;
6127                 return false;
6128         }
6129
6130         bi->dma = dma;
6131         return true;
6132 }
6133
6134 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6135                                   struct igb_rx_buffer *bi)
6136 {
6137         struct page *page = bi->page;
6138         dma_addr_t page_dma = bi->page_dma;
6139         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
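        /* XOR-ing with half the page size flips between the two halves of
         * the page on successive refills, so each half can be mapped and
         * handed to hardware independently.
         */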
6140
6141         if (page_dma)
6142                 return true;
6143
6144         if (!page) {
6145                 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6146                 bi->page = page;
6147                 if (unlikely(!page)) {
6148                         rx_ring->rx_stats.alloc_failed++;
6149                         return false;
6150                 }
6151         }
6152
6153         page_dma = dma_map_page(rx_ring->dev, page,
6154                                 page_offset, PAGE_SIZE / 2,
6155                                 DMA_FROM_DEVICE);
6156
6157         if (dma_mapping_error(rx_ring->dev, page_dma)) {
6158                 rx_ring->rx_stats.alloc_failed++;
6159                 return false;
6160         }
6161
6162         bi->page_dma = page_dma;
6163         bi->page_offset = page_offset;
6164         return true;
6165 }
6166
6167 /**
6168  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6169  * @rx_ring: rx descriptor ring to refill
 * @cleaned_count: number of descriptors to refill with fresh buffers
6170  **/
6171 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6172 {
6173         union e1000_adv_rx_desc *rx_desc;
6174         struct igb_rx_buffer *bi;
6175         u16 i = rx_ring->next_to_use;
6176
6177         rx_desc = IGB_RX_DESC(rx_ring, i);
6178         bi = &rx_ring->rx_buffer_info[i];
6179         i -= rx_ring->count;
6180
6181         while (cleaned_count--) {
6182                 if (!igb_alloc_mapped_skb(rx_ring, bi))
6183                         break;
6184
6185                 /* Refresh the desc even if buffer_addrs didn't change
6186                  * because each write-back erases this info. */
6187                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6188
6189                 if (!igb_alloc_mapped_page(rx_ring, bi))
6190                         break;
6191
6192                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6193
6194                 rx_desc++;
6195                 bi++;
6196                 i++;
6197                 if (unlikely(!i)) {
6198                         rx_desc = IGB_RX_DESC(rx_ring, 0);
6199                         bi = rx_ring->rx_buffer_info;
6200                         i -= rx_ring->count;
6201                 }
6202
6203                 /* clear the hdr_addr for the next_to_use descriptor */
6204                 rx_desc->read.hdr_addr = 0;
6205         }
6206
6207         i += rx_ring->count;
6208
6209         if (rx_ring->next_to_use != i) {
6210                 rx_ring->next_to_use = i;
6211
6212                 /* Force memory writes to complete before letting h/w
6213                  * know there are new descriptors to fetch.  (Only
6214                  * applicable for weak-ordered memory model archs,
6215                  * such as IA-64). */
6216                 wmb();
6217                 writel(i, rx_ring->tail);
6218         }
6219 }
6220
6221 /**
6222  * igb_mii_ioctl -
6223  * igb_mii_ioctl - handle MII related ioctls
6224  * @netdev: network interface device structure
6225  * @ifr: pointer to interface request structure
6226  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6227 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6228 {
6229         struct igb_adapter *adapter = netdev_priv(netdev);
6230         struct mii_ioctl_data *data = if_mii(ifr);
6231
6232         if (adapter->hw.phy.media_type != e1000_media_type_copper)
6233                 return -EOPNOTSUPP;
6234
6235         switch (cmd) {
6236         case SIOCGMIIPHY:
6237                 data->phy_id = adapter->hw.phy.addr;
6238                 break;
6239         case SIOCGMIIREG:
6240                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6241                                      &data->val_out))
6242                         return -EIO;
6243                 break;
6244         case SIOCSMIIREG:
6245         default:
6246                 return -EOPNOTSUPP;
6247         }
6248         return 0;
6249 }
6250
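/*
 * Illustrative userspace view of the SIOCSHWTSTAMP interface handled below
 * (a sketch, not driver code): the request carries a struct hwtstamp_config
 * in ifr_data, and the possibly coarsened config is copied back out on
 * success:
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) == 0)
 *		printf("granted rx_filter: %d\n", cfg.rx_filter);
 *
 * A filter such as HWTSTAMP_FILTER_PTP_V1_L4_EVENT may come back as
 * HWTSTAMP_FILTER_ALL, since igb_hwtstamp_ioctl() falls back to time
 * stamping all packets for that case.
 */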
6251 /**
6252  * igb_hwtstamp_ioctl - control hardware time stamping
6253  * @netdev: network interface device structure
6254  * @ifr: pointer to interface request structure
6255  * @cmd: ioctl command (SIOCSHWTSTAMP)
6256  *
6257  * Outgoing time stamping can be enabled and disabled. Play nice and
6258  * disable it when requested, although it shouldn't cause any overhead
6259  * when no packet needs it. At most one packet in the queue may be
6260  * marked for time stamping, otherwise it would be impossible to tell
6261  * for sure to which packet the hardware time stamp belongs.
6262  *
6263  * Incoming time stamping has to be configured via the hardware
6264  * filters. Not all combinations are supported, in particular event
6265  * type has to be specified. Matching the kind of event packet is
6266  * not supported, with the exception of "all V2 events regardless of
6267  * layer 2 or 4".
6268  *
6269  **/
6270 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6271                               struct ifreq *ifr, int cmd)
6272 {
6273         struct igb_adapter *adapter = netdev_priv(netdev);
6274         struct e1000_hw *hw = &adapter->hw;
6275         struct hwtstamp_config config;
6276         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6277         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6278         u32 tsync_rx_cfg = 0;
6279         bool is_l4 = false;
6280         bool is_l2 = false;
6281         u32 regval;
6282
6283         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6284                 return -EFAULT;
6285
6286         /* reserved for future extensions */
6287         if (config.flags)
6288                 return -EINVAL;
6289
6290         switch (config.tx_type) {
6291         case HWTSTAMP_TX_OFF:
6292                 tsync_tx_ctl = 0;
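                /* fall through: OFF only clears the enable flag */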
6293         case HWTSTAMP_TX_ON:
6294                 break;
6295         default:
6296                 return -ERANGE;
6297         }
6298
6299         switch (config.rx_filter) {
6300         case HWTSTAMP_FILTER_NONE:
6301                 tsync_rx_ctl = 0;
6302                 break;
6303         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6304         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6305         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6306         case HWTSTAMP_FILTER_ALL:
6307                 /*
6308                  * register TSYNCRXCFG must be set, therefore it is not
6309                  * possible to time stamp both Sync and Delay_Req messages
6310                  * => fall back to time stamping all packets
6311                  */
6312                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6313                 config.rx_filter = HWTSTAMP_FILTER_ALL;
6314                 break;
6315         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6316                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6317                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6318                 is_l4 = true;
6319                 break;
6320         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6321                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6322                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6323                 is_l4 = true;
6324                 break;
6325         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6326         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6327                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6328                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6329                 is_l2 = true;
6330                 is_l4 = true;
6331                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6332                 break;
6333         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6334         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6335                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6336                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6337                 is_l2 = true;
6338                 is_l4 = true;
6339                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6340                 break;
6341         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6342         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6343         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6344                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6345                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6346                 is_l2 = true;
6347                 is_l4 = true;
6348                 break;
6349         default:
6350                 return -ERANGE;
6351         }
6352
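        /* 82575 has no hardware time stamping support, so anything other
         * than "everything off" cannot be honoured.
         */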
6353         if (hw->mac.type == e1000_82575) {
6354                 if (tsync_rx_ctl | tsync_tx_ctl)
6355                         return -EINVAL;
6356                 return 0;
6357         }
6358
6359         /*
6360          * Per-packet timestamping only works if all packets are
6361          * timestamped, so enable timestamping in all packets as
6362          * long as one rx filter was configured.
6363          */
6364         if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6365                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6366                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6367         }
6368
6369         /* enable/disable TX */
6370         regval = rd32(E1000_TSYNCTXCTL);
6371         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6372         regval |= tsync_tx_ctl;
6373         wr32(E1000_TSYNCTXCTL, regval);
6374
6375         /* enable/disable RX */
6376         regval = rd32(E1000_TSYNCRXCTL);
6377         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6378         regval |= tsync_rx_ctl;
6379         wr32(E1000_TSYNCRXCTL, regval);
6380
6381         /* define which PTP packets are time stamped */
6382         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6383
6384         /* define ethertype filter for timestamped packets */
6385         if (is_l2)
6386                 wr32(E1000_ETQF(3),
6387                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6388                                  E1000_ETQF_1588 | /* enable timestamping */
6389                                  ETH_P_1588));     /* 1588 eth protocol type */
6390         else
6391                 wr32(E1000_ETQF(3), 0);
6392
6393 #define PTP_PORT 319
6394         /* L4 Queue Filter[3]: filter by destination port and protocol */
6395         if (is_l4) {
6396                 u32 ftqf = (IPPROTO_UDP /* UDP */
6397                         | E1000_FTQF_VF_BP /* VF not compared */
6398                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6399                         | E1000_FTQF_MASK); /* mask all inputs */
6400                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6401
6402                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6403                 wr32(E1000_IMIREXT(3),
6404                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6405                 if (hw->mac.type == e1000_82576) {
6406                         /* enable source port check */
6407                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6408                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6409                 }
6410                 wr32(E1000_FTQF(3), ftqf);
6411         } else {
6412                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6413         }
6414         wrfl();
6415
6416         adapter->hwtstamp_config = config;
6417
6418         /* clear TX/RX time stamp registers, just to be sure */
6419         regval = rd32(E1000_TXSTMPH);
6420         regval = rd32(E1000_RXSTMPH);
6421
6422         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6423                 -EFAULT : 0;
6424 }
6425
6426 /**
6427  * igb_ioctl - handle the device ioctls
6428  * @netdev: network interface device structure
6429  * @ifr: pointer to interface request structure
6430  * @cmd: ioctl command to execute
6431  **/
6432 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6433 {
6434         switch (cmd) {
6435         case SIOCGMIIPHY:
6436         case SIOCGMIIREG:
6437         case SIOCSMIIREG:
6438                 return igb_mii_ioctl(netdev, ifr, cmd);
6439         case SIOCSHWTSTAMP:
6440                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6441         default:
6442                 return -EOPNOTSUPP;
6443         }
6444 }
6445
6446 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6447 {
6448         struct igb_adapter *adapter = hw->back;
6449         u16 cap_offset;
6450
6451         cap_offset = adapter->pdev->pcie_cap;
6452         if (!cap_offset)
6453                 return -E1000_ERR_CONFIG;
6454
6455         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6456
6457         return 0;
6458 }
6459
6460 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6461 {
6462         struct igb_adapter *adapter = hw->back;
6463         u16 cap_offset;
6464
6465         cap_offset = adapter->pdev->pcie_cap;
6466         if (!cap_offset)
6467                 return -E1000_ERR_CONFIG;
6468
6469         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6470
6471         return 0;
6472 }
6473
6474 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6475 {
6476         struct igb_adapter *adapter = netdev_priv(netdev);
6477         struct e1000_hw *hw = &adapter->hw;
6478         u32 ctrl, rctl;
6479         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6480
6481         if (enable) {
6482                 /* enable VLAN tag insert/strip */
6483                 ctrl = rd32(E1000_CTRL);
6484                 ctrl |= E1000_CTRL_VME;
6485                 wr32(E1000_CTRL, ctrl);
6486
6487                 /* Disable CFI check */
6488                 rctl = rd32(E1000_RCTL);
6489                 rctl &= ~E1000_RCTL_CFIEN;
6490                 wr32(E1000_RCTL, rctl);
6491         } else {
6492                 /* disable VLAN tag insert/strip */
6493                 ctrl = rd32(E1000_CTRL);
6494                 ctrl &= ~E1000_CTRL_VME;
6495                 wr32(E1000_CTRL, ctrl);
6496         }
6497
6498         igb_rlpml_set(adapter);
6499 }
6500
6501 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6502 {
6503         struct igb_adapter *adapter = netdev_priv(netdev);
6504         struct e1000_hw *hw = &adapter->hw;
6505         int pf_id = adapter->vfs_allocated_count;
6506
6507         /* attempt to add filter to vlvf array */
6508         igb_vlvf_set(adapter, vid, true, pf_id);
6509
6510         /* add the filter since PF can receive vlans w/o entry in vlvf */
6511         igb_vfta_set(hw, vid, true);
6512
6513         set_bit(vid, adapter->active_vlans);
6514
6515         return 0;
6516 }
6517
6518 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6519 {
6520         struct igb_adapter *adapter = netdev_priv(netdev);
6521         struct e1000_hw *hw = &adapter->hw;
6522         int pf_id = adapter->vfs_allocated_count;
6523         s32 err;
6524
6525         /* remove vlan from VLVF table array */
6526         err = igb_vlvf_set(adapter, vid, false, pf_id);
6527
6528         /* if vid was not present in VLVF just remove it from table */
6529         if (err)
6530                 igb_vfta_set(hw, vid, false);
6531
6532         clear_bit(vid, adapter->active_vlans);
6533
6534         return 0;
6535 }
6536
6537 static void igb_restore_vlan(struct igb_adapter *adapter)
6538 {
6539         u16 vid;
6540
6541         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6542
6543         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6544                 igb_vlan_rx_add_vid(adapter->netdev, vid);
6545 }
6546
6547 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6548 {
6549         struct pci_dev *pdev = adapter->pdev;
6550         struct e1000_mac_info *mac = &adapter->hw.mac;
6551
6552         mac->autoneg = 0;
6553
6554         /* Make sure dplx is at most 1 bit and lsb of speed is not set
6555          * for the switch() below to work */
6556         if ((spd & 1) || (dplx & ~1))
6557                 goto err_inval;
6558
6559         /* Fiber NICs only allow 1000 Mbps full duplex */
6560         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6561             (spd != SPEED_1000 ||
6562              dplx != DUPLEX_FULL))
6563                 goto err_inval;
6564
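        /* with ethtool's SPEED_* values (10/100/1000) and DUPLEX_HALF/FULL
         * (0/1), every spd + dplx sum below is unique, e.g. 100 + 1 = 101
         * can only mean 100 Mbps full duplex
         */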
6565         switch (spd + dplx) {
6566         case SPEED_10 + DUPLEX_HALF:
6567                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6568                 break;
6569         case SPEED_10 + DUPLEX_FULL:
6570                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6571                 break;
6572         case SPEED_100 + DUPLEX_HALF:
6573                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6574                 break;
6575         case SPEED_100 + DUPLEX_FULL:
6576                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6577                 break;
6578         case SPEED_1000 + DUPLEX_FULL:
6579                 mac->autoneg = 1;
6580                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6581                 break;
6582         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6583         default:
6584                 goto err_inval;
6585         }
6586         return 0;
6587
6588 err_inval:
6589         dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6590         return -EINVAL;
6591 }
6592
6593 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6594                           bool runtime)
6595 {
6596         struct net_device *netdev = pci_get_drvdata(pdev);
6597         struct igb_adapter *adapter = netdev_priv(netdev);
6598         struct e1000_hw *hw = &adapter->hw;
6599         u32 ctrl, rctl, status;
6600         u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6601 #ifdef CONFIG_PM
6602         int retval = 0;
6603 #endif
6604
6605         netif_device_detach(netdev);
6606
6607         if (netif_running(netdev))
6608                 __igb_close(netdev, true);
6609
6610         igb_clear_interrupt_scheme(adapter);
6611
6612 #ifdef CONFIG_PM
6613         retval = pci_save_state(pdev);
6614         if (retval)
6615                 return retval;
6616 #endif
6617
6618         status = rd32(E1000_STATUS);
6619         if (status & E1000_STATUS_LU)
6620                 wufc &= ~E1000_WUFC_LNKC;
6621
6622         if (wufc) {
6623                 igb_setup_rctl(adapter);
6624                 igb_set_rx_mode(netdev);
6625
6626                 /* turn on all-multi mode if wake on multicast is enabled */
6627                 if (wufc & E1000_WUFC_MC) {
6628                         rctl = rd32(E1000_RCTL);
6629                         rctl |= E1000_RCTL_MPE;
6630                         wr32(E1000_RCTL, rctl);
6631                 }
6632
6633                 ctrl = rd32(E1000_CTRL);
6634                 /* advertise wake from D3Cold */
6635                 #define E1000_CTRL_ADVD3WUC 0x00100000
6636                 /* phy power management enable */
6637                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6638                 ctrl |= E1000_CTRL_ADVD3WUC;
6639                 wr32(E1000_CTRL, ctrl);
6640
6641                 /* Allow time for pending master requests to run */
6642                 igb_disable_pcie_master(hw);
6643
6644                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6645                 wr32(E1000_WUFC, wufc);
6646         } else {
6647                 wr32(E1000_WUC, 0);
6648                 wr32(E1000_WUFC, 0);
6649         }
6650
6651         *enable_wake = wufc || adapter->en_mng_pt;
6652         if (!*enable_wake)
6653                 igb_power_down_link(adapter);
6654         else
6655                 igb_power_up_link(adapter);
6656
6657         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6658          * would have already happened in close and is redundant. */
6659         igb_release_hw_control(adapter);
6660
6661         pci_disable_device(pdev);
6662
6663         return 0;
6664 }
6665
6666 #ifdef CONFIG_PM
6667 #ifdef CONFIG_PM_SLEEP
6668 static int igb_suspend(struct device *dev)
6669 {
6670         int retval;
6671         bool wake;
6672         struct pci_dev *pdev = to_pci_dev(dev);
6673
6674         retval = __igb_shutdown(pdev, &wake, 0);
6675         if (retval)
6676                 return retval;
6677
6678         if (wake) {
6679                 pci_prepare_to_sleep(pdev);
6680         } else {
6681                 pci_wake_from_d3(pdev, false);
6682                 pci_set_power_state(pdev, PCI_D3hot);
6683         }
6684
6685         return 0;
6686 }
6687 #endif /* CONFIG_PM_SLEEP */
6688
6689 static int igb_resume(struct device *dev)
6690 {
6691         struct pci_dev *pdev = to_pci_dev(dev);
6692         struct net_device *netdev = pci_get_drvdata(pdev);
6693         struct igb_adapter *adapter = netdev_priv(netdev);
6694         struct e1000_hw *hw = &adapter->hw;
6695         int err;
6696
6697         pci_set_power_state(pdev, PCI_D0);
6698         pci_restore_state(pdev);
6699         pci_save_state(pdev);
6700
6701         err = pci_enable_device_mem(pdev);
6702         if (err) {
6703                 dev_err(&pdev->dev,
6704                         "igb: Cannot enable PCI device from suspend\n");
6705                 return err;
6706         }
6707         pci_set_master(pdev);
6708
6709         pci_enable_wake(pdev, PCI_D3hot, 0);
6710         pci_enable_wake(pdev, PCI_D3cold, 0);
6711
6712         if (igb_init_interrupt_scheme(adapter)) {
6713                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6714                 return -ENOMEM;
6715         }
6716
6717         igb_reset(adapter);
6718
6719         /* let the f/w know that the h/w is now under the control of the
6720          * driver. */
6721         igb_get_hw_control(adapter);
6722
6723         wr32(E1000_WUS, ~0);
6724
6725         if (netdev->flags & IFF_UP) {
6726                 err = __igb_open(netdev, true);
6727                 if (err)
6728                         return err;
6729         }
6730
6731         netif_device_attach(netdev);
6732         return 0;
6733 }
6734
6735 #ifdef CONFIG_PM_RUNTIME
6736 static int igb_runtime_idle(struct device *dev)
6737 {
6738         struct pci_dev *pdev = to_pci_dev(dev);
6739         struct net_device *netdev = pci_get_drvdata(pdev);
6740         struct igb_adapter *adapter = netdev_priv(netdev);
6741
6742         if (!igb_has_link(adapter))
6743                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6744
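        /* returning nonzero keeps the PM core from suspending immediately;
         * if the link stays down, the delayed suspend scheduled above takes
         * the device down instead
         */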
6745         return -EBUSY;
6746 }
6747
6748 static int igb_runtime_suspend(struct device *dev)
6749 {
6750         struct pci_dev *pdev = to_pci_dev(dev);
6751         int retval;
6752         bool wake;
6753
6754         retval = __igb_shutdown(pdev, &wake, 1);
6755         if (retval)
6756                 return retval;
6757
6758         if (wake) {
6759                 pci_prepare_to_sleep(pdev);
6760         } else {
6761                 pci_wake_from_d3(pdev, false);
6762                 pci_set_power_state(pdev, PCI_D3hot);
6763         }
6764
6765         return 0;
6766 }
6767
6768 static int igb_runtime_resume(struct device *dev)
6769 {
6770         return igb_resume(dev);
6771 }
6772 #endif /* CONFIG_PM_RUNTIME */
6773 #endif
6774
6775 static void igb_shutdown(struct pci_dev *pdev)
6776 {
6777         bool wake;
6778
6779         __igb_shutdown(pdev, &wake, 0);
6780
6781         if (system_state == SYSTEM_POWER_OFF) {
6782                 pci_wake_from_d3(pdev, wake);
6783                 pci_set_power_state(pdev, PCI_D3hot);
6784         }
6785 }
6786
6787 #ifdef CONFIG_NET_POLL_CONTROLLER
6788 /*
6789  * Polling 'interrupt' - used by things like netconsole to send skbs
6790  * without having to re-enable interrupts. It's not called while
6791  * the interrupt routine is executing.
6792  */
6793 static void igb_netpoll(struct net_device *netdev)
6794 {
6795         struct igb_adapter *adapter = netdev_priv(netdev);
6796         struct e1000_hw *hw = &adapter->hw;
6797         struct igb_q_vector *q_vector;
6798         int i;
6799
6800         for (i = 0; i < adapter->num_q_vectors; i++) {
6801                 q_vector = adapter->q_vector[i];
6802                 if (adapter->msix_entries)
6803                         wr32(E1000_EIMC, q_vector->eims_value);
6804                 else
6805                         igb_irq_disable(adapter);
6806                 napi_schedule(&q_vector->napi);
6807         }
6808 }
6809 #endif /* CONFIG_NET_POLL_CONTROLLER */
6810
6811 /**
6812  * igb_io_error_detected - called when PCI error is detected
6813  * @pdev: Pointer to PCI device
6814  * @state: The current pci connection state
6815  *
6816  * This function is called after a PCI bus error affecting
6817  * this device has been detected.
6818  */
6819 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6820                                               pci_channel_state_t state)
6821 {
6822         struct net_device *netdev = pci_get_drvdata(pdev);
6823         struct igb_adapter *adapter = netdev_priv(netdev);
6824
6825         netif_device_detach(netdev);
6826
6827         if (state == pci_channel_io_perm_failure)
6828                 return PCI_ERS_RESULT_DISCONNECT;
6829
6830         if (netif_running(netdev))
6831                 igb_down(adapter);
6832         pci_disable_device(pdev);
6833
6834         /* Request a slot reset. */
6835         return PCI_ERS_RESULT_NEED_RESET;
6836 }
6837
6838 /**
6839  * igb_io_slot_reset - called after the pci bus has been reset.
6840  * @pdev: Pointer to PCI device
6841  *
6842  * Restart the card from scratch, as if from a cold boot. Implementation
6843  * resembles the first half of the igb_resume routine.
6844  */
6845 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6846 {
6847         struct net_device *netdev = pci_get_drvdata(pdev);
6848         struct igb_adapter *adapter = netdev_priv(netdev);
6849         struct e1000_hw *hw = &adapter->hw;
6850         pci_ers_result_t result;
6851         int err;
6852
6853         if (pci_enable_device_mem(pdev)) {
6854                 dev_err(&pdev->dev,
6855                         "Cannot re-enable PCI device after reset.\n");
6856                 result = PCI_ERS_RESULT_DISCONNECT;
6857         } else {
6858                 pci_set_master(pdev);
6859                 pci_restore_state(pdev);
6860                 pci_save_state(pdev);
6861
6862                 pci_enable_wake(pdev, PCI_D3hot, 0);
6863                 pci_enable_wake(pdev, PCI_D3cold, 0);
6864
6865                 igb_reset(adapter);
6866                 wr32(E1000_WUS, ~0);
6867                 result = PCI_ERS_RESULT_RECOVERED;
6868         }
6869
6870         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6871         if (err) {
6872                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6873                         "failed 0x%0x\n", err);
6874                 /* non-fatal, continue */
6875         }
6876
6877         return result;
6878 }
6879
6880 /**
6881  * igb_io_resume - called when traffic can start flowing again.
6882  * @pdev: Pointer to PCI device
6883  *
6884  * This callback is called when the error recovery driver tells us that
6885  * its OK to resume normal operation. Implementation resembles the
6886  * it's OK to resume normal operation. Implementation resembles the
6887  * second half of the igb_resume routine.
6888 static void igb_io_resume(struct pci_dev *pdev)
6889 {
6890         struct net_device *netdev = pci_get_drvdata(pdev);
6891         struct igb_adapter *adapter = netdev_priv(netdev);
6892
6893         if (netif_running(netdev)) {
6894                 if (igb_up(adapter)) {
6895                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6896                         return;
6897                 }
6898         }
6899
6900         netif_device_attach(netdev);
6901
6902         /* let the f/w know that the h/w is now under the control of the
6903          * driver. */
6904         igb_get_hw_control(adapter);
6905 }
6906
6907 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6908                              u8 qsel)
6909 {
6910         u32 rar_low, rar_high;
6911         struct e1000_hw *hw = &adapter->hw;
6912
6913         /* HW expects these in little endian so we reverse the byte order
6914          * from network order (big endian) to little endian
6915          */
6916         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6917                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6918         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
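        /* e.g. address 00:11:22:33:44:55 packs as rar_low = 0x33221100 and
         * rar_high = 0x00005544, before the valid and pool bits are OR'd in
         */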
6919
6920         /* Indicate to hardware the Address is Valid. */
6921         rar_high |= E1000_RAH_AV;
6922
6923         if (hw->mac.type == e1000_82575)
6924                 rar_high |= E1000_RAH_POOL_1 * qsel;
6925         else
6926                 rar_high |= E1000_RAH_POOL_1 << qsel;
6927
6928         wr32(E1000_RAL(index), rar_low);
6929         wrfl();
6930         wr32(E1000_RAH(index), rar_high);
6931         wrfl();
6932 }
6933
6934 static int igb_set_vf_mac(struct igb_adapter *adapter,
6935                           int vf, unsigned char *mac_addr)
6936 {
6937         struct e1000_hw *hw = &adapter->hw;
6938         /* VF MAC addresses start at the end of the receive addresses and
6939          * move towards the first; as a result a collision should not be possible */
6940         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6941
6942         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6943
6944         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6945
6946         return 0;
6947 }
6948
6949 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6950 {
6951         struct igb_adapter *adapter = netdev_priv(netdev);
6952         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6953                 return -EINVAL;
6954         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6955         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6956         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6957                                       " change effective.\n");
6958         if (test_bit(__IGB_DOWN, &adapter->state)) {
6959                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6960                          " but the PF device is not up.\n");
6961                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6962                          " attempting to use the VF device.\n");
6963         }
6964         return igb_set_vf_mac(adapter, vf, mac);
6965 }
6966
6967 static int igb_link_mbps(int internal_link_speed)
6968 {
6969         switch (internal_link_speed) {
6970         case SPEED_100:
6971                 return 100;
6972         case SPEED_1000:
6973                 return 1000;
6974         default:
6975                 return 0;
6976         }
6977 }
6978
6979 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6980                                   int link_speed)
6981 {
6982         int rf_dec, rf_int;
6983         u32 bcnrc_val;
6984
6985         if (tx_rate != 0) {
6986                 /* Calculate the rate factor values to set */
6987                 rf_int = link_speed / tx_rate;
6988                 rf_dec = (link_speed - (rf_int * tx_rate));
6989                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
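                /* worked example, assuming the 14-bit RF_INT shift from the
                 * e1000 defines: link_speed = 1000 and tx_rate = 300 give
                 * rf_int = 3 and rf_dec = (100 << 14) / 300 = 5461, i.e. a
                 * rate factor of roughly 3.333 = 1000/300
                 */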
6990
6991                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6992                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6993                                E1000_RTTBCNRC_RF_INT_MASK);
6994                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6995         } else {
6996                 bcnrc_val = 0;
6997         }
6998
6999         wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7000         wr32(E1000_RTTBCNRC, bcnrc_val);
7001 }
7002
7003 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7004 {
7005         int actual_link_speed, i;
7006         bool reset_rate = false;
7007
7008         /* VF TX rate limit was not set or not supported */
7009         if ((adapter->vf_rate_link_speed == 0) ||
7010             (adapter->hw.mac.type != e1000_82576))
7011                 return;
7012
7013         actual_link_speed = igb_link_mbps(adapter->link_speed);
7014         if (actual_link_speed != adapter->vf_rate_link_speed) {
7015                 reset_rate = true;
7016                 adapter->vf_rate_link_speed = 0;
7017                 dev_info(&adapter->pdev->dev,
7018                          "Link speed has been changed. VF Transmit "
7019                          "rate is disabled\n");
7020         }
7021
7022         for (i = 0; i < adapter->vfs_allocated_count; i++) {
7023                 if (reset_rate)
7024                         adapter->vf_data[i].tx_rate = 0;
7025
7026                 igb_set_vf_rate_limit(&adapter->hw, i,
7027                                       adapter->vf_data[i].tx_rate,
7028                                       actual_link_speed);
7029         }
7030 }
7031
7032 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7033 {
7034         struct igb_adapter *adapter = netdev_priv(netdev);
7035         struct e1000_hw *hw = &adapter->hw;
7036         int actual_link_speed;
7037
7038         if (hw->mac.type != e1000_82576)
7039                 return -EOPNOTSUPP;
7040
7041         actual_link_speed = igb_link_mbps(adapter->link_speed);
7042         if ((vf >= adapter->vfs_allocated_count) ||
7043             (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7044             (tx_rate < 0) || (tx_rate > actual_link_speed))
7045                 return -EINVAL;
7046
7047         adapter->vf_rate_link_speed = actual_link_speed;
7048         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7049         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7050
7051         return 0;
7052 }

static int igb_ndo_get_vf_config(struct net_device *netdev,
                                 int vf, struct ifla_vf_info *ivi)
{
        struct igb_adapter *adapter = netdev_priv(netdev);

        if (vf >= adapter->vfs_allocated_count)
                return -EINVAL;
        ivi->vf = vf;
        memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
        ivi->tx_rate = adapter->vf_data[vf].tx_rate;
        ivi->vlan = adapter->vf_data[vf].pf_vlan;
        ivi->qos = adapter->vf_data[vf].pf_qos;
        return 0;
}
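/*
 * Note: the fields copied above (MAC, VLAN, QoS, TX rate) are what
 * "ip link show" prints for each VF when querying VF configuration.
 */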

static void igb_vmm_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 reg;

        switch (hw->mac.type) {
        case e1000_82575:
        case e1000_i210:
        case e1000_i211:
        default:
                /* replication is not supported for 82575, i210 and i211 */
                return;
        case e1000_82576:
                /* notify HW that the MAC is adding vlan tags */
                reg = rd32(E1000_DTXCTL);
                reg |= E1000_DTXCTL_VLAN_ADDED;
                wr32(E1000_DTXCTL, reg);
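                /* fall through - 82576 also keeps cascading into the
                 * 82580 vlan tag stripping setup below */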
        case e1000_82580:
                /* enable replication vlan tag stripping */
                reg = rd32(E1000_RPLOLR);
                reg |= E1000_RPLOLR_STRVLAN;
                wr32(E1000_RPLOLR, reg);
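                /* fall through */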
        case e1000_i350:
                /* none of the above registers are supported by i350 */
                break;
        }

        if (adapter->vfs_allocated_count) {
                igb_vmdq_set_loopback_pf(hw, true);
                igb_vmdq_set_replication_pf(hw, true);
                igb_vmdq_set_anti_spoofing_pf(hw, true,
                                                adapter->vfs_allocated_count);
        } else {
                igb_vmdq_set_loopback_pf(hw, false);
                igb_vmdq_set_replication_pf(hw, false);
        }
}

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 dmac_thr;
        u16 hwm;

        if (hw->mac.type > e1000_82580) {
                if (adapter->flags & IGB_FLAG_DMAC) {
                        u32 reg;

                        /* force threshold to 0. */
                        wr32(E1000_DMCTXTH, 0);

                        /*
                         * DMA Coalescing high water mark needs to be greater
                         * than the Rx threshold. Set hwm to PBA - max frame
                         * size in 16B units, with a floor of PBA - 6KB so a
                         * large frame cannot pull it too low.
                         */
                        hwm = 64 * pba - adapter->max_frame_size / 16;
                        if (hwm < 64 * (pba - 6))
                                hwm = 64 * (pba - 6);
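                        /*
                         * Example with illustrative numbers: pba = 34 (KB)
                         * and max_frame_size = 1522 give hwm = 64 * 34 -
                         * 1522 / 16 = 2176 - 95 = 2081 sixteen-byte units
                         * (~32.5 KB), above the 64 * (34 - 6) = 1792 floor.
                         */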
                        reg = rd32(E1000_FCRTC);
                        reg &= ~E1000_FCRTC_RTH_COAL_MASK;
                        reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
                                & E1000_FCRTC_RTH_COAL_MASK);
                        wr32(E1000_FCRTC, reg);

                        /*
                         * Set the DMA Coalescing Rx threshold to PBA - 2 * max
                         * frame size, with a floor of PBA - 10KB.
                         */
                        dmac_thr = pba - adapter->max_frame_size / 512;
                        if (dmac_thr < pba - 10)
                                dmac_thr = pba - 10;
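                        /*
                         * Continuing the example: dmac_thr = 34 - 1522 / 512
                         * = 34 - 2 = 32 (KB), already above the
                         * pba - 10 = 24 floor.
                         */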
                        reg = rd32(E1000_DMACR);
                        reg &= ~E1000_DMACR_DMACTHR_MASK;
                        reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
                                & E1000_DMACR_DMACTHR_MASK);

                        /* transition to L0s or L1 if available */
                        reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

                        /* watchdog timer = ~1000 usec, in 32 usec units
                         * (1000 >> 5 = 31 intervals ~= 992 usec) */
                        reg |= (1000 >> 5);

                        /* clear the BMC-to-OS watchdog enable bit */
                        reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
                        wr32(E1000_DMACR, reg);

                        /*
                         * No lower threshold to disable coalescing
                         * (smart FIFO); UTRESH = 0.
                         */
                        wr32(E1000_DMCRTRH, 0);

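                        /*
                         * The 0x4 OR-ed in below is an undocumented magic
                         * value inherited with this code; it presumably
                         * programs the DMCTLX time-to-LX field.
                         */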
                        reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);

                        wr32(E1000_DMCTLX, reg);

                        /*
                         * Free space required in the Tx packet buffer to
                         * wake from DMA coalescing.
                         */
                        wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
                             (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
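                        /*
                         * Illustrative arithmetic, assuming the igb.h values
                         * IGB_MIN_TXPBSIZE = 20408 and IGB_TX_BUF_4096 = 4096
                         * with max_frame_size = 1522:
                         * (20408 - (4096 + 1522)) >> 6 = 231, a wake
                         * threshold in what appear to be 64-byte units.
                         */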

                        /*
                         * Let DMA coalescing control the low power state
                         * decision.
                         */
                        reg = rd32(E1000_PCIEMISC);
                        reg &= ~E1000_PCIEMISC_LX_DECISION;
                        wr32(E1000_PCIEMISC, reg);
                } /* end if IGB_FLAG_DMAC is set */
        } else if (hw->mac.type == e1000_82580) {
                u32 reg = rd32(E1000_PCIEMISC);

                wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
                wr32(E1000_DMACR, 0);
        }
}

/* igb_main.c */