]> Pileus Git - ~andy/linux/blob - arch/powerpc/platforms/powernv/pci.c
nfsd: fix lost nfserrno() call in nfsd_setattr()
[~andy/linux] / arch / powerpc / platforms / powernv / pci.c
1 /*
2  * Support PCI/PCIe on PowerNV platforms
3  *
4  * Supports P5IOC2 and IODA (P7IOC) IO hubs as well as IODA2 (PHB3) PHBs
5  *
6  * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version
11  * 2 of the License, or (at your option) any later version.
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/delay.h>
17 #include <linux/string.h>
18 #include <linux/init.h>
19 #include <linux/bootmem.h>
20 #include <linux/irq.h>
21 #include <linux/io.h>
22 #include <linux/msi.h>
23 #include <linux/iommu.h>
24
25 #include <asm/sections.h>
26 #include <asm/io.h>
27 #include <asm/prom.h>
28 #include <asm/pci-bridge.h>
29 #include <asm/machdep.h>
30 #include <asm/msi_bitmap.h>
31 #include <asm/ppc-pci.h>
32 #include <asm/opal.h>
33 #include <asm/iommu.h>
34 #include <asm/tce.h>
35 #include <asm/firmware.h>
36 #include <asm/eeh_event.h>
37 #include <asm/eeh.h>
38
39 #include "powernv.h"
40 #include "pci.h"
41
/*
 * Time, in microseconds, that must have elapsed since reset was lifted
 * on a PCI bus before it is probed (see pnv_pci_probe_mode()).
 */
#define PCI_RESET_DELAY_US      (3 * 1000 * 1000)

/*
 * Config-space access tracing. Compiled out by default; swap the two
 * definitions below to route the messages through printk().
 */
#define cfg_dbg(fmt...)         do { } while (0)
/* #define cfg_dbg(fmt...)      printk(fmt) */
47
48 #ifdef CONFIG_PCI_MSI
49 static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type)
50 {
51         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
52         struct pnv_phb *phb = hose->private_data;
53         struct pci_dn *pdn = pci_get_pdn(pdev);
54
55         if (pdn && pdn->force_32bit_msi && !phb->msi32_support)
56                 return -ENODEV;
57
58         return (phb && phb->msi_bmp.bitmap) ? 0 : -ENODEV;
59 }
60
61 static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
62 {
63         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
64         struct pnv_phb *phb = hose->private_data;
65         struct msi_desc *entry;
66         struct msi_msg msg;
67         int hwirq;
68         unsigned int virq;
69         int rc;
70
71         if (WARN_ON(!phb))
72                 return -ENODEV;
73
74         list_for_each_entry(entry, &pdev->msi_list, list) {
75                 if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
76                         pr_warn("%s: Supports only 64-bit MSIs\n",
77                                 pci_name(pdev));
78                         return -ENXIO;
79                 }
80                 hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1);
81                 if (hwirq < 0) {
82                         pr_warn("%s: Failed to find a free MSI\n",
83                                 pci_name(pdev));
84                         return -ENOSPC;
85                 }
86                 virq = irq_create_mapping(NULL, phb->msi_base + hwirq);
87                 if (virq == NO_IRQ) {
88                         pr_warn("%s: Failed to map MSI to linux irq\n",
89                                 pci_name(pdev));
90                         msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
91                         return -ENOMEM;
92                 }
93                 rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq,
94                                     virq, entry->msi_attrib.is_64, &msg);
95                 if (rc) {
96                         pr_warn("%s: Failed to setup MSI\n", pci_name(pdev));
97                         irq_dispose_mapping(virq);
98                         msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
99                         return rc;
100                 }
101                 irq_set_msi_desc(virq, entry);
102                 write_msi_msg(virq, &msg);
103         }
104         return 0;
105 }
106
107 static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
108 {
109         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
110         struct pnv_phb *phb = hose->private_data;
111         struct msi_desc *entry;
112
113         if (WARN_ON(!phb))
114                 return;
115
116         list_for_each_entry(entry, &pdev->msi_list, list) {
117                 if (entry->irq == NO_IRQ)
118                         continue;
119                 irq_set_msi_desc(entry->irq, NULL);
120                 msi_bitmap_free_hwirqs(&phb->msi_bmp,
121                         virq_to_hw(entry->irq) - phb->msi_base, 1);
122                 irq_dispose_mapping(entry->irq);
123         }
124 }
125 #endif /* CONFIG_PCI_MSI */
126
127 static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
128                                          struct OpalIoPhbErrorCommon *common)
129 {
130         struct OpalIoP7IOCPhbErrorData *data;
131         int i;
132
133         data = (struct OpalIoP7IOCPhbErrorData *)common;
134         pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
135                 hose->global_number, common->version);
136
137         pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
138
139         pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
140         pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
141         pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
142
143         pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
144         pr_info("  slotStatus:           %08x\n", data->slotStatus);
145         pr_info("  linkStatus:           %08x\n", data->linkStatus);
146         pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
147         pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
148
149         pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
150         pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
151         pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
152         pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
153         pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
154         pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
155         pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
156         pr_info("  sourceId:             %08x\n", data->sourceId);
157         pr_info("  errorClass:           %016llx\n", data->errorClass);
158         pr_info("  correlator:           %016llx\n", data->correlator);
159         pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
160         pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
161         pr_info("  lemFir:               %016llx\n", data->lemFir);
162         pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
163         pr_info("  lemWOF:               %016llx\n", data->lemWOF);
164         pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
165         pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
166         pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
167         pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
168         pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
169         pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
170         pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
171         pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
172         pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
173         pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
174         pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
175         pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
176         pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
177         pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
178         pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
179         pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
180
181         for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
182                 if ((data->pestA[i] >> 63) == 0 &&
183                     (data->pestB[i] >> 63) == 0)
184                         continue;
185
186                 pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
187                 pr_info("          PESTB:        %016llx\n", data->pestB[i]);
188         }
189 }
190
191 static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
192                                         struct OpalIoPhbErrorCommon *common)
193 {
194         struct OpalIoPhb3ErrorData *data;
195         int i;
196
197         data = (struct OpalIoPhb3ErrorData*)common;
198         pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
199                 hose->global_number, common->version);
200
201         pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
202
203         pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
204         pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
205         pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
206
207         pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
208         pr_info("  slotStatus:           %08x\n", data->slotStatus);
209         pr_info("  linkStatus:           %08x\n", data->linkStatus);
210         pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
211         pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
212
213         pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
214         pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
215         pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
216         pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
217         pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
218         pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
219         pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
220         pr_info("  sourceId:             %08x\n", data->sourceId);
221         pr_info("  errorClass:           %016llx\n", data->errorClass);
222         pr_info("  correlator:           %016llx\n", data->correlator);
223
224         pr_info("  nFir:                 %016llx\n", data->nFir);
225         pr_info("  nFirMask:             %016llx\n", data->nFirMask);
226         pr_info("  nFirWOF:              %016llx\n", data->nFirWOF);
227         pr_info("  PhbPlssr:             %016llx\n", data->phbPlssr);
228         pr_info("  PhbCsr:               %016llx\n", data->phbCsr);
229         pr_info("  lemFir:               %016llx\n", data->lemFir);
230         pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
231         pr_info("  lemWOF:               %016llx\n", data->lemWOF);
232         pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
233         pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
234         pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
235         pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
236         pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
237         pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
238         pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
239         pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
240         pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
241         pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
242         pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
243         pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
244         pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
245         pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
246         pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
247         pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
248
249         for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
250                 if ((data->pestA[i] >> 63) == 0 &&
251                     (data->pestB[i] >> 63) == 0)
252                         continue;
253
254                 pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
255                 pr_info("          PESTB:        %016llx\n", data->pestB[i]);
256         }
257 }
258
259 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
260                                 unsigned char *log_buff)
261 {
262         struct OpalIoPhbErrorCommon *common;
263
264         if (!hose || !log_buff)
265                 return;
266
267         common = (struct OpalIoPhbErrorCommon *)log_buff;
268         switch (common->ioType) {
269         case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
270                 pnv_pci_dump_p7ioc_diag_data(hose, common);
271                 break;
272         case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
273                 pnv_pci_dump_phb3_diag_data(hose, common);
274                 break;
275         default:
276                 pr_warn("%s: Unrecognized ioType %d\n",
277                         __func__, common->ioType);
278         }
279 }
280
281 static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
282 {
283         unsigned long flags, rc;
284         int has_diag;
285
286         spin_lock_irqsave(&phb->lock, flags);
287
288         rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
289                                          PNV_PCI_DIAG_BUF_SIZE);
290         has_diag = (rc == OPAL_SUCCESS);
291
292         rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
293                                        OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
294         if (rc) {
295                 pr_warning("PCI %d: Failed to clear EEH freeze state"
296                            " for PE#%d, err %ld\n",
297                            phb->hose->global_number, pe_no, rc);
298
299                 /* For now, let's only display the diag buffer when we fail to clear
300                  * the EEH status. We'll do more sensible things later when we have
301                  * proper EEH support. We need to make sure we don't pollute ourselves
302                  * with the normal errors generated when probing empty slots
303                  */
304                 if (has_diag)
305                         pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
306                 else
307                         pr_warning("PCI %d: No diag data available\n",
308                                    phb->hose->global_number);
309         }
310
311         spin_unlock_irqrestore(&phb->lock, flags);
312 }
313
314 static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
315                                      struct device_node *dn)
316 {
317         s64     rc;
318         u8      fstate;
319         __be16  pcierr;
320         u32     pe_no;
321
322         /*
323          * Get the PE#. During the PCI probe stage, we might not
324          * setup that yet. So all ER errors should be mapped to
325          * reserved PE.
326          */
327         pe_no = PCI_DN(dn)->pe_number;
328         if (pe_no == IODA_INVALID_PE) {
329                 if (phb->type == PNV_PHB_P5IOC2)
330                         pe_no = 0;
331                 else
332                         pe_no = phb->ioda.reserved_pe;
333         }
334
335         /* Read freeze status */
336         rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
337                                         NULL);
338         if (rc) {
339                 pr_warning("%s: Can't read EEH status (PE#%d) for "
340                            "%s, err %lld\n",
341                            __func__, pe_no, dn->full_name, rc);
342                 return;
343         }
344         cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
345                 (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
346                 pe_no, fstate);
347         if (fstate != 0)
348                 pnv_pci_handle_eeh_config(phb, pe_no);
349 }
350
351 int pnv_pci_cfg_read(struct device_node *dn,
352                      int where, int size, u32 *val)
353 {
354         struct pci_dn *pdn = PCI_DN(dn);
355         struct pnv_phb *phb = pdn->phb->private_data;
356         u32 bdfn = (pdn->busno << 8) | pdn->devfn;
357 #ifdef CONFIG_EEH
358         struct eeh_pe *phb_pe = NULL;
359 #endif
360         s64 rc;
361
362         switch (size) {
363         case 1: {
364                 u8 v8;
365                 rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8);
366                 *val = (rc == OPAL_SUCCESS) ? v8 : 0xff;
367                 break;
368         }
369         case 2: {
370                 __be16 v16;
371                 rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
372                                                    &v16);
373                 *val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff;
374                 break;
375         }
376         case 4: {
377                 __be32 v32;
378                 rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
379                 *val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff;
380                 break;
381         }
382         default:
383                 return PCIBIOS_FUNC_NOT_SUPPORTED;
384         }
385         cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
386                 __func__, pdn->busno, pdn->devfn, where, size, *val);
387
388         /*
389          * Check if the specified PE has been put into frozen
390          * state. On the other hand, we needn't do that while
391          * the PHB has been put into frozen state because of
392          * PHB-fatal errors.
393          */
394 #ifdef CONFIG_EEH
395         phb_pe = eeh_phb_pe_get(pdn->phb);
396         if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
397                 return PCIBIOS_SUCCESSFUL;
398
399         if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
400                 if (*val == EEH_IO_ERROR_VALUE(size) &&
401                     eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
402                         return PCIBIOS_DEVICE_NOT_FOUND;
403         } else {
404                 pnv_pci_config_check_eeh(phb, dn);
405         }
406 #else
407         pnv_pci_config_check_eeh(phb, dn);
408 #endif
409
410         return PCIBIOS_SUCCESSFUL;
411 }
412
413 int pnv_pci_cfg_write(struct device_node *dn,
414                       int where, int size, u32 val)
415 {
416         struct pci_dn *pdn = PCI_DN(dn);
417         struct pnv_phb *phb = pdn->phb->private_data;
418         u32 bdfn = (pdn->busno << 8) | pdn->devfn;
419
420         cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
421                 pdn->busno, pdn->devfn, where, size, val);
422         switch (size) {
423         case 1:
424                 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
425                 break;
426         case 2:
427                 opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
428                 break;
429         case 4:
430                 opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
431                 break;
432         default:
433                 return PCIBIOS_FUNC_NOT_SUPPORTED;
434         }
435
436         /* Check if the PHB got frozen due to an error (no response) */
437 #ifdef CONFIG_EEH
438         if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
439                 pnv_pci_config_check_eeh(phb, dn);
440 #else
441         pnv_pci_config_check_eeh(phb, dn);
442 #endif
443
444         return PCIBIOS_SUCCESSFUL;
445 }
446
447 static int pnv_pci_read_config(struct pci_bus *bus,
448                                unsigned int devfn,
449                                int where, int size, u32 *val)
450 {
451         struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
452         struct pci_dn *pdn;
453
454         for (dn = busdn->child; dn; dn = dn->sibling) {
455                 pdn = PCI_DN(dn);
456                 if (pdn && pdn->devfn == devfn)
457                         return pnv_pci_cfg_read(dn, where, size, val);
458         }
459
460         *val = 0xFFFFFFFF;
461         return PCIBIOS_DEVICE_NOT_FOUND;
462
463 }
464
465 static int pnv_pci_write_config(struct pci_bus *bus,
466                                 unsigned int devfn,
467                                 int where, int size, u32 val)
468 {
469         struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
470         struct pci_dn *pdn;
471
472         for (dn = busdn->child; dn; dn = dn->sibling) {
473                 pdn = PCI_DN(dn);
474                 if (pdn && pdn->devfn == devfn)
475                         return pnv_pci_cfg_write(dn, where, size, val);
476         }
477
478         return PCIBIOS_DEVICE_NOT_FOUND;
479 }
480
481 struct pci_ops pnv_pci_ops = {
482         .read  = pnv_pci_read_config,
483         .write = pnv_pci_write_config,
484 };
485
486 static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
487                          unsigned long uaddr, enum dma_data_direction direction,
488                          struct dma_attrs *attrs, bool rm)
489 {
490         u64 proto_tce;
491         __be64 *tcep, *tces;
492         u64 rpn;
493
494         proto_tce = TCE_PCI_READ; // Read allowed
495
496         if (direction != DMA_TO_DEVICE)
497                 proto_tce |= TCE_PCI_WRITE;
498
499         tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
500         rpn = __pa(uaddr) >> TCE_SHIFT;
501
502         while (npages--)
503                 *(tcep++) = cpu_to_be64(proto_tce | (rpn++ << TCE_RPN_SHIFT));
504
505         /* Some implementations won't cache invalid TCEs and thus may not
506          * need that flush. We'll probably turn it_type into a bit mask
507          * of flags if that becomes the case
508          */
509         if (tbl->it_type & TCE_PCI_SWINV_CREATE)
510                 pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
511
512         return 0;
513 }
514
515 static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
516                             unsigned long uaddr,
517                             enum dma_data_direction direction,
518                             struct dma_attrs *attrs)
519 {
520         return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
521                         false);
522 }
523
524 static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
525                 bool rm)
526 {
527         __be64 *tcep, *tces;
528
529         tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
530
531         while (npages--)
532                 *(tcep++) = cpu_to_be64(0);
533
534         if (tbl->it_type & TCE_PCI_SWINV_FREE)
535                 pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
536 }
537
538 static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
539 {
540         pnv_tce_free(tbl, index, npages, false);
541 }
542
543 static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
544 {
545         return ((u64 *)tbl->it_base)[index - tbl->it_offset];
546 }
547
548 static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
549                             unsigned long uaddr,
550                             enum dma_data_direction direction,
551                             struct dma_attrs *attrs)
552 {
553         return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
554 }
555
556 static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
557 {
558         pnv_tce_free(tbl, index, npages, true);
559 }
560
561 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
562                                void *tce_mem, u64 tce_size,
563                                u64 dma_offset)
564 {
565         tbl->it_blocksize = 16;
566         tbl->it_base = (unsigned long)tce_mem;
567         tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
568         tbl->it_offset = dma_offset >> tbl->it_page_shift;
569         tbl->it_index = 0;
570         tbl->it_size = tce_size >> 3;
571         tbl->it_busno = 0;
572         tbl->it_type = TCE_PCI;
573 }
574
575 static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
576 {
577         struct iommu_table *tbl;
578         const __be64 *basep, *swinvp;
579         const __be32 *sizep;
580
581         basep = of_get_property(hose->dn, "linux,tce-base", NULL);
582         sizep = of_get_property(hose->dn, "linux,tce-size", NULL);
583         if (basep == NULL || sizep == NULL) {
584                 pr_err("PCI: %s has missing tce entries !\n",
585                        hose->dn->full_name);
586                 return NULL;
587         }
588         tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
589         if (WARN_ON(!tbl))
590                 return NULL;
591         pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
592                                   be32_to_cpup(sizep), 0);
593         iommu_init_table(tbl, hose->node);
594         iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
595
596         /* Deal with SW invalidated TCEs when needed (BML way) */
597         swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
598                                  NULL);
599         if (swinvp) {
600                 tbl->it_busno = be64_to_cpu(swinvp[1]);
601                 tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
602                 tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
603         }
604         return tbl;
605 }
606
607 static void pnv_pci_dma_fallback_setup(struct pci_controller *hose,
608                                        struct pci_dev *pdev)
609 {
610         struct device_node *np = pci_bus_to_OF_node(hose->bus);
611         struct pci_dn *pdn;
612
613         if (np == NULL)
614                 return;
615         pdn = PCI_DN(np);
616         if (!pdn->iommu_table)
617                 pdn->iommu_table = pnv_pci_setup_bml_iommu(hose);
618         if (!pdn->iommu_table)
619                 return;
620         set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table);
621 }
622
623 static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
624 {
625         struct pci_controller *hose = pci_bus_to_host(pdev->bus);
626         struct pnv_phb *phb = hose->private_data;
627
628         /* If we have no phb structure, try to setup a fallback based on
629          * the device-tree (RTAS PCI for example)
630          */
631         if (phb && phb->dma_dev_setup)
632                 phb->dma_dev_setup(phb, pdev);
633         else
634                 pnv_pci_dma_fallback_setup(hose, pdev);
635 }
636
637 void pnv_pci_shutdown(void)
638 {
639         struct pci_controller *hose;
640
641         list_for_each_entry(hose, &hose_list, list_node) {
642                 struct pnv_phb *phb = hose->private_data;
643
644                 if (phb && phb->shutdown)
645                         phb->shutdown(phb);
646         }
647 }
648
649 /* Fixup wrong class code in p7ioc and p8 root complex */
650 static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
651 {
652         dev->class = PCI_CLASS_BRIDGE_PCI << 8;
653 }
654 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
655
656 static int pnv_pci_probe_mode(struct pci_bus *bus)
657 {
658         struct pci_controller *hose = pci_bus_to_host(bus);
659         const __be64 *tstamp;
660         u64 now, target;
661
662
663         /* We hijack this as a way to ensure we have waited long
664          * enough since the reset was lifted on the PCI bus
665          */
666         if (bus != hose->bus)
667                 return PCI_PROBE_NORMAL;
668         tstamp = of_get_property(hose->dn, "reset-clear-timestamp", NULL);
669         if (!tstamp || !*tstamp)
670                 return PCI_PROBE_NORMAL;
671
672         now = mftb() / tb_ticks_per_usec;
673         target = (be64_to_cpup(tstamp) / tb_ticks_per_usec)
674                 + PCI_RESET_DELAY_US;
675
676         pr_devel("pci %04d: Reset target: 0x%llx now: 0x%llx\n",
677                  hose->global_number, target, now);
678
679         if (now < target)
680                 msleep((target - now + 999) / 1000);
681
682         return PCI_PROBE_NORMAL;
683 }
684
685 void __init pnv_pci_init(void)
686 {
687         struct device_node *np;
688
689         pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
690
691         /* OPAL absent, try POPAL first then RTAS detection of PHBs */
692         if (!firmware_has_feature(FW_FEATURE_OPAL)) {
693 #ifdef CONFIG_PPC_POWERNV_RTAS
694                 init_pci_config_tokens();
695                 find_and_init_phbs();
696 #endif /* CONFIG_PPC_POWERNV_RTAS */
697         }
698         /* OPAL is here, do our normal stuff */
699         else {
700                 int found_ioda = 0;
701
702                 /* Look for IODA IO-Hubs. We don't support mixing IODA
703                  * and p5ioc2 due to the need to change some global
704                  * probing flags
705                  */
706                 for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
707                         pnv_pci_init_ioda_hub(np);
708                         found_ioda = 1;
709                 }
710
711                 /* Look for p5ioc2 IO-Hubs */
712                 if (!found_ioda)
713                         for_each_compatible_node(np, NULL, "ibm,p5ioc2")
714                                 pnv_pci_init_p5ioc2_hub(np);
715
716                 /* Look for ioda2 built-in PHB3's */
717                 for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
718                         pnv_pci_init_ioda2_phb(np);
719         }
720
721         /* Setup the linkage between OF nodes and PHBs */
722         pci_devs_phb_init();
723
724         /* Configure IOMMU DMA hooks */
725         ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
726         ppc_md.tce_build = pnv_tce_build_vm;
727         ppc_md.tce_free = pnv_tce_free_vm;
728         ppc_md.tce_build_rm = pnv_tce_build_rm;
729         ppc_md.tce_free_rm = pnv_tce_free_rm;
730         ppc_md.tce_get = pnv_tce_get;
731         ppc_md.pci_probe_mode = pnv_pci_probe_mode;
732         set_pci_dma_ops(&dma_iommu_ops);
733
734         /* Configure MSIs */
735 #ifdef CONFIG_PCI_MSI
736         ppc_md.msi_check_device = pnv_msi_check_device;
737         ppc_md.setup_msi_irqs = pnv_setup_msi_irqs;
738         ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
739 #endif
740 }
741
742 static int tce_iommu_bus_notifier(struct notifier_block *nb,
743                 unsigned long action, void *data)
744 {
745         struct device *dev = data;
746
747         switch (action) {
748         case BUS_NOTIFY_ADD_DEVICE:
749                 return iommu_add_device(dev);
750         case BUS_NOTIFY_DEL_DEVICE:
751                 if (dev->iommu_group)
752                         iommu_del_device(dev);
753                 return 0;
754         default:
755                 return 0;
756         }
757 }
758
759 static struct notifier_block tce_iommu_bus_nb = {
760         .notifier_call = tce_iommu_bus_notifier,
761 };
762
763 static int __init tce_iommu_bus_notifier_init(void)
764 {
765         bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
766         return 0;
767 }
768
769 subsys_initcall_sync(tce_iommu_bus_notifier_init);